[Pkg-lustre-svn-commit] updated: [eb5c9ad] Updated patchless patch to the newest 2.6.30 patch from Axel
Patrick Winnertz
winnie at debian.org
Thu Aug 20 11:50:29 UTC 2009
The following commit has been merged in the lustre-1.6 branch:
commit eb5c9adc1aef8c6781d0d2fb75d6bf6bf829e57b
Author: Patrick Winnertz <winnie at debian.org>
Date: Thu Aug 20 10:25:55 2009 +0200
Updated patchless patch to the newest 2.6.30 patch from Axel
Signed-off-by: Patrick Winnertz <winnie at debian.org>
diff --git a/debian/patches/patchless_support_2.6.30.dpatch b/debian/patches/patchless_support_2.6.30.dpatch
index 5cc66f7..c591b85 100755
--- a/debian/patches/patchless_support_2.6.30.dpatch
+++ b/debian/patches/patchless_support_2.6.30.dpatch
@@ -7,7 +7,7 @@
@DPATCH@
diff -urNad lustre~/build/autoMakefile.am.toplevel lustre/build/autoMakefile.am.toplevel
--- lustre~/build/autoMakefile.am.toplevel 2009-08-19 09:51:07.000000000 +0200
-+++ lustre/build/autoMakefile.am.toplevel 2009-08-19 14:10:45.000000000 +0200
++++ lustre/build/autoMakefile.am.toplevel 2009-08-20 10:25:20.000000000 +0200
@@ -51,7 +51,7 @@
modules: $(DEP) all-sources
$(MAKE) $(ARCH_UM) CC="$(CC)" -C $(LINUX_OBJ) \
@@ -19,7 +19,7 @@ diff -urNad lustre~/build/autoMakefile.am.toplevel lustre/build/autoMakefile.am.
endif # LINUX
diff -urNad lustre~/build/autoconf/lustre-build-linux.m4 lustre/build/autoconf/lustre-build-linux.m4
--- lustre~/build/autoconf/lustre-build-linux.m4 2009-08-19 09:51:07.000000000 +0200
-+++ lustre/build/autoconf/lustre-build-linux.m4 2009-08-19 14:11:38.000000000 +0200
++++ lustre/build/autoconf/lustre-build-linux.m4 2009-08-20 10:25:20.000000000 +0200
@@ -308,6 +308,24 @@
AC_SUBST(UML_CFLAGS)
])
@@ -47,7 +47,7 @@ diff -urNad lustre~/build/autoconf/lustre-build-linux.m4 lustre/build/autoconf/l
diff -urNad lustre~/lnet/autoconf/lustre-lnet.m4 lustre/lnet/autoconf/lustre-lnet.m4
--- lustre~/lnet/autoconf/lustre-lnet.m4 2009-08-19 09:51:08.000000000 +0200
-+++ lustre/lnet/autoconf/lustre-lnet.m4 2009-08-19 14:10:45.000000000 +0200
++++ lustre/lnet/autoconf/lustre-lnet.m4 2009-08-20 10:25:20.000000000 +0200
@@ -1362,6 +1362,22 @@
])
])
@@ -80,46 +80,60 @@ diff -urNad lustre~/lnet/autoconf/lustre-lnet.m4 lustre/lnet/autoconf/lustre-lne
])
#
+diff -urNad lustre~/lnet/include/libcfs/libcfs.h lustre/lnet/include/libcfs/libcfs.h
+--- lustre~/lnet/include/libcfs/libcfs.h 2009-08-19 09:51:08.000000000 +0200
++++ lustre/lnet/include/libcfs/libcfs.h 2009-08-20 10:25:20.000000000 +0200
+@@ -60,6 +60,11 @@
+ #include <stdio.h>
+ #endif
+
++#ifdef HAVE_CRED_IN_STRUCT_TASK_STRUCT
++# define CRED(ts) ts->cred
++# define CREDENTIALS(ts,x) CRED(ts)->x
++#endif
++
+ /* Controlled via configure key */
+ /* #define LIBCFS_DEBUG */
+
diff -urNad lustre~/lnet/include/lnet/types.h lustre/lnet/include/lnet/types.h
--- lustre~/lnet/include/lnet/types.h 2008-08-07 11:50:16.000000000 +0200
-+++ lustre/lnet/include/lnet/types.h 2009-08-19 14:10:45.000000000 +0200
-@@ -39,6 +39,33 @@
++++ lustre/lnet/include/lnet/types.h 2009-08-20 10:25:20.000000000 +0200
+@@ -39,6 +39,21 @@
#include <libcfs/libcfs.h>
-+/*
-+** dropped from linux/kernel.h
-+*/
-+
-+#ifdef KERNEL_2_6_26
-+
-+#define NIPQUAD(addr) \
-+ ((unsigned char *)&addr)[0], \
-+ ((unsigned char *)&addr)[1], \
-+ ((unsigned char *)&addr)[2], \
-+ ((unsigned char *)&addr)[3]
-+
-+#if defined(__LITTLE_ENDIAN)
-+#define HIPQUAD(addr) \
++/* since 2.6.26 HIPQUAD is no longer defined... */
++#ifndef HIPQUAD
++# if defined(__LITTLE_ENDIAN)
++# define HIPQUAD(addr) \
+ ((unsigned char *)&addr)[3], \
+ ((unsigned char *)&addr)[2], \
+ ((unsigned char *)&addr)[1], \
+ ((unsigned char *)&addr)[0]
-+#elif defined(__BIG_ENDIAN)
-+#define HIPQUAD NIPQUAD
-+#else
-+#error "Undefined byteorder??"
-+#endif /* __LITTLE_ENDIAN */
-+
++# elif defined(__BIG_ENDIAN)
++# define HIPQUAD NIPQUAD
++# else
++# error "Please fix asm/byteorder.h"
++# endif /* __LITTLE_ENDIAN */
+#endif
+
-+
#define LNET_RESERVED_PORTAL 0 /* portals reserved for lnet's own use */
typedef __u64 lnet_nid_t;
+diff -urNad lustre~/lnet/klnds/socklnd/socklnd.c lustre/lnet/klnds/socklnd/socklnd.c
+--- lustre~/lnet/klnds/socklnd/socklnd.c 2009-08-19 09:51:08.000000000 +0200
++++ lustre/lnet/klnds/socklnd/socklnd.c 2009-08-20 10:25:20.000000000 +0200
+@@ -41,6 +41,7 @@
+ * Author: Eric Barton <eric at bartonsoftware.com>
+ */
+
++#include <linux/kernel.h> /* NIPQUAD() */
+ #include "socklnd.h"
+
+ lnd_t the_ksocklnd = {
diff -urNad lustre~/lnet/libcfs/linux/linux-curproc.c lustre/lnet/libcfs/linux/linux-curproc.c
--- lustre~/lnet/libcfs/linux/linux-curproc.c 2008-11-20 10:27:06.000000000 +0100
-+++ lustre/lnet/libcfs/linux/linux-curproc.c 2009-08-19 14:10:45.000000000 +0200
++++ lustre/lnet/libcfs/linux/linux-curproc.c 2009-08-20 10:25:20.000000000 +0200
@@ -41,6 +41,7 @@
*/
@@ -128,79 +142,61 @@ diff -urNad lustre~/lnet/libcfs/linux/linux-curproc.c lustre/lnet/libcfs/linux/l
#define DEBUG_SUBSYSTEM S_LNET
-@@ -54,22 +55,38 @@
+@@ -54,22 +55,22 @@
uid_t cfs_curproc_uid(void)
{
-+#ifdef HAS_STRUCT_CRED
-+ return current->real_cred->uid;
-+#else
- return current->uid;
-+#endif
+- return current->uid;
++ return CREDENTIALS(current,uid);
}
gid_t cfs_curproc_gid(void)
{
-+#ifdef HAS_STRUCT_CRED
-+ return current->real_cred->gid;
-+#else
- return current->gid;
-+#endif
+- return current->gid;
++ return CREDENTIALS(current,gid);
}
uid_t cfs_curproc_fsuid(void)
{
-+#ifdef HAS_STRUCT_CRED
-+ return current->real_cred->fsuid;
-+#else
- return current->fsuid;
-+#endif
+- return current->fsuid;
++ return CREDENTIALS(current,fsuid);
}
gid_t cfs_curproc_fsgid(void)
{
-+#ifdef HAS_STRUCT_CRED
-+ return current->real_cred->fsgid;
-+#else
- return current->fsgid;
-+#endif
+- return current->fsgid;
++ return CREDENTIALS(current,fsgid);
}
pid_t cfs_curproc_pid(void)
-@@ -83,7 +100,11 @@
+@@ -83,7 +84,7 @@
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,4)
task_lock(current);
-+# ifdef HAS_STRUCT_CRED
-+ nr = current->real_cred->group_info->ngroups;
-+# else
- nr = current->group_info->ngroups;
-+# endif
+- nr = current->group_info->ngroups;
++ nr = CREDENTIALS(current,group_info)->ngroups;
task_unlock(current);
#else
nr = current->ngroups;
-@@ -95,8 +116,13 @@
+@@ -95,8 +96,8 @@
{
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,4)
task_lock(current);
-+# ifdef HAS_STRUCT_CRED
-+ size = min_t(int, size, current->real_cred->group_info->ngroups);
-+ memcpy(array, current->real_cred->group_info->blocks[0], size * sizeof(__u32));
-+# else
- size = min_t(int, size, current->group_info->ngroups);
- memcpy(array, current->group_info->blocks[0], size * sizeof(__u32));
-+# endif
+- size = min_t(int, size, current->group_info->ngroups);
+- memcpy(array, current->group_info->blocks[0], size * sizeof(__u32));
++ size = min_t(int, size, CREDENTIALS(current,group_info)->ngroups);
++ memcpy(array, CREDENTIALS(current,group_info)->blocks[0], size * sizeof(__u32));
task_unlock(current);
#else
LASSERT(size <= NGROUPS);
-@@ -127,17 +153,36 @@
+@@ -127,17 +128,32 @@
void cfs_cap_raise(cfs_cap_t cap)
{
-+#ifdef HAS_STRUCT_CRED
++#ifdef HAVE_CRED_IN_STRUCT_TASK_STRUCT
+ /* lustre/include/liblustre.h: #define cfs_current() current */
+ struct cred *new = prepare_creds();
-+
++
+ cap_raise(new->cap_effective, cfs_cap_unpack(cap));
+ commit_creds(new);
+#else
@@ -210,7 +206,7 @@ diff -urNad lustre~/lnet/libcfs/linux/linux-curproc.c lustre/lnet/libcfs/linux/l
void cfs_cap_lower(cfs_cap_t cap)
{
-+#ifdef HAS_STRUCT_CRED
++#ifdef HAVE_CRED_IN_STRUCT_TASK_STRUCT
+ struct cred *new = prepare_creds();
+
+ cap_lower(new->cap_effective, cfs_cap_unpack(cap));
@@ -222,55 +218,44 @@ diff -urNad lustre~/lnet/libcfs/linux/linux-curproc.c lustre/lnet/libcfs/linux/l
int cfs_cap_raised(cfs_cap_t cap)
{
-+#ifdef HAS_STRUCT_CRED
-+ return cap_raised(cfs_current()->real_cred->cap_effective, cfs_cap_unpack(cap));
-+#else
- return cap_raised(cfs_current()->cap_effective, cfs_cap_unpack(cap));
-+#endif
+- return cap_raised(cfs_current()->cap_effective, cfs_cap_unpack(cap));
++ return cap_raised(CREDENTIALS(cfs_current(),cap_effective), cfs_cap_unpack(cap));
}
void cfs_kernel_cap_pack(cfs_kernel_cap_t kcap, cfs_cap_t *cap)
-@@ -170,13 +215,22 @@
+@@ -170,13 +186,15 @@
cfs_cap_t cfs_curproc_cap_pack(void)
{
cfs_cap_t cap;
-+#ifdef HAS_STRUCT_CRED
-+ cfs_kernel_cap_pack(current->real_cred->cap_effective, &cap);
-+#else
- cfs_kernel_cap_pack(current->cap_effective, &cap);
-+#endif
+- cfs_kernel_cap_pack(current->cap_effective, &cap);
++ cfs_kernel_cap_pack(CREDENTIALS(current,cap_effective), &cap);
return cap;
}
void cfs_curproc_cap_unpack(cfs_cap_t cap)
{
-+#ifdef HAS_STRUCT_CRED
+- cfs_kernel_cap_unpack(¤t->cap_effective, cap);
+ /* warning: passing argument 1 of 'cfs_kernel_cap_unpack' discards qualifiers from pointer target type --azi */
-+ cfs_kernel_cap_unpack((kernel_cap_t *)¤t->real_cred->cap_effective, cap);
-+#else
- cfs_kernel_cap_unpack(¤t->cap_effective, cap);
-+#endif
++ // cfs_kernel_cap_unpack((kernel_cap_t *)¤t->real_cred->cap_effective, cap);
++ cfs_kernel_cap_unpack(&CREDENTIALS(current,cap_effective), cap);
}
int cfs_capable(cfs_cap_t cap)
diff -urNad lustre~/lnet/libcfs/linux/linux-module.c lustre/lnet/libcfs/linux/linux-module.c
--- lustre~/lnet/libcfs/linux/linux-module.c 2008-09-15 20:44:53.000000000 +0200
-+++ lustre/lnet/libcfs/linux/linux-module.c 2009-08-19 14:10:45.000000000 +0200
-@@ -139,7 +139,11 @@
++++ lustre/lnet/libcfs/linux/linux-module.c 2009-08-20 10:25:20.000000000 +0200
+@@ -139,7 +139,7 @@
struct cfs_psdev_file pfile;
int rc = 0;
-+#ifdef HAS_STRUCT_CRED
-+ if (current->real_cred->fsuid != 0)
-+#else
- if (current->fsuid != 0)
-+#endif
+- if (current->fsuid != 0)
++ if (CREDENTIALS(current,fsuid) != 0)
return -EACCES;
if ( _IOC_TYPE(cmd) != IOC_LIBCFS_TYPE ||
diff -urNad lustre~/lnet/libcfs/linux/linux-prim.c lustre/lnet/libcfs/linux/linux-prim.c
--- lustre~/lnet/libcfs/linux/linux-prim.c 2009-08-19 09:51:08.000000000 +0200
-+++ lustre/lnet/libcfs/linux/linux-prim.c 2009-08-19 14:10:45.000000000 +0200
++++ lustre/lnet/libcfs/linux/linux-prim.c 2009-08-20 10:25:20.000000000 +0200
@@ -40,6 +40,7 @@
#endif
#include <linux/module.h>
@@ -290,7 +275,7 @@ diff -urNad lustre~/lnet/libcfs/linux/linux-prim.c lustre/lnet/libcfs/linux/linu
#else
diff -urNad lustre~/lnet/libcfs/linux/linux-tcpip.c lustre/lnet/libcfs/linux/linux-tcpip.c
--- lustre~/lnet/libcfs/linux/linux-tcpip.c 2009-08-19 09:51:08.000000000 +0200
-+++ lustre/lnet/libcfs/linux/linux-tcpip.c 2009-08-19 14:10:45.000000000 +0200
++++ lustre/lnet/libcfs/linux/linux-tcpip.c 2009-08-20 10:25:20.000000000 +0200
@@ -63,7 +63,11 @@
return rc;
}
@@ -305,7 +290,7 @@ diff -urNad lustre~/lnet/libcfs/linux/linux-tcpip.c lustre/lnet/libcfs/linux/lin
sock_release(sock);
diff -urNad lustre~/lnet/lnet/api-ni.c lustre/lnet/lnet/api-ni.c
--- lustre~/lnet/lnet/api-ni.c 2009-08-19 09:51:08.000000000 +0200
-+++ lustre/lnet/lnet/api-ni.c 2009-08-19 14:10:45.000000000 +0200
++++ lustre/lnet/lnet/api-ni.c 2009-08-20 10:25:20.000000000 +0200
@@ -1032,7 +1032,7 @@
#ifdef __KERNEL__
if (lnd == NULL) {
@@ -317,7 +302,7 @@ diff -urNad lustre~/lnet/lnet/api-ni.c lustre/lnet/lnet/api-ni.c
lnd = lnet_find_lnd_by_type(lnd_type);
diff -urNad lustre~/lustre/autoconf/lustre-core.m4 lustre/lustre/autoconf/lustre-core.m4
--- lustre~/lustre/autoconf/lustre-core.m4 2009-08-19 09:51:08.000000000 +0200
-+++ lustre/lustre/autoconf/lustre-core.m4 2009-08-19 14:10:45.000000000 +0200
++++ lustre/lustre/autoconf/lustre-core.m4 2009-08-20 10:25:20.000000000 +0200
@@ -1106,15 +1106,20 @@
AC_DEFUN([LC_PAGE_CHECKED],
[AC_MSG_CHECKING([kernel has PageChecked and SetPageChecked])
@@ -383,7 +368,7 @@ diff -urNad lustre~/lustre/autoconf/lustre-core.m4 lustre/lustre/autoconf/lustre
AC_DEFUN([LC_KERNEL_SPLICE_READ],
[AC_MSG_CHECKING([if kernel has .splice_read])
LB_LINUX_TRY_COMPILE([
-@@ -1268,11 +1295,397 @@
+@@ -1268,11 +1295,466 @@
# 2.6.23 extract nfs export related data into exportfs.h
AC_DEFUN([LC_HAVE_EXPORTFS_H],
@@ -783,1679 +768,85 @@ diff -urNad lustre~/lustre/autoconf/lustre-core.m4 lustre/lustre/autoconf/lustre
+],[
+ AC_MSG_RESULT([no])
+])
- ])
-
- #
-@@ -1372,8 +1785,45 @@
- LC_FS_RENAME_DOES_D_MOVE
- # 2.6.23
- LC_UNREGISTER_BLKDEV_RETURN_INT
-+ LC_KERNEL_SENDFILE
- LC_KERNEL_SPLICE_READ
- LC_HAVE_EXPORTFS_H
-+ LC_VM_OP_FAULT
-+ LC_REGISTER_SHRINKER
-+
-+ #2.6.25
-+ LC_MAPPING_CAP_WRITEBACK_DIRTY
-+
-+ # 2.6.24
-+ LC_HAVE_MMTYPES_H
-+ LC_BIO_ENDIO_2ARG
-+ LC_FH_TO_DENTRY
-+ LC_PROCFS_DELETED
-+
-+ # 2.6.26
-+ LC_FS_STRUCT_USE_PATH
-+ LC_RCU_LIST_SAFE
-+ LC_PATH_RELEASE
-+
-+ # 2.6.27
-+ LC_INODE_PERMISION_2ARGS
-+ LC_FILE_REMOVE_SUID
-+ LC_TRYLOCKPAGE
-+ LC_RW_TREE_LOCK
-+ #done until here
-+ LC_READ_INODE_IN_SBOPS #done
-+ LC_EXPORT_INODE_PERMISSION #done
-+ LC_QUOTA_ON_5ARGS #done
-+ LC_QUOTA_OFF_3ARGS #done
-+ LC_VFS_DQ_OFF #done
-+
-+ # 2.6.27.15-2 sles11
-+ LC_BI_HW_SEGMENTS #done
-+ LC_HAVE_QUOTAIO_V1_H #done
-+ LC_VFS_SYMLINK_5ARGS #done
-+ LC_SB_ANY_QUOTA_ACTIVE
-+ LC_SB_HAS_QUOTA_ACTIVE
-+
- ])
-
- #
-@@ -1606,6 +2056,7 @@
- ],[
- AC_MSG_RESULT([no])
- ])
-+
- ],[
- AC_MSG_RESULT([no])
- ])
-diff -urNad lustre~/lustre/autoconf/lustre-core.m4.orig lustre/lustre/autoconf/lustre-core.m4.orig
---- lustre~/lustre/autoconf/lustre-core.m4.orig 1970-01-01 01:00:00.000000000 +0100
-+++ lustre/lustre/autoconf/lustre-core.m4.orig 2009-08-19 14:10:45.000000000 +0200
-@@ -0,0 +1,2075 @@
-+#* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-+#* vim:expandtab:shiftwidth=8:tabstop=8:
-+#
-+# LC_CONFIG_SRCDIR
-+#
-+# Wrapper for AC_CONFIG_SUBDIR
-+#
-+AC_DEFUN([LC_CONFIG_SRCDIR],
-+[AC_CONFIG_SRCDIR([lustre/obdclass/obdo.c])
+])
+
-+#
-+# LC_PATH_DEFAULTS
-+#
-+# lustre specific paths
-+#
-+AC_DEFUN([LC_PATH_DEFAULTS],
-+[# ptlrpc kernel build requires this
-+LUSTRE="$PWD/lustre"
-+AC_SUBST(LUSTRE)
-+
-+# mount.lustre
-+rootsbindir='/sbin'
-+AC_SUBST(rootsbindir)
-+
-+demodir='$(docdir)/demo'
-+AC_SUBST(demodir)
-+
-+pkgexampledir='${pkgdatadir}/examples'
-+AC_SUBST(pkgexampledir)
-+])
-+
-+#
-+# LC_TARGET_SUPPORTED
-+#
-+# is the target os supported?
-+#
-+AC_DEFUN([LC_TARGET_SUPPORTED],
-+[case $target_os in
-+ linux* | darwin*)
-+$1
-+ ;;
-+ *)
-+$2
-+ ;;
-+esac
-+])
-+
-+#
-+# LC_CONFIG_EXT3
-+#
-+# that ext3 is enabled in the kernel
-+#
-+AC_DEFUN([LC_CONFIG_EXT3],
-+[LB_LINUX_CONFIG([EXT3_FS],[],[
-+ LB_LINUX_CONFIG([EXT3_FS_MODULE],[],[$2])
-+])
-+LB_LINUX_CONFIG([EXT3_FS_XATTR],[$1],[$3])
-+])
-+
-+#
-+# LC_FSHOOKS
-+#
-+# If we have (and can build) fshooks.h
-+#
-+AC_DEFUN([LC_FSHOOKS],
-+[LB_CHECK_FILE([$LINUX/include/linux/fshooks.h],[
-+ AC_MSG_CHECKING([if fshooks.h can be compiled])
-+ LB_LINUX_TRY_COMPILE([
-+ #include <linux/fshooks.h>
-+ ],[],[
-+ AC_MSG_RESULT([yes])
-+ ],[
-+ AC_MSG_RESULT([no])
-+ AC_MSG_WARN([You might have better luck with gcc 3.3.x.])
-+ AC_MSG_WARN([You can set CC=gcc33 before running configure.])
-+ AC_MSG_ERROR([Your compiler cannot build fshooks.h.])
-+ ])
-+$1
-+],[
-+$2
-+])
-+])
-+
-+#
-+# LC_STRUCT_KIOBUF
-+#
-+# rh 2.4.18 has iobuf->dovary, but other kernels do not
-+#
-+AC_DEFUN([LC_STRUCT_KIOBUF],
-+[AC_MSG_CHECKING([if struct kiobuf has a dovary field])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/iobuf.h>
-+],[
-+ struct kiobuf iobuf;
-+ iobuf.dovary = 1;
-+],[
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_KIOBUF_DOVARY, 1, [struct kiobuf has a dovary field])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+#
-+# LC_FUNC_COND_RESCHED
-+#
-+# cond_resched() was introduced in 2.4.20
-+#
-+AC_DEFUN([LC_FUNC_COND_RESCHED],
-+[AC_MSG_CHECKING([if kernel offers cond_resched])
++##starting from here the configure checks are written by credativ GmbH
++#2.6.29: task_struct has cred entry
++AC_DEFUN([LC_CRED_IN_STRUCT_TASK_STRUCT],
++[AC_MSG_CHECKING([kernel has cred in struct task_struct])
+LB_LINUX_TRY_COMPILE([
+ #include <linux/sched.h>
+],[
-+ cond_resched();
++ struct task_struct foo;
++ foo.cred = NULL;
+],[
++ AC_DEFINE(HAVE_CRED_IN_STRUCT_TASK_STRUCT, 1,
++ [struct task_struct {} introduces struct cred * to keep task credentials])
+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_COND_RESCHED, 1, [cond_resched found])
+],[
+ AC_MSG_RESULT([no])
+])
+])
+
-+#
-+# LC_FUNC_ZAP_PAGE_RANGE
-+#
-+# if zap_page_range() takes a vma arg
-+#
-+AC_DEFUN([LC_FUNC_ZAP_PAGE_RANGE],
-+[AC_MSG_CHECKING([if zap_page_range with vma parameter])
-+ZAP_PAGE_RANGE_VMA="`grep -c 'zap_page_range.*struct vm_area_struct' $LINUX/include/linux/mm.h`"
-+if test "$ZAP_PAGE_RANGE_VMA" != 0 ; then
-+ AC_DEFINE(ZAP_PAGE_RANGE_VMA, 1, [zap_page_range with vma parameter])
-+ AC_MSG_RESULT([yes])
-+else
-+ AC_MSG_RESULT([no])
-+fi
-+])
+
-+#
-+# LC_FUNC_PDE
-+#
-+# if proc_fs.h defines PDE()
-+#
-+AC_DEFUN([LC_FUNC_PDE],
-+[AC_MSG_CHECKING([if kernel defines PDE])
-+HAVE_PDE="`grep -c 'proc_dir_entry..PDE' $LINUX/include/linux/proc_fs.h`"
-+if test "$HAVE_PDE" != 0 ; then
-+ AC_DEFINE(HAVE_PDE, 1, [the kernel defines PDE])
-+ AC_MSG_RESULT([yes])
-+else
-+ AC_MSG_RESULT([no])
-+fi
-+])
-+
-+#
-+# LC_FUNC_FILEMAP_FDATASYNC
-+#
-+# if filemap_fdatasync() exists
-+#
-+AC_DEFUN([LC_FUNC_FILEMAP_FDATAWRITE],
-+[AC_MSG_CHECKING([whether filemap_fdatawrite() is defined])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/fs.h>
-+],[
-+ int (*foo)(struct address_space *)= filemap_fdatawrite;
-+],[
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_FILEMAP_FDATAWRITE, 1, [filemap_fdatawrite() found])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+#
-+# LC_FUNC_DIRECT_IO
-+#
-+# if direct_IO takes a struct file argument
-+#
-+AC_DEFUN([LC_FUNC_DIRECT_IO],
-+[AC_MSG_CHECKING([if kernel passes struct file to direct_IO])
-+HAVE_DIO_FILE="`grep -c 'direct_IO.*struct file' $LINUX/include/linux/fs.h`"
-+if test "$HAVE_DIO_FILE" != 0 ; then
-+ AC_DEFINE(HAVE_DIO_FILE, 1, [the kernel passes struct file to direct_IO])
-+ AC_MSG_RESULT(yes)
-+else
-+ AC_MSG_RESULT(no)
-+fi
-+])
-+
-+#
-+# LC_HEADER_MM_INLINE
-+#
-+# RHEL kernels define page_count in mm_inline.h
-+#
-+AC_DEFUN([LC_HEADER_MM_INLINE],
-+[AC_MSG_CHECKING([if kernel has mm_inline.h header])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/mm_inline.h>
-+],[
-+ #ifndef page_count
-+ #error mm_inline.h does not define page_count
-+ #endif
-+],[
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_MM_INLINE, 1, [mm_inline found])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+#
-+# LC_STRUCT_INODE
-+#
-+# if inode->i_alloc_sem exists
-+#
-+AC_DEFUN([LC_STRUCT_INODE],
-+[AC_MSG_CHECKING([if struct inode has i_alloc_sem])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/fs.h>
-+ #include <linux/version.h>
-+],[
-+ struct inode i;
-+ return (char *)&i.i_alloc_sem - (char *)&i;
-+],[
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_I_ALLOC_SEM, 1, [struct inode has i_alloc_sem])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+#
-+# LC_FUNC_REGISTER_CACHE
-+#
-+# if register_cache() is defined by kernel
-+#
-+AC_DEFUN([LC_FUNC_REGISTER_CACHE],
-+[AC_MSG_CHECKING([if kernel defines register_cache()])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/list.h>
-+ #include <linux/cache_def.h>
-+],[
-+ struct cache_definition cache;
-+],[
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_REGISTER_CACHE, 1, [register_cache found])
-+ AC_MSG_CHECKING([if kernel expects return from cache shrink function])
-+ HAVE_CACHE_RETURN_INT="`grep -c 'int.*shrink' $LINUX/include/linux/cache_def.h`"
-+ if test "$HAVE_CACHE_RETURN_INT" != 0 ; then
-+ AC_DEFINE(HAVE_CACHE_RETURN_INT, 1, [kernel expects return from shrink_cache])
-+ AC_MSG_RESULT(yes)
-+ else
-+ AC_MSG_RESULT(no)
-+ fi
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+#
-+# LC_FUNC_GRAB_CACHE_PAGE_NOWAIT_GFP
-+#
-+# check for our patched grab_cache_page_nowait_gfp() function
-+#
-+AC_DEFUN([LC_FUNC_GRAB_CACHE_PAGE_NOWAIT_GFP],
-+[AC_MSG_CHECKING([if kernel defines grab_cache_page_nowait_gfp()])
-+HAVE_GCPN_GFP="`grep -c 'grab_cache_page_nowait_gfp' $LINUX/include/linux/pagemap.h`"
-+if test "$HAVE_GCPN_GFP" != 0 ; then
-+ AC_DEFINE(HAVE_GRAB_CACHE_PAGE_NOWAIT_GFP, 1,
-+ [kernel has grab_cache_page_nowait_gfp()])
-+ AC_MSG_RESULT(yes)
-+else
-+ AC_MSG_RESULT(no)
-+fi
-+])
-+
-+#
-+# LC_FUNC_DEV_SET_RDONLY
-+#
-+# check for the old-style dev_set_rdonly which took an extra "devno" param
-+# and can only set a single device to discard writes at one time
-+#
-+AC_DEFUN([LC_FUNC_DEV_SET_RDONLY],
-+[AC_MSG_CHECKING([if kernel has new dev_set_rdonly])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/fs.h>
-+],[
-+ #ifndef HAVE_CLEAR_RDONLY_ON_PUT
-+ #error needs to be patched by lustre kernel patches from Lustre version 1.4.3 or above.
-+ #endif
-+],[
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_DEV_SET_RDONLY, 1, [kernel has new dev_set_rdonly])
-+],[
-+ AC_MSG_RESULT([no, Linux kernel source needs to be patches by lustre
-+kernel patches from Lustre version 1.4.3 or above.])
-+])
-+])
-+
-+#
-+# LC_CONFIG_BACKINGFS
-+#
-+# setup, check the backing filesystem
-+#
-+AC_DEFUN([LC_CONFIG_BACKINGFS],
-+[
-+BACKINGFS="ldiskfs"
-+
-+if test x$with_ldiskfs = xno ; then
-+ BACKINGFS="ext3"
-+
-+ if test x$linux25$enable_server = xyesyes ; then
-+ AC_MSG_ERROR([ldiskfs is required for 2.6-based servers.])
-+ fi
-+
-+ # --- Check that ext3 and ext3 xattr are enabled in the kernel
-+ LC_CONFIG_EXT3([],[
-+ AC_MSG_ERROR([Lustre requires that ext3 is enabled in the kernel])
-+ ],[
-+ AC_MSG_WARN([Lustre requires that extended attributes for ext3 are enabled in the kernel])
-+ AC_MSG_WARN([This build may fail.])
-+ ])
-+else
-+ # ldiskfs is enabled
-+ LB_DEFINE_LDISKFS_OPTIONS
-+fi #ldiskfs
-+
-+AC_MSG_CHECKING([which backing filesystem to use])
-+AC_MSG_RESULT([$BACKINGFS])
-+AC_SUBST(BACKINGFS)
-+])
-+
-+#
-+# LC_CONFIG_PINGER
-+#
-+# the pinger is temporary, until we have the recovery node in place
-+#
-+AC_DEFUN([LC_CONFIG_PINGER],
-+[AC_MSG_CHECKING([whether to enable pinger support])
-+AC_ARG_ENABLE([pinger],
-+ AC_HELP_STRING([--disable-pinger],
-+ [disable recovery pinger support]),
-+ [],[enable_pinger='yes'])
-+AC_MSG_RESULT([$enable_pinger])
-+if test x$enable_pinger != xno ; then
-+ AC_DEFINE(ENABLE_PINGER, 1, Use the Pinger)
-+fi
-+])
-+
-+#
-+# LC_CONFIG_CHECKSUM
-+#
-+# do checksum of bulk data between client and OST
-+#
-+AC_DEFUN([LC_CONFIG_CHECKSUM],
-+[AC_MSG_CHECKING([whether to enable data checksum support])
-+AC_ARG_ENABLE([checksum],
-+ AC_HELP_STRING([--disable-checksum],
-+ [disable data checksum support]),
-+ [],[enable_checksum='yes'])
-+AC_MSG_RESULT([$enable_checksum])
-+if test x$enable_checksum != xno ; then
-+ AC_DEFINE(ENABLE_CHECKSUM, 1, do data checksums)
-+fi
-+])
-+
-+#
-+# LC_CONFIG_HEALTH_CHECK_WRITE
-+#
-+# Turn on the actual write to the disk
-+#
-+AC_DEFUN([LC_CONFIG_HEALTH_CHECK_WRITE],
-+[AC_MSG_CHECKING([whether to enable a write with the health check])
-+AC_ARG_ENABLE([health-write],
-+ AC_HELP_STRING([--enable-health-write],
-+ [enable disk writes when doing health check]),
-+ [],[enable_health_write='no'])
-+AC_MSG_RESULT([$enable_health_write])
-+if test x$enable_health_write == xyes ; then
-+ AC_DEFINE(USE_HEALTH_CHECK_WRITE, 1, Write when Checking Health)
-+fi
-+])
-+
-+#
-+# LC_CONFIG_LIBLUSTRE_RECOVERY
-+#
-+AC_DEFUN([LC_CONFIG_LIBLUSTRE_RECOVERY],
-+[AC_MSG_CHECKING([whether to enable liblustre recovery support])
-+AC_ARG_ENABLE([liblustre-recovery],
-+ AC_HELP_STRING([--disable-liblustre-recovery],
-+ [disable liblustre recovery support]),
-+ [],[enable_liblustre_recovery='yes'])
-+AC_MSG_RESULT([$enable_liblustre_recovery])
-+if test x$enable_liblustre_recovery != xno ; then
-+ AC_DEFINE(ENABLE_LIBLUSTRE_RECOVERY, 1, Liblustre Can Recover)
-+fi
-+])
-+
-+#
-+# LC_CONFIG_OBD_BUFFER_SIZE
-+#
-+# the maximum buffer size of lctl ioctls
-+#
-+AC_DEFUN([LC_CONFIG_OBD_BUFFER_SIZE],
-+[AC_MSG_CHECKING([maximum OBD ioctl size])
-+AC_ARG_WITH([obd-buffer-size],
-+ AC_HELP_STRING([--with-obd-buffer-size=[size]],
-+ [set lctl ioctl maximum bytes (default=8192)]),
-+ [
-+ OBD_BUFFER_SIZE=$with_obd_buffer_size
-+ ],[
-+ OBD_BUFFER_SIZE=8192
-+ ])
-+AC_MSG_RESULT([$OBD_BUFFER_SIZE bytes])
-+AC_DEFINE_UNQUOTED(OBD_MAX_IOCTL_BUFFER, $OBD_BUFFER_SIZE, [IOCTL Buffer Size])
-+])
-+
-+#
-+# LC_STRUCT_STATFS
-+#
-+# AIX does not have statfs.f_namelen
-+#
-+AC_DEFUN([LC_STRUCT_STATFS],
-+[AC_MSG_CHECKING([if struct statfs has a f_namelen field])
++#2.6.29: struct task_struct {} introduces struct cred * to keep task credentials: use get_group_info()/put_group_info()
++AC_DEFUN([LC_GET_GROUP_INFO],
++[AC_MSG_CHECKING([use get_group_info on struct cred])
+LB_LINUX_TRY_COMPILE([
-+ #include <linux/vfs.h>
++ #include <linux/sched.h>
++ #include <include/linux/cred.h>
+],[
-+ struct statfs sfs;
-+ sfs.f_namelen = 1;
++ #TODO: this needs to be written
+],[
++ AC_DEFINE(HAVE_GET_GROUP_INFO, 1,
++ [struct task_struct {} introduces struct cred * to keep task credentials: use get_group_info/put_group_info])
+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_STATFS_NAMELEN, 1, [struct statfs has a namelen field])
+],[
+ AC_MSG_RESULT([no])
+])
+])
+
-+#
-+# LC_READLINK_SSIZE_T
-+#
-+AC_DEFUN([LC_READLINK_SSIZE_T],
-+[AC_MSG_CHECKING([if readlink returns ssize_t])
-+AC_TRY_COMPILE([
-+ #include <unistd.h>
-+],[
-+ ssize_t readlink(const char *, char *, size_t);
-+],[
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_POSIX_1003_READLINK, 1, [readlink returns ssize_t])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
+
-+AC_DEFUN([LC_FUNC_PAGE_MAPPED],
-+[AC_MSG_CHECKING([if kernel offers page_mapped])
++#2.6.29: is d_alloc_anon available
++AC_DEFUN([LC_HAVE_D_ALLOC_ANON],
++[AC_MSG_CHECKING([is d_alloc_annon declared in dache.h])
+LB_LINUX_TRY_COMPILE([
-+ #include <linux/mm.h>
++ #include <linux/dcache.h>
+],[
-+ page_mapped(NULL);
++ d_alloc_annon(NULL);
+],[
++ AC_DEFINE(HAVE_D_ALLOC_ANON, 1,
++ [dcache.h no longer declares d_alloc_anon - d_obtain_alias does about the same])
+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_PAGE_MAPPED, 1, [page_mapped found])
+],[
+ AC_MSG_RESULT([no])
+])
+])
+
-+AC_DEFUN([LC_STRUCT_FILE_OPS_UNLOCKED_IOCTL],
-+[AC_MSG_CHECKING([if struct file_operations has an unlocked_ioctl field])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/fs.h>
-+],[
-+ struct file_operations fops;
-+ &fops.unlocked_ioctl;
-+],[
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_UNLOCKED_IOCTL, 1, [struct file_operations has an unlock ed_ioctl field])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+AC_DEFUN([LC_FILEMAP_POPULATE],
-+[AC_MSG_CHECKING([for exported filemap_populate])
-+LB_LINUX_TRY_COMPILE([
-+ #include <asm/page.h>
-+ #include <linux/mm.h>
-+],[
-+ filemap_populate(NULL, 0, 0, __pgprot(0), 0, 0);
-+],[
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_FILEMAP_POPULATE, 1, [Kernel exports filemap_populate])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+AC_DEFUN([LC_D_ADD_UNIQUE],
-+[AC_MSG_CHECKING([for d_add_unique])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/dcache.h>
-+],[
-+ d_add_unique(NULL, NULL);
-+],[
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_D_ADD_UNIQUE, 1, [Kernel has d_add_unique])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+AC_DEFUN([LC_BIT_SPINLOCK_H],
-+[LB_CHECK_FILE([$LINUX/include/linux/bit_spinlock.h],[
-+ AC_MSG_CHECKING([if bit_spinlock.h can be compiled])
-+ LB_LINUX_TRY_COMPILE([
-+ #include <asm/processor.h>
-+ #include <linux/spinlock.h>
-+ #include <linux/bit_spinlock.h>
-+ ],[],[
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_BIT_SPINLOCK_H, 1, [Kernel has bit_spinlock.h])
-+ ],[
-+ AC_MSG_RESULT([no])
-+ ])
-+],
-+[])
-+])
-+
-+#
-+# LC_POSIX_ACL_XATTR
-+#
-+# If we have xattr_acl.h
-+#
-+AC_DEFUN([LC_XATTR_ACL],
-+[LB_CHECK_FILE([$LINUX/include/linux/xattr_acl.h],[
-+ AC_MSG_CHECKING([if xattr_acl.h can be compiled])
-+ LB_LINUX_TRY_COMPILE([
-+ #include <linux/xattr_acl.h>
-+ ],[],[
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_XATTR_ACL, 1, [Kernel has xattr_acl])
-+ ],[
-+ AC_MSG_RESULT([no])
-+ ])
-+],
-+[])
-+])
-+
-+#
-+# LC_LINUX_FIEMAP_H
-+#
-+# If we have fiemap.h
-+# after 2.6.27 use fiemap.h in include/linux
-+#
-+AC_DEFUN([LC_LINUX_FIEMAP_H],
-+[LB_CHECK_FILE([$LINUX/include/linux/fiemap.h],[
-+ AC_MSG_CHECKING([if fiemap.h can be compiled])
-+ LB_LINUX_TRY_COMPILE([
-+ #include <linux/fiemap.h>
-+ ],[],[
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_LINUX_FIEMAP_H, 1, [Kernel has fiemap.h])
-+ ],[
-+ AC_MSG_RESULT([no])
-+ ])
-+],
-+[])
-+])
-+
-+
-+AC_DEFUN([LC_STRUCT_INTENT_FILE],
-+[AC_MSG_CHECKING([if struct open_intent has a file field])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/fs.h>
-+ #include <linux/namei.h>
-+],[
-+ struct open_intent intent;
-+ &intent.file;
-+],[
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_FILE_IN_STRUCT_INTENT, 1, [struct open_intent has a file field])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+
-+AC_DEFUN([LC_POSIX_ACL_XATTR_H],
-+[LB_CHECK_FILE([$LINUX/include/linux/posix_acl_xattr.h],[
-+ AC_MSG_CHECKING([if linux/posix_acl_xattr.h can be compiled])
-+ LB_LINUX_TRY_COMPILE([
-+ #include <linux/posix_acl_xattr.h>
-+ ],[],[
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_LINUX_POSIX_ACL_XATTR_H, 1, [linux/posix_acl_xattr.h found])
-+
-+ ],[
-+ AC_MSG_RESULT([no])
-+ ])
-+$1
-+],[
-+AC_MSG_RESULT([no])
-+])
-+])
-+
-+#
-+# LC_EXPORT___IGET
-+# starting from 2.6.19 linux kernel exports __iget()
-+#
-+AC_DEFUN([LC_EXPORT___IGET],
-+[LB_CHECK_SYMBOL_EXPORT([__iget],
-+[fs/inode.c],[
-+ AC_DEFINE(HAVE_EXPORT___IGET, 1, [kernel exports __iget])
-+],[
-+])
-+])
-+
-+
-+AC_DEFUN([LC_LUSTRE_VERSION_H],
-+[LB_CHECK_FILE([$LINUX/include/linux/lustre_version.h],[
-+ rm -f "$LUSTRE/include/linux/lustre_version.h"
-+],[
-+ touch "$LUSTRE/include/linux/lustre_version.h"
-+ if test x$enable_server = xyes ; then
-+ AC_MSG_WARN([Unpatched kernel detected.])
-+ AC_MSG_WARN([Lustre servers cannot be built with an unpatched kernel;])
-+ AC_MSG_WARN([disabling server build])
-+ enable_server='no'
-+ fi
-+])
-+])
-+
-+AC_DEFUN([LC_FUNC_SET_FS_PWD],
-+[LB_CHECK_SYMBOL_EXPORT([set_fs_pwd],
-+[fs/namespace.c],[
-+ AC_DEFINE(HAVE_SET_FS_PWD, 1, [set_fs_pwd is exported])
-+],[
-+])
-+])
-+
-+#
-+# check for FS_RENAME_DOES_D_MOVE flag
-+#
-+AC_DEFUN([LC_FS_RENAME_DOES_D_MOVE],
-+[AC_MSG_CHECKING([if kernel has FS_RENAME_DOES_D_MOVE flag])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/fs.h>
-+],[
-+ int v = FS_RENAME_DOES_D_MOVE;
-+],[
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_FS_RENAME_DOES_D_MOVE, 1, [kernel has FS_RENAME_DOES_D_MOVE flag])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+#
-+# LC_FUNC_MS_FLOCK_LOCK
-+#
-+# SLES9 kernel has MS_FLOCK_LOCK sb flag
-+#
-+AC_DEFUN([LC_FUNC_MS_FLOCK_LOCK],
-+[AC_MSG_CHECKING([if kernel has MS_FLOCK_LOCK sb flag])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/fs.h>
-+],[
-+ int flags = MS_FLOCK_LOCK;
-+],[
-+ AC_DEFINE(HAVE_MS_FLOCK_LOCK, 1,
-+ [kernel has MS_FLOCK_LOCK flag])
-+ AC_MSG_RESULT([yes])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+#
-+# LC_FUNC_HAVE_CAN_SLEEP_ARG
-+#
-+# SLES9 kernel has third arg can_sleep
-+# in fs/locks.c: flock_lock_file_wait()
-+#
-+AC_DEFUN([LC_FUNC_HAVE_CAN_SLEEP_ARG],
-+[AC_MSG_CHECKING([if kernel has third arg can_sleep in fs/locks.c: flock_lock_file_wait()])
++#2.6.30:
++AC_DEFUN([LC_COUNT_IN_STRUCT_FS_STRUCT],
++[AC_MSG_CHECKING([is fs_struct available])
+LB_LINUX_TRY_COMPILE([
-+ #include <linux/fs.h>
++ #include <TODO.h>
+],[
-+ int cansleep;
-+ struct file *file;
-+ struct file_lock *file_lock;
-+ flock_lock_file_wait(file, file_lock, cansleep);
-+],[
-+ AC_DEFINE(HAVE_CAN_SLEEP_ARG, 1,
-+ [kernel has third arg can_sleep in fs/locks.c: flock_lock_file_wait()])
-+ AC_MSG_RESULT([yes])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+#
-+# LC_FUNC_F_OP_FLOCK
-+#
-+# rhel4.2 kernel has f_op->flock field
-+#
-+AC_DEFUN([LC_FUNC_F_OP_FLOCK],
-+[AC_MSG_CHECKING([if struct file_operations has flock field])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/fs.h>
-+],[
-+ struct file_operations ll_file_operations_flock;
-+ ll_file_operations_flock.flock = NULL;
-+],[
-+ AC_DEFINE(HAVE_F_OP_FLOCK, 1,
-+ [struct file_operations has flock field])
-+ AC_MSG_RESULT([yes])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+#
-+# LC_FUNC_MS_FLOCK_LOCK
-+#
-+# SLES9 kernel has MS_FLOCK_LOCK sb flag
-+#
-+AC_DEFUN([LC_FUNC_MS_FLOCK_LOCK],
-+[AC_MSG_CHECKING([if kernel has MS_FLOCK_LOCK sb flag])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/fs.h>
-+],[
-+ int flags = MS_FLOCK_LOCK;
-+],[
-+ AC_DEFINE(HAVE_MS_FLOCK_LOCK, 1,
-+ [kernel has MS_FLOCK_LOCK flag])
-+ AC_MSG_RESULT([yes])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+#
-+# LC_FUNC_HAVE_CAN_SLEEP_ARG
-+#
-+# SLES9 kernel has third arg can_sleep
-+# in fs/locks.c: flock_lock_file_wait()
-+#
-+AC_DEFUN([LC_FUNC_HAVE_CAN_SLEEP_ARG],
-+[AC_MSG_CHECKING([if kernel has third arg can_sleep in fs/locks.c: flock_lock_file_wait()])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/fs.h>
-+],[
-+ int cansleep;
-+ struct file *file;
-+ struct file_lock *file_lock;
-+ flock_lock_file_wait(file, file_lock, cansleep);
-+],[
-+ AC_DEFINE(HAVE_CAN_SLEEP_ARG, 1,
-+ [kernel has third arg can_sleep in fs/locks.c: flock_lock_file_wait()])
-+ AC_MSG_RESULT([yes])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+#
-+# LC_TASK_PPTR
-+#
-+# task struct has p_pptr instead of parent
-+#
-+AC_DEFUN([LC_TASK_PPTR],
-+[AC_MSG_CHECKING([task p_pptr found])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/sched.h>
-+],[
-+ struct task_struct *p;
-+
-+ p = p->p_pptr;
++ struct fs_struct fss; atomic_read(&fss.count); /* 2.6.30 replaced atomic_t count with int users -- TODO confirm against final upstream check */
+],[
++ AC_DEFINE(HAVE_COUNT_IN_STRUCT_FS_STRUCT, 1,
++ [fs_struct counter replaced by user])
+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_TASK_PPTR, 1, [task p_pptr found])
+],[
+ AC_MSG_RESULT([no])
+])
-+])
-+
-+#
-+# LC_FUNC_F_OP_FLOCK
-+#
-+# rhel4.2 kernel has f_op->flock field
-+#
-+AC_DEFUN([LC_FUNC_F_OP_FLOCK],
-+[AC_MSG_CHECKING([if struct file_operations has flock field])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/fs.h>
-+],[
-+ struct file_operations ll_file_operations_flock;
-+ ll_file_operations_flock.flock = NULL;
-+],[
-+ AC_DEFINE(HAVE_F_OP_FLOCK, 1,
-+ [struct file_operations has flock field])
-+ AC_MSG_RESULT([yes])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+# LC_INODE_I_MUTEX
-+# after 2.6.15 inode have i_mutex intead of i_sem
-+AC_DEFUN([LC_INODE_I_MUTEX],
-+[AC_MSG_CHECKING([if inode has i_mutex ])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/mutex.h>
-+ #include <linux/fs.h>
-+ #undef i_mutex
-+],[
-+ struct inode i;
-+
-+ mutex_unlock(&i.i_mutex);
-+],[
-+ AC_MSG_RESULT(yes)
-+ AC_DEFINE(HAVE_INODE_I_MUTEX, 1,
-+ [after 2.6.15 inode have i_mutex intead of i_sem])
-+],[
-+ AC_MSG_RESULT(no)
-+])
-+])
-+
-+# LC_DQUOTOFF_MUTEX
-+# after 2.6.17 dquote use mutex instead if semaphore
-+AC_DEFUN([LC_DQUOTOFF_MUTEX],
-+[AC_MSG_CHECKING([use dqonoff_mutex])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/mutex.h>
-+ #include <linux/fs.h>
-+ #include <linux/quota.h>
-+],[
-+ struct quota_info dq;
-+
-+ mutex_unlock(&dq.dqonoff_mutex);
-+],[
-+ AC_MSG_RESULT(yes)
-+ AC_DEFINE(HAVE_DQUOTOFF_MUTEX, 1,
-+ [after 2.6.17 dquote use mutex instead if semaphore])
-+],[
-+ AC_MSG_RESULT(no)
-+])
-+])
-+
-+#
-+# LC_STATFS_DENTRY_PARAM
-+# starting from 2.6.18 linux kernel uses dentry instead of
-+# super_block for first vfs_statfs argument
-+#
-+AC_DEFUN([LC_STATFS_DENTRY_PARAM],
-+[AC_MSG_CHECKING([first vfs_statfs parameter is dentry])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/fs.h>
-+],[
-+ int vfs_statfs(struct dentry *, struct kstatfs *);
-+],[
-+ AC_DEFINE(HAVE_STATFS_DENTRY_PARAM, 1,
-+ [first parameter of vfs_statfs is dentry])
-+ AC_MSG_RESULT([yes])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+#
-+# LC_VFS_KERN_MOUNT
-+# starting from 2.6.18 kernel don't export do_kern_mount
-+# and want to use vfs_kern_mount instead.
-+#
-+AC_DEFUN([LC_VFS_KERN_MOUNT],
-+[AC_MSG_CHECKING([vfs_kern_mount exist in kernel])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/mount.h>
-+],[
-+ vfs_kern_mount(NULL, 0, NULL, NULL);
-+],[
-+ AC_DEFINE(HAVE_VFS_KERN_MOUNT, 1,
-+ [vfs_kern_mount exist in kernel])
-+ AC_MSG_RESULT([yes])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+#
-+# LC_INVALIDATEPAGE_RETURN_INT
-+# more 2.6 api changes. return type for the invalidatepage
-+# address_space_operation is 'void' in new kernels but 'int' in old
-+#
-+AC_DEFUN([LC_INVALIDATEPAGE_RETURN_INT],
-+[AC_MSG_CHECKING([invalidatepage has return int])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/buffer_head.h>
-+],[
-+ int rc = block_invalidatepage(NULL, 0);
-+],[
-+ AC_MSG_RESULT(yes)
-+ AC_DEFINE(HAVE_INVALIDATEPAGE_RETURN_INT, 1,
-+ [Define if return type of invalidatepage should be int])
-+],[
-+ AC_MSG_RESULT(no)
-+])
-+])
-+
-+# LC_UMOUNTBEGIN_HAS_VFSMOUNT
-+# more 2.6 API changes. 2.6.18 umount_begin has different parameters
-+AC_DEFUN([LC_UMOUNTBEGIN_HAS_VFSMOUNT],
-+[AC_MSG_CHECKING([if umount_begin needs vfsmount parameter instead of super_block])
-+tmp_flags="$EXTRA_KCFLAGS"
-+EXTRA_KCFLAGS="-Werror"
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/fs.h>
-+
-+ struct vfsmount;
-+ static void cfg_umount_begin (struct vfsmount *v, int flags)
-+ {
-+ ;
-+ }
-+
-+ static struct super_operations cfg_super_operations = {
-+ .umount_begin = cfg_umount_begin,
-+ };
-+],[
-+ cfg_super_operations.umount_begin(NULL,0);
-+],[
-+ AC_MSG_RESULT(yes)
-+ AC_DEFINE(HAVE_UMOUNTBEGIN_VFSMOUNT, 1,
-+ [Define umount_begin need second argument])
-+],[
-+ AC_MSG_RESULT(no)
-+])
-+EXTRA_KCFLAGS="$tmp_flags"
-+])
-+
-+# 2.6.19 API changes
-+# inode don't have i_blksize field
-+AC_DEFUN([LC_INODE_BLKSIZE],
-+[AC_MSG_CHECKING([inode has i_blksize field])
-+LB_LINUX_TRY_COMPILE([
-+#include <linux/fs.h>
-+],[
-+ struct inode i;
-+ i.i_blksize = 0;
-+],[
-+ AC_MSG_RESULT(yes)
-+ AC_DEFINE(HAVE_INODE_BLKSIZE, 1,
-+ [struct inode has i_blksize field])
-+],[
-+ AC_MSG_RESULT(no)
-+])
-+])
-+
-+# LC_VFS_READDIR_U64_INO
-+# 2.6.19 use u64 for inode number instead of inode_t
-+AC_DEFUN([LC_VFS_READDIR_U64_INO],
-+[AC_MSG_CHECKING([check vfs_readdir need 64bit inode number])
-+tmp_flags="$EXTRA_KCFLAGS"
-+EXTRA_KCFLAGS="-Werror"
-+LB_LINUX_TRY_COMPILE([
-+#include <linux/fs.h>
-+ int fillonedir(void * __buf, const char * name, int namlen, loff_t offset,
-+ u64 ino, unsigned int d_type)
-+ {
-+ return 0;
-+ }
-+],[
-+ filldir_t filter;
-+
-+ filter = fillonedir;
-+ return 1;
-+],[
-+ AC_MSG_RESULT(yes)
-+ AC_DEFINE(HAVE_VFS_READDIR_U64_INO, 1,
-+ [if vfs_readdir need 64bit inode number])
-+],[
-+ AC_MSG_RESULT(no)
-+])
-+EXTRA_KCFLAGS="$tmp_flags"
-+])
-+
-+# LC_FILE_WRITEV
-+# 2.6.19 replaced writev with aio_write
-+AC_DEFUN([LC_FILE_WRITEV],
-+[AC_MSG_CHECKING([writev in fops])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/fs.h>
-+],[
-+ struct file_operations *fops = NULL;
-+ fops->writev = NULL;
-+],[
-+ AC_MSG_RESULT(yes)
-+ AC_DEFINE(HAVE_FILE_WRITEV, 1,
-+ [use fops->writev])
-+],[
-+ AC_MSG_RESULT(no)
-+])
-+])
-+
-+# LC_GENERIC_FILE_READ
-+# 2.6.19 replaced readv with aio_read
-+AC_DEFUN([LC_FILE_READV],
-+[AC_MSG_CHECKING([readv in fops])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/fs.h>
-+],[
-+ struct file_operations *fops = NULL;
-+ fops->readv = NULL;
-+],[
-+ AC_MSG_RESULT(yes)
-+ AC_DEFINE(HAVE_FILE_READV, 1,
-+ [use fops->readv])
-+],[
-+ AC_MSG_RESULT(no)
-+])
-+])
-+
-+# LC_NR_PAGECACHE
-+# 2.6.18 don't export nr_pagecahe
-+AC_DEFUN([LC_NR_PAGECACHE],
-+[AC_MSG_CHECKING([kernel export nr_pagecache])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/pagemap.h>
-+],[
-+ return atomic_read(&nr_pagecache);
-+],[
-+ AC_MSG_RESULT(yes)
-+ AC_DEFINE(HAVE_NR_PAGECACHE, 1,
-+ [is kernel export nr_pagecache])
-+],[
-+ AC_MSG_RESULT(no)
-+])
-+])
-+
-+# LC_CANCEL_DIRTY_PAGE
-+# 2.6.20 introduse cancel_dirty_page instead of
-+# clear_page_dirty.
-+AC_DEFUN([LC_CANCEL_DIRTY_PAGE],
-+[AC_MSG_CHECKING([kernel has cancel_dirty_page])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/mm.h>
-+ #include <linux/page-flags.h>
-+],[
-+ cancel_dirty_page(NULL, 0);
-+],[
-+ AC_MSG_RESULT(yes)
-+ AC_DEFINE(HAVE_CANCEL_DIRTY_PAGE, 1,
-+ [kernel has cancel_dirty_page instead of clear_page_dirty])
-+],[
-+ AC_MSG_RESULT(no)
-+])
-+])
-+
-+#
-+# LC_PAGE_CONSTANT
-+#
-+# In order to support raid5 zerocopy patch, we have to patch the kernel to make
-+# it support constant page, which means the page won't be modified during the
-+# IO.
-+#
-+AC_DEFUN([LC_PAGE_CONSTANT],
-+[AC_MSG_CHECKING([if kernel have PageConstant defined])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/mm.h>
-+ #include <linux/page-flags.h>
-+],[
-+ #ifndef PG_constant
-+ #error "Have no raid5 zcopy patch"
-+ #endif
-+],[
-+ AC_MSG_RESULT(yes)
-+ AC_DEFINE(HAVE_PAGE_CONSTANT, 1, [kernel have PageConstant supported])
-+],[
-+ AC_MSG_RESULT(no);
-+])
-+])
-+
-+# RHEL5 in FS-cache patch rename PG_checked flag
-+# into PG_fs_misc
-+AC_DEFUN([LC_PG_FS_MISC],
-+[AC_MSG_CHECKING([kernel has PG_fs_misc])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/mm.h>
-+ #include <linux/page-flags.h>
-+],[
-+ #ifndef PG_fs_misc
-+ #error PG_fs_misc not defined in kernel
-+ #endif
-+],[
-+ AC_MSG_RESULT(yes)
-+ AC_DEFINE(HAVE_PG_FS_MISC, 1,
-+ [is kernel have PG_fs_misc])
-+],[
-+ AC_MSG_RESULT(no)
-+])
-+])
-+
-+# RHEL5 PageChecked and SetPageChecked defined
-+AC_DEFUN([LC_PAGE_CHECKED],
-+[AC_MSG_CHECKING([kernel has PageChecked and SetPageChecked])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/autoconf.h>
-+#ifdef HAVE_LINUX_MMTYPES_H
-+ #include <linux/mm_types.h>
-+#endif
-+ #include <linux/page-flags.h>
-+],[
-+ struct page *p;
-+
-+ /* before 2.6.26 this define*/
-+ #ifndef PageChecked
-+ /* 2.6.26 use function instead of define for it */
-+ SetPageChecked(p);
-+ PageChecked(p);
-+ #endif
-+],[
-+ AC_MSG_RESULT(yes)
-+ AC_DEFINE(HAVE_PAGE_CHECKED, 1,
-+ [does kernel have PageChecked and SetPageChecked])
-+],[
-+ AC_MSG_RESULT(no)
-+])
-+])
-+
-+AC_DEFUN([LC_EXPORT_TRUNCATE_COMPLETE],
-+[LB_CHECK_SYMBOL_EXPORT([truncate_complete_page],
-+[mm/truncate.c],[
-+AC_DEFINE(HAVE_TRUNCATE_COMPLETE_PAGE, 1,
-+ [kernel export truncate_complete_page])
-+],[
-+])
-+])
-+
-+AC_DEFUN([LC_EXPORT_D_REHASH_COND],
-+[LB_CHECK_SYMBOL_EXPORT([d_rehash_cond],
-+[fs/dcache.c],[
-+AC_DEFINE(HAVE_D_REHASH_COND, 1,
-+ [d_rehash_cond is exported by the kernel])
-+],[
-+])
-+])
-+
-+AC_DEFUN([LC_EXPORT___D_REHASH],
-+[LB_CHECK_SYMBOL_EXPORT([__d_rehash],
-+[fs/dcache.c],[
-+AC_DEFINE(HAVE___D_REHASH, 1,
-+ [__d_rehash is exported by the kernel])
-+],[
-+])
-+])
-+
-+AC_DEFUN([LC_EXPORT_D_MOVE_LOCKED],
-+[LB_CHECK_SYMBOL_EXPORT([d_move_locked],
-+[fs/dcache.c],[
-+AC_DEFINE(HAVE_D_MOVE_LOCKED, 1,
-+ [d_move_locked is exported by the kernel])
-+],[
-+])
-+])
-+
-+AC_DEFUN([LC_EXPORT___D_MOVE],
-+[LB_CHECK_SYMBOL_EXPORT([__d_move],
-+[fs/dcache.c],[
-+AC_DEFINE(HAVE___D_MOVE, 1,
-+ [__d_move is exported by the kernel])
-+],[
-+])
-+])
-+
-+# The actual symbol exported varies among architectures, so we need
-+# to check many symbols (but only in the current architecture.) No
-+# matter what symbol is exported, the kernel #defines node_to_cpumask
-+# to the appropriate function and that's what we use.
-+AC_DEFUN([LC_EXPORT_NODE_TO_CPUMASK],
-+ [LB_CHECK_SYMBOL_EXPORT([node_to_cpumask],
-+ [arch/$LINUX_ARCH/mm/numa.c],
-+ [AC_DEFINE(HAVE_NODE_TO_CPUMASK, 1,
-+ [node_to_cpumask is exported by
-+ the kernel])]) # x86_64
-+ LB_CHECK_SYMBOL_EXPORT([node_to_cpu_mask],
-+ [arch/$LINUX_ARCH/kernel/smpboot.c],
-+ [AC_DEFINE(HAVE_NODE_TO_CPUMASK, 1,
-+ [node_to_cpumask is exported by
-+ the kernel])]) # ia64
-+ LB_CHECK_SYMBOL_EXPORT([node_2_cpu_mask],
-+ [arch/$LINUX_ARCH/kernel/smpboot.c],
-+ [AC_DEFINE(HAVE_NODE_TO_CPUMASK, 1,
-+ [node_to_cpumask is exported by
-+ the kernel])]) # i386
-+ ])
-+
-+#
-+# LC_VFS_INTENT_PATCHES
-+#
-+# check if the kernel has the VFS intent patches
-+AC_DEFUN([LC_VFS_INTENT_PATCHES],
-+[AC_MSG_CHECKING([if the kernel has the VFS intent patches])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/fs.h>
-+ #include <linux/namei.h>
-+],[
-+ struct nameidata nd;
-+ struct lookup_intent *it;
-+
-+ it = &nd.intent;
-+ intent_init(it, IT_OPEN);
-+ it->d.lustre.it_disposition = 0;
-+ it->d.lustre.it_data = NULL;
-+],[
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_VFS_INTENT_PATCHES, 1, [VFS intent patches are applied])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+# 2.6.22 lost second parameter for invalidate_bdev
-+AC_DEFUN([LC_INVALIDATE_BDEV_2ARG],
-+[AC_MSG_CHECKING([if invalidate_bdev has second argument])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/buffer_head.h>
-+],[
-+ invalidate_bdev(NULL,0);
-+],[
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_INVALIDATE_BDEV_2ARG, 1,
-+ [invalidate_bdev has second argument])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+# 2.6.18
-+
-+
-+# 2.6.23 have return type 'void' for unregister_blkdev
-+AC_DEFUN([LC_UNREGISTER_BLKDEV_RETURN_INT],
-+[AC_MSG_CHECKING([if unregister_blkdev return int])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/fs.h>
-+],[
-+ int i = unregister_blkdev(0,NULL);
-+],[
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_UNREGISTER_BLKDEV_RETURN_INT, 1,
-+ [unregister_blkdev return int])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+# 2.6.23 change .sendfile to .splice_read
-+# RHEL4 (-92 kernel) have both sendfile and .splice_read API
-+AC_DEFUN([LC_KERNEL_SENDFILE],
-+[AC_MSG_CHECKING([if kernel has .sendfile])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/fs.h>
-+],[
-+ struct file_operations file;
-+
-+ file.sendfile = NULL;
-+], [
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_KERNEL_SENDFILE, 1,
-+ [kernel has .sendfile])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+# 2.6.23 change .sendfile to .splice_read
-+AC_DEFUN([LC_KERNEL_SPLICE_READ],
-+[AC_MSG_CHECKING([if kernel has .splice_read])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/fs.h>
-+],[
-+ struct file_operations file;
-+
-+ file.splice_read = NULL;
-+], [
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_KERNEL_SPLICE_READ, 1,
-+ [kernel has .slice_read])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+# 2.6.23 extract nfs export related data into exportfs.h
-+AC_DEFUN([LC_HAVE_EXPORTFS_H],
-+[LB_CHECK_FILE([$LINUX/include/linux/exportfs.h], [
-+ AC_DEFINE(HAVE_LINUX_EXPORTFS_H, 1,
-+ [kernel has include/exportfs.h])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+# 2.6.23 have new page fault handling API
-+AC_DEFUN([LC_VM_OP_FAULT],
-+[AC_MSG_CHECKING([if kernel has .fault in vm_operation_struct])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/mm.h>
-+],[
-+ struct vm_operations_struct op;
-+
-+ op.fault = NULL;
-+], [
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_VM_OP_FAULT, 1,
-+ [if kernel has .fault in vm_operation_struct])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+#2.6.23 has new shrinker API
-+AC_DEFUN([LC_REGISTER_SHRINKER],
-+[AC_MSG_CHECKING([if kernel has register_shrinker])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/mm.h>
-+],[
-+ register_shrinker(NULL);
-+], [
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_REGISTER_SHRINKER, 1,
-+ [if kernel has register_shrinker])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+# 2.6.24 has bio_endio with 2 args
-+AC_DEFUN([LC_BIO_ENDIO_2ARG],
-+[AC_MSG_CHECKING([if kernel has bio_endio with 2 args])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/bio.h>
-+],[
-+ bio_endio(NULL, 0);
-+], [
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_BIO_ENDIO_2ARG, 1,
-+ [if kernel has bio_endio with 2 args])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+# 2.6.24 has new members in exports struct.
-+AC_DEFUN([LC_FH_TO_DENTRY],
-+[AC_MSG_CHECKING([if kernel has .fh_to_dentry member in export_operations struct])
-+LB_LINUX_TRY_COMPILE([
-+#ifdef HAVE_LINUX_EXPORTFS_H
-+ #include <linux/exportfs.h>
-+#else
-+ #include <linux/fs.h>
-+#endif
-+],[
-+ struct export_operations exp;
-+
-+ exp.fh_to_dentry = NULL;
-+], [
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_FH_TO_DENTRY, 1,
-+ [kernel has .fh_to_dentry member in export_operations struct])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+# 2.6.24 need linux/mm_types.h included
-+AC_DEFUN([LC_HAVE_MMTYPES_H],
-+[LB_CHECK_FILE([$LINUX/include/linux/mm_types.h], [
-+ AC_DEFINE(HAVE_LINUX_MMTYPES_H, 1,
-+ [kernel has include/mm_types.h])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+# 2.6.24 remove long aged procfs entry -> deleted member
-+AC_DEFUN([LC_PROCFS_DELETED],
-+[AC_MSG_CHECKING([if kernel has deleted member in procfs entry struct])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/proc_fs.h>
-+],[
-+ struct proc_dir_entry pde;
-+
-+ pde.deleted = NULL;
-+], [
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_PROCFS_DELETED, 1,
-+ [kernel has deleted member in procfs entry struct])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+# 2.6.25 change define to inline
-+AC_DEFUN([LC_MAPPING_CAP_WRITEBACK_DIRTY],
-+[AC_MSG_CHECKING([if kernel have mapping_cap_writeback_dirty])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/backing-dev.h>
-+],[
-+ #ifndef mapping_cap_writeback_dirty
-+ mapping_cap_writeback_dirty(NULL);
-+ #endif
-+],[
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_MAPPING_CAP_WRITEBACK_DIRTY, 1,
-+ [kernel have mapping_cap_writeback_dirty])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+
-+
-+# 2.6.26 isn't export set_fs_pwd and change paramter in fs struct
-+AC_DEFUN([LC_FS_STRUCT_USE_PATH],
-+[AC_MSG_CHECKING([fs_struct use path structure])
-+LB_LINUX_TRY_COMPILE([
-+ #include <asm/atomic.h>
-+ #include <linux/spinlock.h>
-+ #include <linux/fs_struct.h>
-+],[
-+ struct path path;
-+ struct fs_struct fs;
-+
-+ fs.pwd = path;
-+], [
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_FS_STRUCT_USE_PATH, 1,
-+ [fs_struct use path structure])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+# 2.6.26 remove path_release and use path_put instead
-+AC_DEFUN([LC_PATH_RELEASE],
-+[AC_MSG_CHECKING([if path_release exist])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/dcache.h>
-+ #include <linux/namei.h>
-+],[
-+ path_release(NULL);
-+],[
-+ AC_DEFINE(HAVE_PATH_RELEASE, 1, [path_release exist])
-+ AC_MSG_RESULT([yes])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+#2.6.27
-+AC_DEFUN([LC_INODE_PERMISION_2ARGS],
-+[AC_MSG_CHECKING([inode_operations->permission have two args])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/fs.h>
-+],[
-+ struct inode *inode;
-+
-+ inode->i_op->permission(NULL,0);
-+],[
-+ AC_DEFINE(HAVE_INODE_PERMISION_2ARGS, 1,
-+ [inode_operations->permission have two args])
-+ AC_MSG_RESULT([yes])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+# 2.6.27 have file_remove_suid instead of remove_suid
-+AC_DEFUN([LC_FILE_REMOVE_SUID],
-+[AC_MSG_CHECKING([kernel have file_remove_suid])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/fs.h>
-+],[
-+ file_remove_suid(NULL);
-+],[
-+ AC_DEFINE(HAVE_FILE_REMOVE_SUID, 1,
-+ [kernel have file_remove_suid])
-+ AC_MSG_RESULT([yes])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+# 2.6.27 have new page locking API
-+AC_DEFUN([LC_TRYLOCKPAGE],
-+[AC_MSG_CHECKING([kernel use trylock_page for page lock])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/pagemap.h>
-+],[
-+ trylock_page(NULL);
-+],[
-+ AC_DEFINE(HAVE_TRYLOCK_PAGE, 1,
-+ [kernel use trylock_page for page lock])
-+ AC_MSG_RESULT([yes])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+#
-+# LC_PROG_LINUX
-+#
-+# Lustre linux kernel checks
-+#
-+AC_DEFUN([LC_PROG_LINUX],
-+ [LC_LUSTRE_VERSION_H
-+ if test x$enable_server = xyes ; then
-+ LC_CONFIG_BACKINGFS
-+ fi
-+ LC_CONFIG_PINGER
-+ LC_CONFIG_CHECKSUM
-+ LC_CONFIG_LIBLUSTRE_RECOVERY
-+ LC_CONFIG_HEALTH_CHECK_WRITE
-+ LC_CONFIG_LRU_RESIZE
-+ LC_CONFIG_ADAPTIVE_TIMEOUTS
-+ LC_QUOTA_MODULE
-+
-+ LC_TASK_PPTR
-+ # RHEL4 patches
-+ LC_EXPORT_TRUNCATE_COMPLETE
-+ LC_EXPORT_D_REHASH_COND
-+ LC_EXPORT___D_REHASH
-+ LC_EXPORT_D_MOVE_LOCKED
-+ LC_EXPORT___D_MOVE
-+ LC_EXPORT_NODE_TO_CPUMASK
-+
-+ LC_STRUCT_KIOBUF
-+ LC_FUNC_COND_RESCHED
-+ LC_FUNC_ZAP_PAGE_RANGE
-+ LC_FUNC_PDE
-+ LC_FUNC_DIRECT_IO
-+ LC_HEADER_MM_INLINE
-+ LC_STRUCT_INODE
-+ LC_FUNC_REGISTER_CACHE
-+ LC_FUNC_GRAB_CACHE_PAGE_NOWAIT_GFP
-+ LC_FUNC_DEV_SET_RDONLY
-+ LC_FUNC_FILEMAP_FDATAWRITE
-+ LC_STRUCT_STATFS
-+ LC_FUNC_PAGE_MAPPED
-+ LC_STRUCT_FILE_OPS_UNLOCKED_IOCTL
-+ LC_FILEMAP_POPULATE
-+ LC_D_ADD_UNIQUE
-+ LC_BIT_SPINLOCK_H
-+ LC_XATTR_ACL
-+ LC_STRUCT_INTENT_FILE
-+ LC_POSIX_ACL_XATTR_H
-+ LC_EXPORT___IGET
-+ LC_FUNC_SET_FS_PWD
-+ LC_FUNC_MS_FLOCK_LOCK
-+ LC_FUNC_HAVE_CAN_SLEEP_ARG
-+ LC_FUNC_F_OP_FLOCK
-+ LC_QUOTA_READ
-+ LC_COOKIE_FOLLOW_LINK
-+ LC_FUNC_RCU
-+ LC_QUOTA64
-+
-+ # does the kernel have VFS intent patches?
-+ LC_VFS_INTENT_PATCHES
-+
-+ # 2.6.15
-+ LC_INODE_I_MUTEX
-+
-+ # 2.6.16
-+ LC_SECURITY_PLUG # for SLES10 SP2
-+
-+ # 2.6.17
-+ LC_DQUOTOFF_MUTEX
-+
-+ # 2.6.18
-+ LC_NR_PAGECACHE
-+ LC_STATFS_DENTRY_PARAM
-+ LC_VFS_KERN_MOUNT
-+ LC_INVALIDATEPAGE_RETURN_INT
-+ LC_UMOUNTBEGIN_HAS_VFSMOUNT
-+
-+ #2.6.18 + RHEL5 (fc6)
-+ LC_PG_FS_MISC
-+ LC_PAGE_CHECKED
-+
-+ # 2.6.19
-+ LC_INODE_BLKSIZE
-+ LC_VFS_READDIR_U64_INO
-+ LC_FILE_WRITEV
-+ LC_FILE_READV
-+
-+ # 2.6.20
-+ LC_CANCEL_DIRTY_PAGE
-+
-+ # raid5-zerocopy patch
-+ LC_PAGE_CONSTANT
-+
-+ # 2.6.22
-+ LC_INVALIDATE_BDEV_2ARG
-+ LC_FS_RENAME_DOES_D_MOVE
-+ # 2.6.23
-+ LC_UNREGISTER_BLKDEV_RETURN_INT
+ ])
+
+ #
+@@ -1372,8 +1854,45 @@
+ LC_FS_RENAME_DOES_D_MOVE
+ # 2.6.23
+ LC_UNREGISTER_BLKDEV_RETURN_INT
+ LC_KERNEL_SENDFILE
-+ LC_KERNEL_SPLICE_READ
-+ LC_HAVE_EXPORTFS_H
+ LC_KERNEL_SPLICE_READ
+ LC_HAVE_EXPORTFS_H
+ LC_VM_OP_FAULT
+ LC_REGISTER_SHRINKER
+
@@ -2477,456 +868,40 @@ diff -urNad lustre~/lustre/autoconf/lustre-core.m4.orig lustre/lustre/autoconf/l
+ LC_INODE_PERMISION_2ARGS
+ LC_FILE_REMOVE_SUID
+ LC_TRYLOCKPAGE
-+])
-+
-+#
-+# LC_CONFIG_CLIENT_SERVER
-+#
-+# Build client/server sides of Lustre
-+#
-+AC_DEFUN([LC_CONFIG_CLIENT_SERVER],
-+[AC_MSG_CHECKING([whether to build Lustre server support])
-+AC_ARG_ENABLE([server],
-+ AC_HELP_STRING([--disable-server],
-+ [disable Lustre server support]),
-+ [],[enable_server='yes'])
-+AC_MSG_RESULT([$enable_server])
-+
-+AC_MSG_CHECKING([whether to build Lustre client support])
-+AC_ARG_ENABLE([client],
-+ AC_HELP_STRING([--disable-client],
-+ [disable Lustre client support]),
-+ [],[enable_client='yes'])
-+AC_MSG_RESULT([$enable_client])])
-+
-+#
-+# LC_CONFIG_LIBLUSTRE
-+#
-+# whether to build liblustre
-+#
-+AC_DEFUN([LC_CONFIG_LIBLUSTRE],
-+[AC_MSG_CHECKING([whether to build Lustre library])
-+AC_ARG_ENABLE([liblustre],
-+ AC_HELP_STRING([--disable-liblustre],
-+ [disable building of Lustre library]),
-+ [],[enable_liblustre=$with_sysio])
-+AC_MSG_RESULT([$enable_liblustre])
-+# only build sysio if liblustre is built
-+with_sysio="$enable_liblustre"
-+
-+AC_MSG_CHECKING([whether to build liblustre tests])
-+AC_ARG_ENABLE([liblustre-tests],
-+ AC_HELP_STRING([--enable-liblustre-tests],
-+ [enable liblustre tests, if --disable-tests is used]),
-+ [],[enable_liblustre_tests=$enable_tests])
-+if test x$enable_liblustre != xyes ; then
-+ enable_liblustre_tests='no'
-+fi
-+AC_MSG_RESULT([$enable_liblustre_tests])
-+
-+AC_MSG_CHECKING([whether to enable liblustre acl])
-+AC_ARG_ENABLE([liblustre-acl],
-+ AC_HELP_STRING([--disable-liblustre-acl],
-+ [disable ACL support for liblustre]),
-+ [],[enable_liblustre_acl=yes])
-+AC_MSG_RESULT([$enable_liblustre_acl])
-+if test x$enable_liblustre_acl = xyes ; then
-+ AC_DEFINE(LIBLUSTRE_POSIX_ACL, 1, Liblustre Support ACL-enabled MDS)
-+fi
-+
-+#
-+# --enable-mpitest
-+#
-+AC_ARG_ENABLE(mpitests,
-+ AC_HELP_STRING([--enable-mpitest=yes|no|mpich directory],
-+ [include mpi tests]),
-+ [
-+ enable_mpitests=yes
-+ case $enableval in
-+ yes)
-+ MPI_ROOT=/opt/mpich
-+ LDFLAGS="$LDFLAGS -L$MPI_ROOT/ch-p4/lib -L$MPI_ROOT/ch-p4/lib64"
-+ CFLAGS="$CFLAGS -I$MPI_ROOT/include"
-+ ;;
-+ no)
-+ enable_mpitests=no
-+ ;;
-+ [[\\/$]]* | ?:[[\\/]]* )
-+ MPI_ROOT=$enableval
-+ LDFLAGS="$LDFLAGS -L$with_mpi/lib"
-+ CFLAGS="$CFLAGS -I$MPI_ROOT/include"
-+ ;;
-+ *)
-+ AC_MSG_ERROR([expected absolute directory name for --enable-mpitests or yes or no])
-+ ;;
-+ esac
-+ ],
-+ [
-+ MPI_ROOT=/opt/mpich
-+ LDFLAGS="$LDFLAGS -L$MPI_ROOT/ch-p4/lib -L$MPI_ROOT/ch-p4/lib64"
-+ CFLAGS="$CFLAGS -I$MPI_ROOT/include"
-+ enable_mpitests=yes
-+ ]
-+)
-+AC_SUBST(MPI_ROOT)
-+
-+if test x$enable_mpitests != xno; then
-+ AC_MSG_CHECKING([whether to mpitests can be built])
-+ AC_CHECK_FILE([$MPI_ROOT/include/mpi.h],
-+ [AC_CHECK_LIB([mpich],[MPI_Start],[enable_mpitests=yes],[enable_mpitests=no])],
-+ [enable_mpitests=no])
-+fi
-+AC_MSG_RESULT([$enable_mpitests])
-+
-+
-+AC_MSG_NOTICE([Enabling Lustre configure options for libsysio])
-+ac_configure_args="$ac_configure_args --with-lustre-hack --with-sockets"
-+
-+LC_CONFIG_PINGER
-+LC_CONFIG_LIBLUSTRE_RECOVERY
-+])
-+
-+AC_DEFUN([LC_CONFIG_LRU_RESIZE],
-+[AC_MSG_CHECKING([whether to enable lru self-adjusting])
-+AC_ARG_ENABLE([lru_resize],
-+ AC_HELP_STRING([--enable-lru-resize],
-+ [enable lru resize support]),
-+ [],[enable_lru_resize='yes'])
-+AC_MSG_RESULT([$enable_lru_resize])
-+if test x$enable_lru_resize != xno; then
-+ AC_DEFINE(HAVE_LRU_RESIZE_SUPPORT, 1, [Enable lru resize support])
-+fi
-+])
-+
-+AC_DEFUN([LC_CONFIG_ADAPTIVE_TIMEOUTS],
-+[AC_MSG_CHECKING([whether to enable ptlrpc adaptive timeouts support])
-+AC_ARG_ENABLE([adaptive_timeouts],
-+ AC_HELP_STRING([--enable-adaptive-timeouts],
-+ [enable ptlrpc adaptive timeouts support]),
-+ [],[enable_adaptive_timeouts='no'])
-+AC_MSG_RESULT([$enable_adaptive_timeouts])
-+if test x$enable_adaptive_timeouts == xyes; then
-+ AC_DEFINE(HAVE_AT_SUPPORT, 1, [Enable adaptive timeouts support])
-+fi
-+])
-+
-+#
-+# LC_CONFIG_QUOTA
-+#
-+# whether to enable quota support global control
-+#
-+AC_DEFUN([LC_CONFIG_QUOTA],
-+[AC_ARG_ENABLE([quota],
-+ AC_HELP_STRING([--enable-quota],
-+ [enable quota support]),
-+ [],[enable_quota='yes'])
-+])
-+
-+# whether to enable quota support(kernel modules)
-+AC_DEFUN([LC_QUOTA_MODULE],
-+[if test x$enable_quota != xno; then
-+ LB_LINUX_CONFIG([QUOTA],[
-+ enable_quota_module='yes'
-+ AC_DEFINE(HAVE_QUOTA_SUPPORT, 1, [Enable quota support])
-+ ],[
-+ enable_quota_module='no'
-+ AC_MSG_WARN([quota is not enabled because the kernel - lacks quota support])
-+ ])
-+fi
-+])
-+
-+AC_DEFUN([LC_QUOTA],
-+[#check global
-+LC_CONFIG_QUOTA
-+#check for utils
-+AC_CHECK_HEADER(sys/quota.h,
-+ [AC_DEFINE(HAVE_SYS_QUOTA_H, 1, [Define to 1 if you have <sys/quota.h>.])],
-+ [AC_MSG_ERROR([don't find <sys/quota.h> in your system])])
-+])
-+
-+AC_DEFUN([LC_QUOTA_READ],
-+[AC_MSG_CHECKING([if kernel supports quota_read])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/fs.h>
-+],[
-+ struct super_operations sp;
-+ void *i = (void *)sp.quota_read;
-+],[
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(KERNEL_SUPPORTS_QUOTA_READ, 1, [quota_read found])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+#
-+# LC_COOKIE_FOLLOW_LINK
-+#
-+# kernel 2.6.13+ ->follow_link returns a cookie
-+#
-+
-+AC_DEFUN([LC_COOKIE_FOLLOW_LINK],
-+[AC_MSG_CHECKING([if inode_operations->follow_link returns a cookie])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/fs.h>
-+ #include <linux/namei.h>
-+],[
-+ struct dentry dentry;
-+ struct nameidata nd;
-+
-+ dentry.d_inode->i_op->put_link(&dentry, &nd, NULL);
-+],[
-+ AC_DEFINE(HAVE_COOKIE_FOLLOW_LINK, 1, [inode_operations->follow_link returns a cookie])
-+ AC_MSG_RESULT([yes])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+#
-+# LC_FUNC_RCU
-+#
-+# kernels prior than 2.6.0(?) have no RCU supported; in kernel 2.6.5(SUSE),
-+# call_rcu takes three parameters.
-+#
-+AC_DEFUN([LC_FUNC_RCU],
-+[AC_MSG_CHECKING([if kernel have RCU supported])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/rcupdate.h>
-+],[],[
-+ AC_DEFINE(HAVE_RCU, 1, [have RCU defined])
-+ AC_MSG_RESULT([yes])
-+
-+ AC_MSG_CHECKING([if call_rcu takes three parameters])
-+ LB_LINUX_TRY_COMPILE([
-+ #include <linux/rcupdate.h>
-+ ],[
-+ struct rcu_head rh;
-+ call_rcu(&rh, (void (*)(struct rcu_head *))1, NULL);
-+ ],[
-+ AC_DEFINE(HAVE_CALL_RCU_PARAM, 1, [call_rcu takes three parameters])
-+ AC_MSG_RESULT([yes])
-+ ],[
-+ AC_MSG_RESULT([no])
-+ ])
-+
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+#
-+# LC_QUOTA64
-+# linux kernel may have 64-bit limits support
-+#
-+AC_DEFUN([LC_QUOTA64],
-+[AC_MSG_CHECKING([if kernel has 64-bit quota limits support])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/kernel.h>
-+ #include <linux/fs.h>
-+ #include <linux/quotaio_v2.h>
-+ int versions[] = V2_INITQVERSIONS_R1;
-+ struct v2_disk_dqblk_r1 dqblk_r1;
-+],[],[
-+ AC_DEFINE(HAVE_QUOTA64, 1, [have quota64])
-+ AC_MSG_RESULT([yes])
-+
-+],[
-+ AC_MSG_WARN([4 TB (or larger) block quota limits can only be used with OSTs not larger than 4 TB.])
-+ AC_MSG_WARN([Continuing with limited quota support.])
-+ AC_MSG_WARN([quotacheck is needed for filesystems with recent quota versions.])
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+# LC_SECURITY_PLUG # for SLES10 SP2
-+# check security plug in sles10 sp2 kernel
-+AC_DEFUN([LC_SECURITY_PLUG],
-+[AC_MSG_CHECKING([If kernel has security plug support])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/fs.h>
-+],[
-+ struct dentry *dentry;
-+ struct vfsmount *mnt;
-+ struct iattr *iattr;
-+
-+ notify_change(dentry, mnt, iattr);
-+],[
-+ AC_MSG_RESULT(yes)
-+ AC_DEFINE(HAVE_SECURITY_PLUG, 1,
-+ [SLES10 SP2 use extra parameter in vfs])
-+],[
-+ AC_MSG_RESULT(no)
-+])
-+])
-+
-+#
-+# LC_CONFIGURE
-+#
-+# other configure checks
-+#
-+AC_DEFUN([LC_CONFIGURE],
-+[LC_CONFIG_OBD_BUFFER_SIZE
-+
-+# include/liblustre.h
-+AC_CHECK_HEADERS([asm/page.h sys/user.h sys/vfs.h stdint.h blkid/blkid.h])
-+
-+# liblustre/llite_lib.h
-+AC_CHECK_HEADERS([xtio.h file.h])
-+
-+# liblustre/dir.c
-+AC_CHECK_HEADERS([linux/types.h sys/types.h linux/unistd.h unistd.h])
-+
-+# liblustre/lutil.c
-+AC_CHECK_HEADERS([netinet/in.h arpa/inet.h catamount/data.h])
-+AC_CHECK_FUNCS([inet_ntoa])
-+
-+# libsysio/src/readlink.c
-+LC_READLINK_SSIZE_T
-+
-+# lvfs/prng.c - depends on linux/types.h from liblustre/dir.c
-+AC_CHECK_HEADERS([linux/random.h], [], [],
-+ [#ifdef HAVE_LINUX_TYPES_H
-+ # include <linux/types.h>
-+ #endif
-+ ])
-+
-+# utils/llverfs.c
-+AC_CHECK_HEADERS([ext2fs/ext2fs.h])
-+
-+# check for -lz support
-+ZLIB=""
-+AC_CHECK_LIB([z],
-+ [adler32],
-+ [AC_CHECK_HEADERS([zlib.h],
-+ [ZLIB="-lz"
-+ AC_DEFINE([HAVE_ADLER], 1,
-+ [support alder32 checksum type])],
-+ [AC_MSG_WARN([No zlib-devel package found,
-+ unable to use adler32 checksum])])],
-+ [AC_MSG_WARN([No zlib package found, unable to use adler32 checksum])]
-+)
-+AC_SUBST(ZLIB)
-+
-+# Super safe df
-+AC_ARG_ENABLE([mindf],
-+ AC_HELP_STRING([--enable-mindf],
-+ [Make statfs report the minimum available space on any single OST instead of the sum of free space on all OSTs]),
-+ [],[])
-+if test "$enable_mindf" = "yes" ; then
-+ AC_DEFINE([MIN_DF], 1, [Report minimum OST free space])
-+fi
-+
-+AC_ARG_ENABLE([fail_alloc],
-+ AC_HELP_STRING([--disable-fail-alloc],
-+ [disable randomly alloc failure]),
-+ [],[enable_fail_alloc=yes])
-+AC_MSG_CHECKING([whether to randomly failing memory alloc])
-+AC_MSG_RESULT([$enable_fail_alloc])
-+if test x$enable_fail_alloc != xno ; then
-+ AC_DEFINE([RANDOM_FAIL_ALLOC], 1, [enable randomly alloc failure])
-+fi
-+
-+])
++ LC_RW_TREE_LOCK
++ #done until here
++ LC_READ_INODE_IN_SBOPS #done
++ LC_EXPORT_INODE_PERMISSION #done
++ LC_QUOTA_ON_5ARGS #done
++ LC_QUOTA_OFF_3ARGS #done
++ LC_VFS_DQ_OFF #done
+
-+#
-+# LC_CONDITIONALS
-+#
-+# AM_CONDITIONALS for lustre
-+#
-+AC_DEFUN([LC_CONDITIONALS],
-+[AM_CONDITIONAL(LIBLUSTRE, test x$enable_liblustre = xyes)
-+AM_CONDITIONAL(USE_QUILT, test x$QUILT != xno)
-+AM_CONDITIONAL(LIBLUSTRE_TESTS, test x$enable_liblustre_tests = xyes)
-+AM_CONDITIONAL(MPITESTS, test x$enable_mpitests = xyes, Build MPI Tests)
-+AM_CONDITIONAL(CLIENT, test x$enable_client = xyes)
-+AM_CONDITIONAL(SERVER, test x$enable_server = xyes)
-+AM_CONDITIONAL(QUOTA, test x$enable_quota_module = xyes)
-+AM_CONDITIONAL(BLKID, test x$ac_cv_header_blkid_blkid_h = xyes)
-+AM_CONDITIONAL(EXT2FS_DEVEL, test x$ac_cv_header_ext2fs_ext2fs_h = xyes)
-+AM_CONDITIONAL(LIBPTHREAD, test x$enable_libpthread = xyes)
-+])
++ # 2.6.27.15-2 sles11
++ LC_BI_HW_SEGMENTS #done
++ LC_HAVE_QUOTAIO_V1_H #done
++ LC_VFS_SYMLINK_5ARGS #done
++ LC_SB_ANY_QUOTA_ACTIVE
++ LC_SB_HAS_QUOTA_ACTIVE
+
-+#
-+# LC_CONFIG_FILES
-+#
-+# files that should be generated with AC_OUTPUT
-+#
-+AC_DEFUN([LC_CONFIG_FILES],
-+[AC_CONFIG_FILES([
-+lustre/Makefile
-+lustre/autoMakefile
-+lustre/autoconf/Makefile
-+lustre/contrib/Makefile
-+lustre/doc/Makefile
-+lustre/include/Makefile
-+lustre/include/lustre_ver.h
-+lustre/include/linux/Makefile
-+lustre/include/lustre/Makefile
-+lustre/kernel_patches/targets/2.6-suse.target
-+lustre/kernel_patches/targets/2.6-vanilla.target
-+lustre/kernel_patches/targets/2.6-rhel4.target
-+lustre/kernel_patches/targets/2.6-rhel5.target
-+lustre/kernel_patches/targets/2.6-fc5.target
-+lustre/kernel_patches/targets/2.6-patchless.target
-+lustre/kernel_patches/targets/2.6-sles10.target
-+lustre/kernel_patches/targets/hp_pnnl-2.4.target
-+lustre/kernel_patches/targets/rh-2.4.target
-+lustre/kernel_patches/targets/rhel-2.4.target
-+lustre/kernel_patches/targets/suse-2.4.21-2.target
-+lustre/kernel_patches/targets/sles-2.4.target
-+lustre/ldlm/Makefile
-+lustre/liblustre/Makefile
-+lustre/liblustre/tests/Makefile
-+lustre/llite/Makefile
-+lustre/llite/autoMakefile
-+lustre/lov/Makefile
-+lustre/lov/autoMakefile
-+lustre/lvfs/Makefile
-+lustre/lvfs/autoMakefile
-+lustre/mdc/Makefile
-+lustre/mdc/autoMakefile
-+lustre/mds/Makefile
-+lustre/mds/autoMakefile
-+lustre/obdclass/Makefile
-+lustre/obdclass/autoMakefile
-+lustre/obdclass/linux/Makefile
-+lustre/obdecho/Makefile
-+lustre/obdecho/autoMakefile
-+lustre/obdfilter/Makefile
-+lustre/obdfilter/autoMakefile
-+lustre/osc/Makefile
-+lustre/osc/autoMakefile
-+lustre/ost/Makefile
-+lustre/ost/autoMakefile
-+lustre/mgc/Makefile
-+lustre/mgc/autoMakefile
-+lustre/mgs/Makefile
-+lustre/mgs/autoMakefile
-+lustre/ptlrpc/Makefile
-+lustre/ptlrpc/autoMakefile
-+lustre/quota/Makefile
-+lustre/quota/autoMakefile
-+lustre/scripts/Makefile
-+lustre/scripts/version_tag.pl
-+lustre/tests/Makefile
-+lustre/utils/Makefile
-+])
-+case $lb_target_os in
-+ darwin)
-+ AC_CONFIG_FILES([ lustre/obdclass/darwin/Makefile ])
-+ ;;
-+esac
+ ])
+
+ #
+@@ -1606,6 +2125,7 @@
+ ],[
+ AC_MSG_RESULT([no])
+ ])
+
-+])
+ ],[
+ AC_MSG_RESULT([no])
+ ])
diff -urNad lustre~/lustre/include/liblustre.h lustre/lustre/include/liblustre.h
--- lustre~/lustre/include/liblustre.h 2009-08-19 09:51:08.000000000 +0200
-+++ lustre/lustre/include/liblustre.h 2009-08-19 14:10:45.000000000 +0200
-@@ -586,6 +586,55 @@
++++ lustre/lustre/include/liblustre.h 2009-08-20 10:25:20.000000000 +0200
+@@ -586,6 +586,52 @@
int signal;
};
-+#ifdef HAS_STRUCT_CRED
++#ifdef HAVE_CRED_IN_STRUCT_TASK_STRUCT
+
+struct cred {
+// atomic_t usage;
@@ -2965,12 +940,9 @@ diff -urNad lustre~/lustre/include/liblustre.h lustre/lustre/include/liblustre.h
+ int pid;
+ struct cred *real_cred;
+ struct cred *cred;
-+// int fsuid;
-+// int fsgid;
+ int max_groups;
+ int ngroups;
+ gid_t *groups;
-+// cfs_cap_t cap_effective;
+};
+
+#else
@@ -2978,7 +950,7 @@ diff -urNad lustre~/lustre/include/liblustre.h lustre/lustre/include/liblustre.h
struct task_struct {
int state;
struct signal pending;
-@@ -599,6 +648,8 @@
+@@ -599,6 +645,8 @@
cfs_cap_t cap_effective;
};
@@ -2987,945 +959,9 @@ diff -urNad lustre~/lustre/include/liblustre.h lustre/lustre/include/liblustre.h
typedef struct task_struct cfs_task_t;
#define cfs_current() current
#define cfs_curproc_pid() (current->pid)
-@@ -607,6 +658,14 @@
- extern struct task_struct *current;
- int in_group_p(gid_t gid);
-
-+#ifdef HAS_STRUCT_CRED_INVALID
-+/* since 2.6.29 the task credentials are kept in a
-+** const struct cred *cread / *real_cred */
-+# define current_cred current->real_cred
-+#else
-+# define current_cred current
-+#endif
-+
- #define set_current_state(foo) do { current->state = foo; } while (0)
-
- #define init_waitqueue_entry(q,p) do { (q)->process = p; } while (0)
-diff -urNad lustre~/lustre/include/liblustre.h.orig lustre/lustre/include/liblustre.h.orig
---- lustre~/lustre/include/liblustre.h.orig 1970-01-01 01:00:00.000000000 +0100
-+++ lustre/lustre/include/liblustre.h.orig 2009-08-19 14:10:45.000000000 +0200
-@@ -0,0 +1,917 @@
-+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-+ * vim:expandtab:shiftwidth=8:tabstop=8:
-+ *
-+ * GPL HEADER START
-+ *
-+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
-+ *
-+ * This program is free software; you can redistribute it and/or modify
-+ * it under the terms of the GNU General Public License version 2 only,
-+ * as published by the Free Software Foundation.
-+ *
-+ * This program is distributed in the hope that it will be useful, but
-+ * WITHOUT ANY WARRANTY; without even the implied warranty of
-+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-+ * General Public License version 2 for more details (a copy is included
-+ * in the LICENSE file that accompanied this code).
-+ *
-+ * You should have received a copy of the GNU General Public License
-+ * version 2 along with this program; If not, see
-+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
-+ *
-+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
-+ * CA 95054 USA or visit www.sun.com if you need additional information or
-+ * have any questions.
-+ *
-+ * GPL HEADER END
-+ */
-+/*
-+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved
-+ * Use is subject to license terms.
-+ */
-+/*
-+ * This file is part of Lustre, http://www.lustre.org/
-+ * Lustre is a trademark of Sun Microsystems, Inc.
-+ *
-+ * lustre/include/liblustre.h
-+ *
-+ * User-space Lustre headers.
-+ */
-+
-+#ifndef LIBLUSTRE_H__
-+#define LIBLUSTRE_H__
-+
-+#ifdef __KERNEL__
-+#error Kernel files should not #include <liblustre.h>
-+#else
-+/*
-+ * The userspace implementations of linux/spinlock.h vary; we just
-+ * include our own for all of them
-+ */
-+#define __LINUX_SPINLOCK_H
-+#endif
-+
-+#include <sys/mman.h>
-+#ifdef HAVE_STDINT_H
-+# include <stdint.h>
-+#endif
-+#ifdef HAVE_ASM_PAGE_H
-+# include <asm/page.h>
-+#endif
-+#ifdef HAVE_SYS_USER_H
-+# include <sys/user.h>
-+#endif
-+#ifdef HAVE_SYS_IOCTL_H
-+# include <sys/ioctl.h>
-+#endif
-+#ifndef _IOWR
-+# include "ioctl.h"
-+#endif
-+
-+#include <stdio.h>
-+#include <sys/ioctl.h>
-+#include <stdlib.h>
-+#include <string.h>
-+#include <errno.h>
-+#include <sys/stat.h>
-+#ifdef HAVE_SYS_VFS_H
-+# include <sys/vfs.h>
-+#endif
-+#include <unistd.h>
-+#include <fcntl.h>
-+
-+#include <libcfs/list.h>
-+#include <lnet/lnet.h>
-+#include <libcfs/kp30.h>
-+#include <libcfs/user-bitops.h>
-+
-+/* definitions for liblustre */
-+
-+#ifdef __CYGWIN__
-+
-+#define CFS_PAGE_SHIFT 12
-+#define CFS_PAGE_SIZE (1UL << CFS_PAGE_SHIFT)
-+#define CFS_PAGE_MASK (~((__u64)CFS_PAGE_SIZE-1))
-+#define loff_t long long
-+#define ERESTART 2001
-+typedef unsigned short umode_t;
-+
-+#endif
-+
-+#ifndef CURRENT_SECONDS
-+# define CURRENT_SECONDS time(0)
-+#endif
-+
-+#ifndef ARRAY_SIZE
-+#define ARRAY_SIZE(a) ((sizeof (a))/(sizeof ((a)[0])))
-+#endif
-+
-+/* This is because lprocfs_status.h gets included here indirectly. It would
-+ * be much better to just avoid lprocfs being included into liblustre entirely
-+ * but that requires more header surgery than I can handle right now.
-+ */
-+#ifndef smp_processor_id
-+#define smp_processor_id() 0
-+#endif
-+#ifndef num_online_cpus
-+#define num_online_cpus() 1
-+#endif
-+#ifndef num_possible_cpus
-+#define num_possible_cpus() 1
-+#endif
-+
-+/* always adopt 2.5 definitions */
-+#define KERNEL_VERSION(a,b,c) ((a)*100+(b)*10+c)
-+#define LINUX_VERSION_CODE KERNEL_VERSION(2,5,0)
-+
-+#ifndef page_private
-+#define page_private(page) ((page)->private)
-+#define set_page_private(page, v) ((page)->private = (v))
-+#endif
-+
-+static inline void inter_module_put(void *a)
-+{
-+ return;
-+}
-+
-+void *inter_module_get(char *arg);
-+
-+/* cheats for now */
-+
-+struct work_struct {
-+ void (*ws_task)(void *arg);
-+ void *ws_arg;
-+};
-+
-+static inline void prepare_work(struct work_struct *q, void (*t)(void *),
-+ void *arg)
-+{
-+ q->ws_task = t;
-+ q->ws_arg = arg;
-+ return;
-+}
-+
-+static inline void schedule_work(struct work_struct *q)
-+{
-+ q->ws_task(q->ws_arg);
-+}
-+
-+
-+#define strnlen(a,b) strlen(a)
-+static inline void *kmalloc(int size, int prot)
-+{
-+ return malloc(size);
-+}
-+#define vmalloc malloc
-+#define vfree free
-+#define kfree(a) free(a)
-+#define GFP_KERNEL 1
-+#define GFP_HIGHUSER 1
-+#define GFP_ATOMIC 1
-+#define GFP_NOFS 1
-+#define IS_ERR(a) ((unsigned long)(a) > (unsigned long)-1000L)
-+#define PTR_ERR(a) ((long)(a))
-+#define ERR_PTR(a) ((void*)((long)(a)))
-+
-+typedef struct {
-+ void *cwd;
-+}mm_segment_t;
-+
-+typedef int (read_proc_t)(char *page, char **start, off_t off,
-+ int count, int *eof, void *data);
-+
-+struct file; /* forward ref */
-+typedef int (write_proc_t)(struct file *file, const char *buffer,
-+ unsigned long count, void *data);
-+
-+#define NIPQUAD(addr) \
-+ ((unsigned char *)&addr)[0], \
-+ ((unsigned char *)&addr)[1], \
-+ ((unsigned char *)&addr)[2], \
-+ ((unsigned char *)&addr)[3]
-+
-+#if defined(__LITTLE_ENDIAN)
-+#define HIPQUAD(addr) \
-+ ((unsigned char *)&addr)[3], \
-+ ((unsigned char *)&addr)[2], \
-+ ((unsigned char *)&addr)[1], \
-+ ((unsigned char *)&addr)[0]
-+#elif defined(__BIG_ENDIAN)
-+#define HIPQUAD NIPQUAD
-+#else
-+#error "Undefined byteorder??"
-+#endif /* __LITTLE_ENDIAN */
-+
-+/* bits ops */
-+
-+/* a long can be more than 32 bits, so use BITS_PER_LONG
-+ * to allow the compiler to adjust the bit shifting accordingly
-+ */
-+
-+static __inline__ int ext2_set_bit(int nr, void *addr)
-+{
-+ return set_bit(nr, addr);
-+}
-+
-+static __inline__ int ext2_clear_bit(int nr, void *addr)
-+{
-+ return clear_bit(nr, addr);
-+}
-+
-+static __inline__ int ext2_test_bit(int nr, void *addr)
-+{
-+ return test_bit(nr, addr);
-+}
-+
-+/* modules */
-+
-+struct module {
-+ int count;
-+};
-+
-+static inline void MODULE_AUTHOR(char *name)
-+{
-+ printf("%s\n", name);
-+}
-+#define MODULE_DESCRIPTION(name) MODULE_AUTHOR(name)
-+#define MODULE_LICENSE(name) MODULE_AUTHOR(name)
-+
-+#define THIS_MODULE NULL
-+#define __init
-+#define __exit
-+
-+/* devices */
-+
-+static inline int misc_register(void *foo)
-+{
-+ return 0;
-+}
-+
-+static inline int misc_deregister(void *foo)
-+{
-+ return 0;
-+}
-+
-+static inline int request_module(char *name)
-+{
-+ return (-EINVAL);
-+}
-+
-+#define __MOD_INC_USE_COUNT(m) do {} while (0)
-+#define __MOD_DEC_USE_COUNT(m) do {} while (0)
-+#define MOD_INC_USE_COUNT do {} while (0)
-+#define MOD_DEC_USE_COUNT do {} while (0)
-+static inline void __module_get(struct module *module)
-+{
-+}
-+
-+static inline int try_module_get(struct module *module)
-+{
-+ return 1;
-+}
-+
-+static inline void module_put(struct module *module)
-+{
-+}
-+
-+/* module initialization */
-+extern int init_obdclass(void);
-+extern int ptlrpc_init(void);
-+extern int ldlm_init(void);
-+extern int osc_init(void);
-+extern int lov_init(void);
-+extern int mdc_init(void);
-+extern int mgc_init(void);
-+extern int echo_client_init(void);
-+
-+
-+
-+/* general stuff */
-+
-+#define EXPORT_SYMBOL(S)
-+
-+struct rcu_head { };
-+
-+typedef struct { } spinlock_t;
-+typedef __u64 kdev_t;
-+
-+#define SPIN_LOCK_UNLOCKED (spinlock_t) { }
-+#define LASSERT_SPIN_LOCKED(lock) do {} while(0)
-+#define LASSERT_SEM_LOCKED(sem) do {} while(0)
-+
-+static inline void spin_lock(spinlock_t *l) {return;}
-+static inline void spin_unlock(spinlock_t *l) {return;}
-+static inline void spin_lock_init(spinlock_t *l) {return;}
-+static inline void local_irq_save(unsigned long flag) {return;}
-+static inline void local_irq_restore(unsigned long flag) {return;}
-+static inline int spin_is_locked(spinlock_t *l) {return 1;}
-+
-+static inline void spin_lock_bh(spinlock_t *l) {}
-+static inline void spin_unlock_bh(spinlock_t *l) {}
-+static inline void spin_lock_irqsave(spinlock_t *a, unsigned long b) {}
-+static inline void spin_unlock_irqrestore(spinlock_t *a, unsigned long b) {}
-+
-+typedef spinlock_t rwlock_t;
-+#define RW_LOCK_UNLOCKED SPIN_LOCK_UNLOCKED
-+#define read_lock(l) spin_lock(l)
-+#define read_unlock(l) spin_unlock(l)
-+#define write_lock(l) spin_lock(l)
-+#define write_unlock(l) spin_unlock(l)
-+#define rwlock_init(l) spin_lock_init(l)
-+
-+#define min(x,y) ((x)<(y) ? (x) : (y))
-+#define max(x,y) ((x)>(y) ? (x) : (y))
-+
-+#ifndef min_t
-+#define min_t(type,x,y) \
-+ ({ type __x = (x); type __y = (y); __x < __y ? __x: __y; })
-+#endif
-+#ifndef max_t
-+#define max_t(type,x,y) \
-+ ({ type __x = (x); type __y = (y); __x > __y ? __x: __y; })
-+#endif
-+
-+#define simple_strtol strtol
-+
-+/* registering symbols */
-+#ifndef ERESTARTSYS
-+#define ERESTARTSYS ERESTART
-+#endif
-+#define HZ 1
-+
-+/* random */
-+
-+void get_random_bytes(void *ptr, int size);
-+
-+/* memory */
-+
-+/* memory size: used for some client tunables */
-+#define num_physpages (256 * 1024) /* 1GB */
-+
-+static inline int copy_from_user(void *a,void *b, int c)
-+{
-+ memcpy(a,b,c);
-+ return 0;
-+}
-+
-+static inline int copy_to_user(void *a,void *b, int c)
-+{
-+ memcpy(a,b,c);
-+ return 0;
-+}
-+
-+
-+/* slabs */
-+typedef struct {
-+ int size;
-+} kmem_cache_t;
-+#define SLAB_HWCACHE_ALIGN 0
-+static inline kmem_cache_t *
-+kmem_cache_create(const char *name, size_t objsize, size_t cdum,
-+ unsigned long d,
-+ void (*e)(void *, kmem_cache_t *, unsigned long),
-+ void (*f)(void *, kmem_cache_t *, unsigned long))
-+{
-+ kmem_cache_t *c;
-+ c = malloc(sizeof(*c));
-+ if (!c)
-+ return NULL;
-+ c->size = objsize;
-+ CDEBUG(D_MALLOC, "alloc slab cache %s at %p, objsize %d\n",
-+ name, c, (int)objsize);
-+ return c;
-+};
-+
-+static inline int kmem_cache_destroy(kmem_cache_t *a)
-+{
-+ CDEBUG(D_MALLOC, "destroy slab cache %p, objsize %u\n", a, a->size);
-+ free(a);
-+ return 0;
-+}
-+
-+/* struct page decl moved out from here into portals/include/libcfs/user-prim.h */
-+
-+/* 2.4 defines */
-+#define PAGE_LIST_ENTRY list
-+#define PAGE_LIST(page) ((page)->list)
-+
-+#define kmap(page) (page)->addr
-+#define kunmap(a) do {} while (0)
-+
-+static inline cfs_page_t *alloc_pages(int mask, unsigned long order)
-+{
-+ cfs_page_t *pg = malloc(sizeof(*pg));
-+
-+ if (!pg)
-+ return NULL;
-+#if 0 //#ifdef MAP_ANONYMOUS
-+ pg->addr = mmap(0, PAGE_SIZE << order, PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, 0, 0);
-+#else
-+ pg->addr = malloc(CFS_PAGE_SIZE << order);
-+#endif
-+
-+ if (!pg->addr) {
-+ free(pg);
-+ return NULL;
-+ }
-+ return pg;
-+}
-+#define cfs_alloc_pages(mask, order) alloc_pages((mask), (order))
-+
-+#define alloc_page(mask) alloc_pages((mask), 0)
-+#define cfs_alloc_page(mask) alloc_page(mask)
-+
-+static inline void __free_pages(cfs_page_t *pg, int what)
-+{
-+#if 0 //#ifdef MAP_ANONYMOUS
-+ munmap(pg->addr, PAGE_SIZE);
-+#else
-+ free(pg->addr);
-+#endif
-+ free(pg);
-+}
-+#define __cfs_free_pages(pg, order) __free_pages((pg), (order))
-+
-+#define __free_page(page) __free_pages((page), 0)
-+#define free_page(page) __free_page(page)
-+#define __cfs_free_page(page) __cfs_free_pages((page), 0)
-+
-+static inline cfs_page_t* __grab_cache_page(unsigned long index)
-+{
-+ cfs_page_t *pg = alloc_pages(0, 0);
-+
-+ if (pg)
-+ pg->index = index;
-+ return pg;
-+}
-+
-+#define grab_cache_page(index) __grab_cache_page(index)
-+#define page_cache_release(page) __free_pages(page, 0)
-+
-+/* arithmetic */
-+#define do_div(a,b) \
-+ ({ \
-+ unsigned long remainder;\
-+ remainder = (a) % (b); \
-+ (a) = (a) / (b); \
-+ (remainder); \
-+ })
-+
-+/* VFS stuff */
-+#define ATTR_MODE 0x0001
-+#define ATTR_UID 0x0002
-+#define ATTR_GID 0x0004
-+#define ATTR_SIZE 0x0008
-+#define ATTR_ATIME 0x0010
-+#define ATTR_MTIME 0x0020
-+#define ATTR_CTIME 0x0040
-+#define ATTR_ATIME_SET 0x0080
-+#define ATTR_MTIME_SET 0x0100
-+#define ATTR_FORCE 0x0200 /* Not a change, but a change it */
-+#define ATTR_ATTR_FLAG 0x0400
-+#define ATTR_RAW 0x0800 /* file system, not vfs will massage attrs */
-+#define ATTR_FROM_OPEN 0x1000 /* called from open path, ie O_TRUNC */
-+#define ATTR_CTIME_SET 0x2000
-+#define ATTR_KILL_SUID 0
-+#define ATTR_KILL_SGID 0
-+
-+struct iattr {
-+ unsigned int ia_valid;
-+ umode_t ia_mode;
-+ uid_t ia_uid;
-+ gid_t ia_gid;
-+ loff_t ia_size;
-+ time_t ia_atime;
-+ time_t ia_mtime;
-+ time_t ia_ctime;
-+ unsigned int ia_attr_flags;
-+};
-+#define ll_iattr_struct iattr
-+
-+#define IT_OPEN 0x0001
-+#define IT_CREAT 0x0002
-+#define IT_READDIR 0x0004
-+#define IT_GETATTR 0x0008
-+#define IT_LOOKUP 0x0010
-+#define IT_UNLINK 0x0020
-+#define IT_GETXATTR 0x0040
-+#define IT_EXEC 0x0080
-+#define IT_PIN 0x0100
-+
-+#define IT_FL_LOCKED 0x0001
-+#define IT_FL_FOLLOWED 0x0002 /* set by vfs_follow_link */
-+
-+#define INTENT_MAGIC 0x19620323
-+
-+struct lustre_intent_data {
-+ int it_disposition;
-+ int it_status;
-+ __u64 it_lock_handle;
-+ void *it_data;
-+ int it_lock_mode;
-+ int it_int_flags;
-+};
-+struct lookup_intent {
-+ int it_magic;
-+ void (*it_op_release)(struct lookup_intent *);
-+ int it_op;
-+ int it_flags;
-+ int it_create_mode;
-+ union {
-+ struct lustre_intent_data lustre;
-+ } d;
-+};
-+
-+static inline void intent_init(struct lookup_intent *it, int op, int flags)
-+{
-+ memset(it, 0, sizeof(*it));
-+ it->it_magic = INTENT_MAGIC;
-+ it->it_op = op;
-+ it->it_flags = flags;
-+}
-+
-+
-+struct dentry {
-+ int d_count;
-+};
-+
-+struct vfsmount {
-+ void *pwd;
-+};
-+
-+/* semaphores */
-+struct rw_semaphore {
-+ int count;
-+};
-+
-+/* semaphores */
-+struct semaphore {
-+ int count;
-+};
-+
-+/* use the macro's argument to avoid unused warnings */
-+#define down(a) do { (void)a; } while (0)
-+#define mutex_down(a) down(a)
-+#define up(a) do { (void)a; } while (0)
-+#define mutex_up(a) up(a)
-+#define down_read(a) do { (void)a; } while (0)
-+#define up_read(a) do { (void)a; } while (0)
-+#define down_write(a) do { (void)a; } while (0)
-+#define up_write(a) do { (void)a; } while (0)
-+#define sema_init(a,b) do { (void)a; } while (0)
-+#define init_rwsem(a) do { (void)a; } while (0)
-+#define DECLARE_MUTEX(name) \
-+ struct semaphore name = { 1 }
-+static inline void init_MUTEX (struct semaphore *sem)
-+{
-+ sema_init(sem, 1);
-+}
-+static inline void init_MUTEX_LOCKED (struct semaphore *sem)
-+{
-+ sema_init(sem, 0);
-+}
-+
-+#define init_mutex(s) init_MUTEX(s)
-+
-+typedef struct {
-+ struct list_head sleepers;
-+} wait_queue_head_t;
-+
-+typedef struct {
-+ struct list_head sleeping;
-+ void *process;
-+} wait_queue_t;
-+
-+struct signal {
-+ int signal;
-+};
-+
-+struct task_struct {
-+ int state;
-+ struct signal pending;
-+ char comm[32];
-+ int pid;
-+ int fsuid;
-+ int fsgid;
-+ int max_groups;
-+ int ngroups;
-+ gid_t *groups;
-+ cfs_cap_t cap_effective;
-+};
-+
-+typedef struct task_struct cfs_task_t;
-+#define cfs_current() current
-+#define cfs_curproc_pid() (current->pid)
-+#define cfs_curproc_comm() (current->comm)
-+
-+extern struct task_struct *current;
-+int in_group_p(gid_t gid);
-+
-+#define set_current_state(foo) do { current->state = foo; } while (0)
-+
-+#define init_waitqueue_entry(q,p) do { (q)->process = p; } while (0)
-+#define add_wait_queue(q,p) do { list_add(&(q)->sleepers, &(p)->sleeping); } while (0)
-+#define del_wait_queue(p) do { list_del(&(p)->sleeping); } while (0)
-+#define remove_wait_queue(q,p) do { list_del(&(p)->sleeping); } while (0)
-+
-+#define DECLARE_WAIT_QUEUE_HEAD(HEAD) \
-+ wait_queue_head_t HEAD = { \
-+ .sleepers = CFS_LIST_HEAD_INIT(HEAD.sleepers) \
-+ }
-+#define init_waitqueue_head(l) CFS_INIT_LIST_HEAD(&(l)->sleepers)
-+#define wake_up(l) do { int a = 0; a++; } while (0)
-+#define TASK_INTERRUPTIBLE 0
-+#define TASK_UNINTERRUPTIBLE 1
-+#define TASK_RUNNING 2
-+
-+#define wait_event_interruptible(wq, condition) \
-+({ \
-+ struct l_wait_info lwi; \
-+ int timeout = 100000000;/* for ever */ \
-+ int ret; \
-+ \
-+ lwi = LWI_TIMEOUT(timeout, NULL, NULL); \
-+ ret = l_wait_event(NULL, condition, &lwi); \
-+ \
-+ ret; \
-+})
-+
-+#define in_interrupt() (0)
-+
-+#define schedule() do {} while (0)
-+static inline int schedule_timeout(signed long t)
-+{
-+ return 0;
-+}
-+
-+#define lock_kernel() do {} while (0)
-+#define unlock_kernel() do {} while (0)
-+#define daemonize(l) do {} while (0)
-+#define sigfillset(l) do {} while (0)
-+#define recalc_sigpending(l) do {} while (0)
-+#define kernel_thread(l,m,n) LBUG()
-+
-+#define USERMODEHELPER(path, argv, envp) (0)
-+#define SIGNAL_MASK_ASSERT()
-+#define KERN_INFO
-+
-+#include <sys/time.h>
-+#if HZ != 1
-+#error "liblustre's jiffies currently expects HZ to be 1"
-+#endif
-+#define jiffies \
-+({ \
-+ unsigned long _ret = 0; \
-+ struct timeval tv; \
-+ if (gettimeofday(&tv, NULL) == 0) \
-+ _ret = tv.tv_sec; \
-+ _ret; \
-+})
-+#define get_jiffies_64() (__u64)jiffies
-+#define time_after(a, b) ((long)(b) - (long)(a) < 0)
-+#define time_before(a, b) time_after(b,a)
-+#define time_after_eq(a,b) ((long)(a) - (long)(b) >= 0)
-+
-+struct timer_list {
-+ struct list_head tl_list;
-+ void (*function)(unsigned long unused);
-+ unsigned long data;
-+ long expires;
-+};
-+
-+static inline int timer_pending(struct timer_list *l)
-+{
-+ if (time_after(l->expires, jiffies))
-+ return 1;
-+ else
-+ return 0;
-+}
-+
-+static inline int init_timer(struct timer_list *l)
-+{
-+ CFS_INIT_LIST_HEAD(&l->tl_list);
-+ return 0;
-+}
-+
-+static inline void mod_timer(struct timer_list *l, int thetime)
-+{
-+ l->expires = thetime;
-+}
-+
-+static inline void del_timer(struct timer_list *l)
-+{
-+ free(l);
-+}
-+
-+typedef struct { volatile int counter; } atomic_t;
-+
-+#define ATOMIC_INIT(i) { i }
-+
-+#define atomic_read(a) ((a)->counter)
-+#define atomic_set(a,b) do {(a)->counter = b; } while (0)
-+#define atomic_dec_and_test(a) ((--((a)->counter)) == 0)
-+#define atomic_dec_and_lock(a,b) ((--((a)->counter)) == 0)
-+#define atomic_inc(a) (((a)->counter)++)
-+#define atomic_dec(a) do { (a)->counter--; } while (0)
-+#define atomic_add(b,a) do {(a)->counter += b;} while (0)
-+#define atomic_add_return(n,a) ((a)->counter += n)
-+#define atomic_inc_return(a) atomic_add_return(1,a)
-+#define atomic_sub(b,a) do {(a)->counter -= b;} while (0)
-+#define atomic_sub_return(n,a) ((a)->counter -= n)
-+#define atomic_dec_return(a) atomic_sub_return(1,a)
-+
-+#ifndef likely
-+#define likely(exp) (exp)
-+#endif
-+#ifndef unlikely
-+#define unlikely(exp) (exp)
-+#endif
-+
-+/* FIXME sys/capability will finally included linux/fs.h thus
-+ * cause numerous trouble on x86-64. as temporary solution for
-+ * build broken at cary, we copy definition we need from capability.h
-+ * FIXME
-+ */
-+struct _cap_struct;
-+typedef struct _cap_struct *cap_t;
-+typedef int cap_value_t;
-+typedef enum {
-+ CAP_EFFECTIVE=0,
-+ CAP_PERMITTED=1,
-+ CAP_INHERITABLE=2
-+} cap_flag_t;
-+typedef enum {
-+ CAP_CLEAR=0,
-+ CAP_SET=1
-+} cap_flag_value_t;
-+
-+cap_t cap_get_proc(void);
-+int cap_get_flag(cap_t, cap_value_t, cap_flag_t, cap_flag_value_t *);
-+
-+static inline void libcfs_run_lbug_upcall(char *file, const char *fn,
-+ const int l){}
-+
-+/* completion */
-+struct completion {
-+ unsigned int done;
-+ cfs_waitq_t wait;
-+};
-+
-+#define COMPLETION_INITIALIZER(work) \
-+ { 0, __WAIT_QUEUE_HEAD_INITIALIZER((work).wait) }
-+
-+#define DECLARE_COMPLETION(work) \
-+ struct completion work = COMPLETION_INITIALIZER(work)
-+
-+#define INIT_COMPLETION(x) ((x).done = 0)
-+
-+static inline void init_completion(struct completion *x)
-+{
-+ x->done = 0;
-+ init_waitqueue_head(&x->wait);
-+}
-+
-+struct liblustre_wait_callback {
-+ struct list_head llwc_list;
-+ const char *llwc_name;
-+ int (*llwc_fn)(void *arg);
-+ void *llwc_arg;
-+};
-+
-+void *liblustre_register_wait_callback(const char *name,
-+ int (*fn)(void *arg), void *arg);
-+void liblustre_deregister_wait_callback(void *notifier);
-+int liblustre_wait_event(int timeout);
-+
-+void *liblustre_register_idle_callback(const char *name,
-+ int (*fn)(void *arg), void *arg);
-+void liblustre_deregister_idle_callback(void *notifier);
-+void liblustre_wait_idle(void);
-+
-+/* flock related */
-+struct nfs_lock_info {
-+ __u32 state;
-+ __u32 flags;
-+ void *host;
-+};
-+
-+typedef struct file_lock {
-+ struct file_lock *fl_next; /* singly linked list for this inode */
-+ struct list_head fl_link; /* doubly linked list of all locks */
-+ struct list_head fl_block; /* circular list of blocked processes */
-+ void *fl_owner;
-+ unsigned int fl_pid;
-+ cfs_waitq_t fl_wait;
-+ struct file *fl_file;
-+ unsigned char fl_flags;
-+ unsigned char fl_type;
-+ loff_t fl_start;
-+ loff_t fl_end;
-+
-+ void (*fl_notify)(struct file_lock *); /* unblock callback */
-+ void (*fl_insert)(struct file_lock *); /* lock insertion callback */
-+ void (*fl_remove)(struct file_lock *); /* lock removal callback */
-+
-+ void *fl_fasync; /* for lease break notifications */
-+ unsigned long fl_break_time; /* for nonblocking lease breaks */
-+
-+ union {
-+ struct nfs_lock_info nfs_fl;
-+ } fl_u;
-+} cfs_flock_t;
-+
-+#define cfs_flock_type(fl) ((fl)->fl_type)
-+#define cfs_flock_set_type(fl, type) do { (fl)->fl_type = (type); } while(0)
-+#define cfs_flock_pid(fl) ((fl)->fl_pid)
-+#define cfs_flock_set_pid(fl, pid) do { (fl)->fl_pid = (pid); } while(0)
-+#define cfs_flock_start(fl) ((fl)->fl_start)
-+#define cfs_flock_set_start(fl, start) do { (fl)->fl_start = (start); } while(0)
-+#define cfs_flock_end(fl) ((fl)->fl_end)
-+#define cfs_flock_set_end(fl, end) do { (fl)->fl_end = (end); } while(0)
-+
-+#ifndef OFFSET_MAX
-+#define INT_LIMIT(x) (~((x)1 << (sizeof(x)*8 - 1)))
-+#define OFFSET_MAX INT_LIMIT(loff_t)
-+#endif
-+
-+/* XXX: defined in kernel */
-+#define FL_POSIX 1
-+#define FL_SLEEP 128
-+
-+/* quota */
-+#define QUOTA_OK 0
-+#define NO_QUOTA 1
-+
-+/* ACL */
-+struct posix_acl_entry {
-+ short e_tag;
-+ unsigned short e_perm;
-+ unsigned int e_id;
-+};
-+
-+struct posix_acl {
-+ atomic_t a_refcount;
-+ unsigned int a_count;
-+ struct posix_acl_entry a_entries[0];
-+};
-+
-+typedef struct {
-+ __u16 e_tag;
-+ __u16 e_perm;
-+ __u32 e_id;
-+} xattr_acl_entry;
-+
-+typedef struct {
-+ __u32 a_version;
-+ xattr_acl_entry a_entries[0];
-+} xattr_acl_header;
-+
-+static inline size_t xattr_acl_size(int count)
-+{
-+ return sizeof(xattr_acl_header) + count * sizeof(xattr_acl_entry);
-+}
-+
-+static inline
-+struct posix_acl * posix_acl_from_xattr(const void *value, size_t size)
-+{
-+ return NULL;
-+}
-+
-+static inline
-+int posix_acl_valid(const struct posix_acl *acl)
-+{
-+ return 0;
-+}
-+
-+static inline
-+void posix_acl_release(struct posix_acl *acl)
-+{
-+}
-+
-+#ifdef LIBLUSTRE_POSIX_ACL
-+ #ifndef posix_acl_xattr_entry
-+ #define posix_acl_xattr_entry xattr_acl_entry
-+ #endif
-+ #ifndef posix_acl_xattr_header
-+ #define posix_acl_xattr_header xattr_acl_header
-+ #endif
-+ #ifndef posix_acl_xattr_size
-+ #define posix_acl_xattr_size(entry) xattr_acl_size(entry)
-+ #endif
-+ #ifndef CONFIG_FS_POSIX_ACL
-+ #define CONFIG_FS_POSIX_ACL 1
-+ #endif
-+#endif
-+
-+#ifndef ENOTSUPP
-+#define ENOTSUPP ENOTSUP
-+#endif
-+
-+#include <obd_support.h>
-+#include <lustre/lustre_idl.h>
-+#include <lustre_lib.h>
-+#include <lustre_import.h>
-+#include <lustre_export.h>
-+#include <lustre_net.h>
-+
-+#endif
diff -urNad lustre~/lustre/include/linux/lustre_compat25.h lustre/lustre/include/linux/lustre_compat25.h
--- lustre~/lustre/include/linux/lustre_compat25.h 2009-08-19 09:51:08.000000000 +0200
-+++ lustre/lustre/include/linux/lustre_compat25.h 2009-08-19 14:10:45.000000000 +0200
++++ lustre/lustre/include/linux/lustre_compat25.h 2009-08-20 10:25:20.000000000 +0200
@@ -44,8 +44,8 @@
#endif
@@ -3973,23 +1009,18 @@ diff -urNad lustre~/lustre/include/linux/lustre_compat25.h lustre/lustre/include
#else
#define ll_set_fs_pwd set_fs_pwd
#endif /* HAVE_SET_FS_PWD */
-@@ -121,8 +144,13 @@
+@@ -121,8 +144,8 @@
void groups_free(struct group_info *ginfo);
#else /* >= 2.6.4 */
-#define current_ngroups current->group_info->ngroups
-#define current_groups current->group_info->small_block
-+# ifdef HAS_STRUCT_CRED
-+# define current_ngroups current->real_cred->group_info->ngroups
-+# define current_groups current->real_cred->group_info->small_block
-+# else
-+# define current_ngroups current->group_info->ngroups
-+# define current_groups current->group_info->small_block
-+# endif
++# define current_ngroups CREDENTIALS(current,group_info)->ngroups
++# define current_groups CREDENTIALS(current,group_info)->small_block
#endif /* LINUX_VERSION_CODE < KERNEL_VERSION(2,6,4) */
-@@ -151,7 +179,12 @@
+@@ -151,7 +174,12 @@
#endif
/* XXX our code should be using the 2.6 calls, not the other way around */
@@ -4002,7 +1033,7 @@ diff -urNad lustre~/lustre/include/linux/lustre_compat25.h lustre/lustre/include
#define Page_Uptodate(page) PageUptodate(page)
#define ll_redirty_page(page) set_page_dirty(page)
-@@ -364,8 +397,17 @@
+@@ -364,8 +392,17 @@
#define LL_RENAME_DOES_D_MOVE FS_ODD_RENAME
#endif
@@ -4021,7 +1052,7 @@ diff -urNad lustre~/lustre/include/linux/lustre_compat25.h lustre/lustre/include
#define ll_vfs_rmdir(dir,entry,mnt) vfs_rmdir(dir,entry,mnt)
#define ll_vfs_mkdir(inode,dir,mnt,mode) vfs_mkdir(inode,dir,mnt,mode)
#define ll_vfs_link(old,mnt,dir,new,mnt1) vfs_link(old,mnt,dir,new,mnt1)
-@@ -377,7 +419,6 @@
+@@ -377,7 +414,6 @@
#define ll_vfs_rename(old,old_dir,mnt,new,new_dir,mnt1) \
vfs_rename(old,old_dir,mnt,new,new_dir,mnt1)
#else
@@ -4029,7 +1060,7 @@ diff -urNad lustre~/lustre/include/linux/lustre_compat25.h lustre/lustre/include
#define ll_vfs_rmdir(dir,entry,mnt) vfs_rmdir(dir,entry)
#define ll_vfs_mkdir(inode,dir,mnt,mode) vfs_mkdir(inode,dir,mode)
#define ll_vfs_link(old,mnt,dir,new,mnt1) vfs_link(old,dir,new)
-@@ -388,6 +429,57 @@
+@@ -388,6 +424,57 @@
vfs_rename(old,old_dir,new,new_dir)
#endif
@@ -4087,424 +1118,9 @@ diff -urNad lustre~/lustre/include/linux/lustre_compat25.h lustre/lustre/include
#ifndef abs
static inline int abs(int x)
{
-diff -urNad lustre~/lustre/include/linux/lustre_compat25.h.orig lustre/lustre/include/linux/lustre_compat25.h.orig
---- lustre~/lustre/include/linux/lustre_compat25.h.orig 1970-01-01 01:00:00.000000000 +0100
-+++ lustre/lustre/include/linux/lustre_compat25.h.orig 2009-08-19 14:10:45.000000000 +0200
-@@ -0,0 +1,411 @@
-+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-+ * vim:expandtab:shiftwidth=8:tabstop=8:
-+ *
-+ * GPL HEADER START
-+ *
-+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
-+ *
-+ * This program is free software; you can redistribute it and/or modify
-+ * it under the terms of the GNU General Public License version 2 only,
-+ * as published by the Free Software Foundation.
-+ *
-+ * This program is distributed in the hope that it will be useful, but
-+ * WITHOUT ANY WARRANTY; without even the implied warranty of
-+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-+ * General Public License version 2 for more details (a copy is included
-+ * in the LICENSE file that accompanied this code).
-+ *
-+ * You should have received a copy of the GNU General Public License
-+ * version 2 along with this program; If not, see
-+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
-+ *
-+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
-+ * CA 95054 USA or visit www.sun.com if you need additional information or
-+ * have any questions.
-+ *
-+ * GPL HEADER END
-+ */
-+/*
-+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved
-+ * Use is subject to license terms.
-+ */
-+/*
-+ * This file is part of Lustre, http://www.lustre.org/
-+ * Lustre is a trademark of Sun Microsystems, Inc.
-+ */
-+
-+#ifndef _LINUX_COMPAT25_H
-+#define _LINUX_COMPAT25_H
-+
-+#ifdef __KERNEL__
-+
-+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,5)
-+#error sorry, lustre requires at least 2.6.5
-+#endif
-+
-+#include <libcfs/linux/portals_compat25.h>
-+
-+#include <linux/lustre_patchless_compat.h>
-+
-+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,14)
-+struct ll_iattr_struct {
-+ struct iattr iattr;
-+ unsigned int ia_attr_flags;
-+};
-+#else
-+#define ll_iattr_struct iattr
-+#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,14) */
-+
-+#ifndef HAVE_SET_FS_PWD
-+static inline void ll_set_fs_pwd(struct fs_struct *fs, struct vfsmount *mnt,
-+ struct dentry *dentry)
-+{
-+ struct dentry *old_pwd;
-+ struct vfsmount *old_pwdmnt;
-+
-+ write_lock(&fs->lock);
-+ old_pwd = fs->pwd;
-+ old_pwdmnt = fs->pwdmnt;
-+ fs->pwdmnt = mntget(mnt);
-+ fs->pwd = dget(dentry);
-+ write_unlock(&fs->lock);
-+
-+ if (old_pwd) {
-+ dput(old_pwd);
-+ mntput(old_pwdmnt);
-+ }
-+}
-+#else
-+#define ll_set_fs_pwd set_fs_pwd
-+#endif /* HAVE_SET_FS_PWD */
-+
-+#ifdef HAVE_INODE_I_MUTEX
-+#define UNLOCK_INODE_MUTEX(inode) do {mutex_unlock(&(inode)->i_mutex); } while(0)
-+#define LOCK_INODE_MUTEX(inode) do {mutex_lock(&(inode)->i_mutex); } while(0)
-+#define TRYLOCK_INODE_MUTEX(inode) mutex_trylock(&(inode)->i_mutex)
-+#else
-+#define UNLOCK_INODE_MUTEX(inode) do {up(&(inode)->i_sem); } while(0)
-+#define LOCK_INODE_MUTEX(inode) do {down(&(inode)->i_sem); } while(0)
-+#define TRYLOCK_INODE_MUTEX(inode) (!down_trylock(&(inode)->i_sem))
-+#endif /* HAVE_INODE_I_MUTEX */
-+
-+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,15)
-+#define d_child d_u.d_child
-+#define d_rcu d_u.d_rcu
-+#endif
-+
-+#ifdef HAVE_DQUOTOFF_MUTEX
-+#define UNLOCK_DQONOFF_MUTEX(dqopt) do {mutex_unlock(&(dqopt)->dqonoff_mutex); } while(0)
-+#define LOCK_DQONOFF_MUTEX(dqopt) do {mutex_lock(&(dqopt)->dqonoff_mutex); } while(0)
-+#else
-+#define UNLOCK_DQONOFF_MUTEX(dqopt) do {up(&(dqopt)->dqonoff_sem); } while(0)
-+#define LOCK_DQONOFF_MUTEX(dqopt) do {down(&(dqopt)->dqonoff_sem); } while(0)
-+#endif /* HAVE_DQUOTOFF_MUTEX */
-+
-+
-+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,4)
-+#define NGROUPS_SMALL NGROUPS
-+#define NGROUPS_PER_BLOCK ((int)(EXEC_PAGESIZE / sizeof(gid_t)))
-+
-+struct group_info {
-+ int ngroups;
-+ atomic_t usage;
-+ gid_t small_block[NGROUPS_SMALL];
-+ int nblocks;
-+ gid_t *blocks[0];
-+};
-+#define current_ngroups current->ngroups
-+#define current_groups current->groups
-+
-+struct group_info *groups_alloc(int gidsetsize);
-+void groups_free(struct group_info *ginfo);
-+#else /* >= 2.6.4 */
-+
-+#define current_ngroups current->group_info->ngroups
-+#define current_groups current->group_info->small_block
-+
-+#endif /* LINUX_VERSION_CODE < KERNEL_VERSION(2,6,4) */
-+
-+#ifndef page_private
-+#define page_private(page) ((page)->private)
-+#define set_page_private(page, v) ((page)->private = (v))
-+#endif
-+
-+#ifndef HAVE_GFP_T
-+#define gfp_t int
-+#endif
-+
-+#define lock_dentry(___dentry) spin_lock(&(___dentry)->d_lock)
-+#define unlock_dentry(___dentry) spin_unlock(&(___dentry)->d_lock)
-+
-+#define ll_kernel_locked() kernel_locked()
-+
-+/*
-+ * OBD need working random driver, thus all our
-+ * initialization routines must be called after device
-+ * driver initialization
-+ */
-+#ifndef MODULE
-+#undef module_init
-+#define module_init(a) late_initcall(a)
-+#endif
-+
-+/* XXX our code should be using the 2.6 calls, not the other way around */
-+#define TryLockPage(page) TestSetPageLocked(page)
-+#define Page_Uptodate(page) PageUptodate(page)
-+#define ll_redirty_page(page) set_page_dirty(page)
-+
-+#define KDEVT_INIT(val) (val)
-+
-+#define LTIME_S(time) (time.tv_sec)
-+#define ll_path_lookup path_lookup
-+#define ll_permission(inode,mask,nd) permission(inode,mask,nd)
-+
-+#define ll_pgcache_lock(mapping) spin_lock(&mapping->page_lock)
-+#define ll_pgcache_unlock(mapping) spin_unlock(&mapping->page_lock)
-+#define ll_call_writepage(inode, page) \
-+ (inode)->i_mapping->a_ops->writepage(page, NULL)
-+#define ll_invalidate_inode_pages(inode) \
-+ invalidate_inode_pages((inode)->i_mapping)
-+#define ll_truncate_complete_page(page) \
-+ truncate_complete_page(page->mapping, page)
-+
-+#define ll_vfs_create(a,b,c,d) vfs_create(a,b,c,d)
-+#define ll_dev_t dev_t
-+#define kdev_t dev_t
-+#define to_kdev_t(dev) (dev)
-+#define kdev_t_to_nr(dev) (dev)
-+#define val_to_kdev(dev) (dev)
-+#define ILOOKUP(sb, ino, test, data) ilookup5(sb, ino, test, data);
-+
-+#include <linux/writeback.h>
-+
-+static inline int cleanup_group_info(void)
-+{
-+ struct group_info *ginfo;
-+
-+ ginfo = groups_alloc(0);
-+ if (!ginfo)
-+ return -ENOMEM;
-+
-+ set_current_groups(ginfo);
-+ put_group_info(ginfo);
-+
-+ return 0;
-+}
-+
-+#define __set_page_ll_data(page, llap) \
-+ do { \
-+ page_cache_get(page); \
-+ SetPagePrivate(page); \
-+ set_page_private(page, (unsigned long)llap); \
-+ } while (0)
-+#define __clear_page_ll_data(page) \
-+ do { \
-+ ClearPagePrivate(page); \
-+ set_page_private(page, 0); \
-+ page_cache_release(page); \
-+ } while(0)
-+
-+#define kiobuf bio
-+
-+#include <linux/proc_fs.h>
-+
-+#if !defined(HAVE_D_REHASH_COND) && defined(HAVE___D_REHASH)
-+#define d_rehash_cond(dentry, lock) __d_rehash(dentry, lock)
-+extern void __d_rehash(struct dentry *dentry, int lock);
-+#endif
-+
-+#if !defined(HAVE_D_MOVE_LOCKED) && defined(HAVE___D_MOVE)
-+#define d_move_locked(dentry, target) __d_move(dentry, target)
-+extern void __d_move(struct dentry *dentry, struct dentry *target);
-+#endif
-+
-+#ifdef HAVE_CAN_SLEEP_ARG
-+#define ll_flock_lock_file_wait(file, lock, can_sleep) \
-+ flock_lock_file_wait(file, lock, can_sleep)
-+#else
-+#define ll_flock_lock_file_wait(file, lock, can_sleep) \
-+ flock_lock_file_wait(file, lock)
-+#endif
-+
-+#define CheckWriteback(page, cmd) \
-+ ((!PageWriteback(page) && (cmd & OBD_BRW_READ)) || \
-+ (PageWriteback(page) && (cmd & OBD_BRW_WRITE)))
-+
-+
-+#ifdef HAVE_PAGE_LIST
-+static inline int mapping_has_pages(struct address_space *mapping)
-+{
-+ int rc = 1;
-+
-+ ll_pgcache_lock(mapping);
-+ if (list_empty(&mapping->dirty_pages) &&
-+ list_empty(&mapping->clean_pages) &&
-+ list_empty(&mapping->locked_pages)) {
-+ rc = 0;
-+ }
-+ ll_pgcache_unlock(mapping);
-+
-+ return rc;
-+}
-+#else
-+static inline int mapping_has_pages(struct address_space *mapping)
-+{
-+ return mapping->nrpages > 0;
-+}
-+#endif
-+
-+#ifdef HAVE_KIOBUF_KIO_BLOCKS
-+#define KIOBUF_GET_BLOCKS(k) ((k)->kio_blocks)
-+#else
-+#define KIOBUF_GET_BLOCKS(k) ((k)->blocks)
-+#endif
-+
-+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,7))
-+#define ll_set_dflags(dentry, flags) do { dentry->d_vfs_flags |= flags; } while(0)
-+#define ll_vfs_symlink(dir, dentry, mnt, path, mode) \
-+ vfs_symlink(dir, dentry, path)
-+#else
-+#define ll_set_dflags(dentry, flags) do { \
-+ spin_lock(&dentry->d_lock); \
-+ dentry->d_flags |= flags; \
-+ spin_unlock(&dentry->d_lock); \
-+ } while(0)
-+#ifdef HAVE_SECURITY_PLUG
-+#define ll_vfs_symlink(dir, dentry, mnt, path, mode) \
-+ vfs_symlink(dir, dentry, mnt, path, mode)
-+#else
-+#define ll_vfs_symlink(dir, dentry, mnt, path, mode) \
-+ vfs_symlink(dir, dentry, path, mode)
-+#endif
-+#endif
-+
-+#ifndef container_of
-+#define container_of(ptr, type, member) ({ \
-+ const typeof( ((type *)0)->member ) *__mptr = (ptr); \
-+ (type *)( (char *)__mptr - offsetof(type,member) );})
-+#endif
-+
-+#ifdef HAVE_I_ALLOC_SEM
-+#define UP_WRITE_I_ALLOC_SEM(i) do { up_write(&(i)->i_alloc_sem); } while (0)
-+#define DOWN_WRITE_I_ALLOC_SEM(i) do { down_write(&(i)->i_alloc_sem); } while(0)
-+#define LASSERT_I_ALLOC_SEM_WRITE_LOCKED(i) LASSERT(down_read_trylock(&(i)->i_alloc_sem) == 0)
-+
-+#define UP_READ_I_ALLOC_SEM(i) do { up_read(&(i)->i_alloc_sem); } while (0)
-+#define DOWN_READ_I_ALLOC_SEM(i) do { down_read(&(i)->i_alloc_sem); } while (0)
-+#define LASSERT_I_ALLOC_SEM_READ_LOCKED(i) LASSERT(down_write_trylock(&(i)->i_alloc_sem) == 0)
-+#else
-+#define UP_READ_I_ALLOC_SEM(i) do { } while (0)
-+#define DOWN_READ_I_ALLOC_SEM(i) do { } while (0)
-+#define LASSERT_I_ALLOC_SEM_READ_LOCKED(i) do { } while (0)
-+
-+#define UP_WRITE_I_ALLOC_SEM(i) do { } while (0)
-+#define DOWN_WRITE_I_ALLOC_SEM(i) do { } while (0)
-+#define LASSERT_I_ALLOC_SEM_WRITE_LOCKED(i) do { } while (0)
-+#endif
-+
-+#ifndef HAVE_GRAB_CACHE_PAGE_NOWAIT_GFP
-+#define grab_cache_page_nowait_gfp(x, y, z) grab_cache_page_nowait((x), (y))
-+#endif
-+
-+#ifndef HAVE_FILEMAP_FDATAWRITE
-+#define filemap_fdatawrite(mapping) filemap_fdatasync(mapping)
-+#endif
-+
-+#ifdef HAVE_VFS_KERN_MOUNT
-+static inline
-+struct vfsmount *
-+ll_kern_mount(const char *fstype, int flags, const char *name, void *data)
-+{
-+ struct file_system_type *type = get_fs_type(fstype);
-+ struct vfsmount *mnt;
-+ if (!type)
-+ return ERR_PTR(-ENODEV);
-+ mnt = vfs_kern_mount(type, flags, name, data);
-+ module_put(type->owner);
-+ return mnt;
-+}
-+#else
-+#define ll_kern_mount(fstype, flags, name, data) do_kern_mount((fstype), (flags), (name), (data))
-+#endif
-+
-+#ifdef HAVE_STATFS_DENTRY_PARAM
-+#define ll_do_statfs(sb, sfs) (sb)->s_op->statfs((sb)->s_root, (sfs))
-+#else
-+#define ll_do_statfs(sb, sfs) (sb)->s_op->statfs((sb), (sfs))
-+#endif
-+
-+/* task_struct */
-+#ifndef HAVE_TASK_PPTR
-+#define p_pptr parent
-+#endif
-+
-+#ifdef HAVE_UNREGISTER_BLKDEV_RETURN_INT
-+#define ll_unregister_blkdev(a,b) unregister_blkdev((a),(b))
-+#else
-+static inline
-+int ll_unregister_blkdev(unsigned int dev, const char *name)
-+{
-+ unregister_blkdev(dev, name);
-+ return 0;
-+}
-+#endif
-+
-+#ifdef HAVE_INVALIDATE_BDEV_2ARG
-+#define ll_invalidate_bdev(a,b) invalidate_bdev((a),(b))
-+#else
-+#define ll_invalidate_bdev(a,b) invalidate_bdev((a))
-+#endif
-+
-+#ifdef HAVE_FS_RENAME_DOES_D_MOVE
-+#define LL_RENAME_DOES_D_MOVE FS_RENAME_DOES_D_MOVE
-+#else
-+#define LL_RENAME_DOES_D_MOVE FS_ODD_RENAME
-+#endif
-+
-+#ifdef HAVE_SECURITY_PLUG
-+#define ll_remove_suid(inode,mnt) remove_suid(inode,mnt)
-+#define ll_vfs_rmdir(dir,entry,mnt) vfs_rmdir(dir,entry,mnt)
-+#define ll_vfs_mkdir(inode,dir,mnt,mode) vfs_mkdir(inode,dir,mnt,mode)
-+#define ll_vfs_link(old,mnt,dir,new,mnt1) vfs_link(old,mnt,dir,new,mnt1)
-+#define ll_vfs_unlink(inode,entry,mnt) vfs_unlink(inode,entry,mnt)
-+#define ll_vfs_mknod(dir,entry,mnt,mode,dev) \
-+ vfs_mknod(dir,entry,mnt,mode,dev)
-+#define ll_security_inode_unlink(dir,entry,mnt) \
-+ security_inode_unlink(dir,entry,mnt)
-+#define ll_vfs_rename(old,old_dir,mnt,new,new_dir,mnt1) \
-+ vfs_rename(old,old_dir,mnt,new,new_dir,mnt1)
-+#else
-+#define ll_remove_suid(inode,mnt) remove_suid(inode)
-+#define ll_vfs_rmdir(dir,entry,mnt) vfs_rmdir(dir,entry)
-+#define ll_vfs_mkdir(inode,dir,mnt,mode) vfs_mkdir(inode,dir,mode)
-+#define ll_vfs_link(old,mnt,dir,new,mnt1) vfs_link(old,dir,new)
-+#define ll_vfs_unlink(inode,entry,mnt) vfs_unlink(inode,entry)
-+#define ll_vfs_mknod(dir,entry,mnt,mode,dev) vfs_mknod(dir,entry,mode,dev)
-+#define ll_security_inode_unlink(dir,entry,mnt) security_inode_unlink(dir,entry)
-+#define ll_vfs_rename(old,old_dir,mnt,new,new_dir,mnt1) \
-+ vfs_rename(old,old_dir,new,new_dir)
-+#endif
-+
-+#ifndef abs
-+static inline int abs(int x)
-+{
-+ return (x < 0) ? -x : x;
-+}
-+#endif
-+
-+#ifndef labs
-+static inline long labs(long x)
-+{
-+ return (x < 0) ? -x : x;
-+}
-+#endif
-+
-+/* Using kernel fls(). Userspace will use one defined in user-bitops.h. */
-+#ifndef __fls
-+#define __fls fls
-+#endif
-+
-+#endif /* __KERNEL__ */
-+#endif /* _COMPAT25_H */
diff -urNad lustre~/lustre/include/linux/lustre_lib.h lustre/lustre/include/linux/lustre_lib.h
--- lustre~/lustre/include/linux/lustre_lib.h 2009-08-19 09:51:08.000000000 +0200
-+++ lustre/lustre/include/linux/lustre_lib.h 2009-08-19 14:10:45.000000000 +0200
++++ lustre/lustre/include/linux/lustre_lib.h 2009-08-20 10:25:20.000000000 +0200
@@ -49,7 +49,6 @@
# include <string.h>
# include <sys/types.h>
@@ -4515,7 +1131,7 @@ diff -urNad lustre~/lustre/include/linux/lustre_lib.h lustre/lustre/include/linu
# include <linux/signal.h>
diff -urNad lustre~/lustre/include/linux/lustre_patchless_compat.h lustre/lustre/include/linux/lustre_patchless_compat.h
--- lustre~/lustre/include/linux/lustre_patchless_compat.h 2009-08-19 09:51:08.000000000 +0200
-+++ lustre/lustre/include/linux/lustre_patchless_compat.h 2009-08-19 14:10:45.000000000 +0200
++++ lustre/lustre/include/linux/lustre_patchless_compat.h 2009-08-20 10:25:20.000000000 +0200
@@ -52,7 +52,7 @@
BUG_ON(!PageLocked(page));
@@ -4536,7 +1152,7 @@ diff -urNad lustre~/lustre/include/linux/lustre_patchless_compat.h lustre/lustre
spin_unlock_irq(&mapping->tree_lock);
diff -urNad lustre~/lustre/include/lprocfs_status.h lustre/lustre/include/lprocfs_status.h
--- lustre~/lustre/include/lprocfs_status.h 2009-08-19 09:51:09.000000000 +0200
-+++ lustre/lustre/include/lprocfs_status.h 2009-08-19 14:10:45.000000000 +0200
++++ lustre/lustre/include/lprocfs_status.h 2009-08-20 10:25:20.000000000 +0200
@@ -521,6 +521,8 @@
#define LPROCFS_EXIT() do { \
up_read(&_lprocfs_lock); \
@@ -4569,844 +1185,48 @@ diff -urNad lustre~/lustre/include/lprocfs_status.h lustre/lustre/include/lprocf
/* You must use these macros when you want to refer to
* the import in a client obd_device for a lprocfs entry */
#define LPROCFS_CLIMP_CHECK(obd) do { \
-diff -urNad lustre~/lustre/include/lprocfs_status.h.orig lustre/lustre/include/lprocfs_status.h.orig
---- lustre~/lustre/include/lprocfs_status.h.orig 1970-01-01 01:00:00.000000000 +0100
-+++ lustre/lustre/include/lprocfs_status.h.orig 2009-08-19 14:10:45.000000000 +0200
-@@ -0,0 +1,817 @@
-+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-+ * vim:expandtab:shiftwidth=8:tabstop=8:
-+ *
-+ * GPL HEADER START
-+ *
-+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
-+ *
-+ * This program is free software; you can redistribute it and/or modify
-+ * it under the terms of the GNU General Public License version 2 only,
-+ * as published by the Free Software Foundation.
-+ *
-+ * This program is distributed in the hope that it will be useful, but
-+ * WITHOUT ANY WARRANTY; without even the implied warranty of
-+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-+ * General Public License version 2 for more details (a copy is included
-+ * in the LICENSE file that accompanied this code).
-+ *
-+ * You should have received a copy of the GNU General Public License
-+ * version 2 along with this program; If not, see
-+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
-+ *
-+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
-+ * CA 95054 USA or visit www.sun.com if you need additional information or
-+ * have any questions.
-+ *
-+ * GPL HEADER END
-+ */
-+/*
-+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved
-+ * Use is subject to license terms.
-+ */
-+/*
-+ * This file is part of Lustre, http://www.lustre.org/
-+ * Lustre is a trademark of Sun Microsystems, Inc.
-+ *
-+ * lustre/include/lprocfs_status.h
-+ *
-+ * Top level header file for LProc SNMP
-+ *
-+ * Author: Hariharan Thantry thantry at users.sourceforge.net
-+ */
-+#ifndef _LPROCFS_SNMP_H
-+#define _LPROCFS_SNMP_H
-+
-+#include <lustre/lustre_idl.h>
-+#if defined(__linux__)
-+#include <linux/lprocfs_status.h>
-+#elif defined(__APPLE__)
-+#include <darwin/lprocfs_status.h>
-+#elif defined(__WINNT__)
-+#include <winnt/lprocfs_status.h>
-+#else
-+#error Unsupported operating system.
-+#endif
-+
-+#undef LPROCFS
-+#if (defined(__KERNEL__) && defined(CONFIG_PROC_FS))
-+# define LPROCFS
-+#endif
-+
-+struct lprocfs_vars {
-+ const char *name;
-+ cfs_read_proc_t *read_fptr;
-+ cfs_write_proc_t *write_fptr;
-+ void *data;
-+ struct file_operations *fops;
-+ /**
-+ * /proc file mode.
-+ */
-+ mode_t proc_mode;
-+};
-+
-+struct lprocfs_static_vars {
-+ struct lprocfs_vars *module_vars;
-+ struct lprocfs_vars *obd_vars;
-+};
-+
-+/* if we find more consumers this could be generalized */
-+#define OBD_HIST_MAX 32
-+struct obd_histogram {
-+ spinlock_t oh_lock;
-+ unsigned long oh_buckets[OBD_HIST_MAX];
-+};
-+
-+enum {
-+ BRW_R_PAGES = 0,
-+ BRW_W_PAGES,
-+ BRW_R_RPC_HIST,
-+ BRW_W_RPC_HIST,
-+ BRW_R_IO_TIME,
-+ BRW_W_IO_TIME,
-+ BRW_R_DISCONT_PAGES,
-+ BRW_W_DISCONT_PAGES,
-+ BRW_R_DISCONT_BLOCKS,
-+ BRW_W_DISCONT_BLOCKS,
-+ BRW_R_DISK_IOSIZE,
-+ BRW_W_DISK_IOSIZE,
-+ BRW_R_DIO_FRAGS,
-+ BRW_W_DIO_FRAGS,
-+ BRW_LAST,
-+};
-+
-+struct brw_stats {
-+ struct obd_histogram hist[BRW_LAST];
-+};
-+
-+
-+/* An lprocfs counter can be configured using the enum bit masks below.
-+ *
-+ * LPROCFS_CNTR_EXTERNALLOCK indicates that an external lock already
-+ * protects this counter from concurrent updates. If not specified,
-+ * lprocfs an internal per-counter lock variable. External locks are
-+ * not used to protect counter increments, but are used to protect
-+ * counter readout and resets.
-+ *
-+ * LPROCFS_CNTR_AVGMINMAX indicates a multi-valued counter samples,
-+ * (i.e. counter can be incremented by more than "1"). When specified,
-+ * the counter maintains min, max and sum in addition to a simple
-+ * invocation count. This allows averages to be be computed.
-+ * If not specified, the counter is an increment-by-1 counter.
-+ * min, max, sum, etc. are not maintained.
-+ *
-+ * LPROCFS_CNTR_STDDEV indicates that the counter should track sum of
-+ * squares (for multi-valued counter samples only). This allows
-+ * external computation of standard deviation, but involves a 64-bit
-+ * multiply per counter increment.
-+ */
-+
-+enum {
-+ LPROCFS_CNTR_EXTERNALLOCK = 0x0001,
-+ LPROCFS_CNTR_AVGMINMAX = 0x0002,
-+ LPROCFS_CNTR_STDDEV = 0x0004,
-+
-+ /* counter data type */
-+ LPROCFS_TYPE_REGS = 0x0100,
-+ LPROCFS_TYPE_BYTES = 0x0200,
-+ LPROCFS_TYPE_PAGES = 0x0400,
-+ LPROCFS_TYPE_CYCLE = 0x0800,
-+};
-+
-+struct lprocfs_atomic {
-+ atomic_t la_entry;
-+ atomic_t la_exit;
-+};
-+
-+#define LC_MIN_INIT ((~(__u64)0) >> 1)
-+
-+struct lprocfs_counter {
-+ struct lprocfs_atomic lc_cntl; /* may need to move to per set */
-+ unsigned int lc_config;
-+ __s64 lc_count;
-+ __s64 lc_sum;
-+ __s64 lc_min;
-+ __s64 lc_max;
-+ __s64 lc_sumsquare;
-+ const char *lc_name; /* must be static */
-+ const char *lc_units; /* must be static */
-+};
-+
-+struct lprocfs_percpu {
-+ struct lprocfs_counter lp_cntr[0];
-+};
-+
-+#define LPROCFS_GET_NUM_CPU 0x0001
-+#define LPROCFS_GET_SMP_ID 0x0002
-+
-+enum lprocfs_stats_flags {
-+ LPROCFS_STATS_FLAG_PERCPU = 0x0000, /* per cpu counter */
-+ LPROCFS_STATS_FLAG_NOPERCPU = 0x0001, /* stats have no percpu
-+ * area and need locking */
-+};
-+
-+enum lprocfs_fields_flags {
-+ LPROCFS_FIELDS_FLAGS_CONFIG = 0x0001,
-+ LPROCFS_FIELDS_FLAGS_SUM = 0x0002,
-+ LPROCFS_FIELDS_FLAGS_MIN = 0x0003,
-+ LPROCFS_FIELDS_FLAGS_MAX = 0x0004,
-+ LPROCFS_FIELDS_FLAGS_AVG = 0x0005,
-+ LPROCFS_FIELDS_FLAGS_SUMSQUARE = 0x0006,
-+ LPROCFS_FIELDS_FLAGS_COUNT = 0x0007,
-+};
-+
-+struct lprocfs_stats {
-+ unsigned int ls_num; /* # of counters */
-+ int ls_flags; /* See LPROCFS_STATS_FLAG_* */
-+ spinlock_t ls_lock; /* Lock used only when there are
-+ * no percpu stats areas */
-+ struct lprocfs_percpu *ls_percpu[0];
-+};
-+
-+static inline int opcode_offset(__u32 opc) {
-+ if (opc < OST_LAST_OPC) {
-+ /* OST opcode */
-+ return (opc - OST_FIRST_OPC);
-+ } else if (opc < MDS_LAST_OPC) {
-+ /* MDS opcode */
-+ return (opc - MDS_FIRST_OPC +
-+ (OST_LAST_OPC - OST_FIRST_OPC));
-+ } else if (opc < LDLM_LAST_OPC) {
-+ /* LDLM Opcode */
-+ return (opc - LDLM_FIRST_OPC +
-+ (MDS_LAST_OPC - MDS_FIRST_OPC) +
-+ (OST_LAST_OPC - OST_FIRST_OPC));
-+ } else if (opc < MGS_LAST_OPC) {
-+ /* MGS Opcode */
-+ return (opc - MGS_FIRST_OPC +
-+ (LDLM_LAST_OPC - LDLM_FIRST_OPC) +
-+ (MDS_LAST_OPC - MDS_FIRST_OPC) +
-+ (OST_LAST_OPC - OST_FIRST_OPC));
-+ } else if (opc < OBD_LAST_OPC) {
-+ /* OBD Ping */
-+ return (opc - OBD_FIRST_OPC +
-+ (MGS_LAST_OPC - MGS_FIRST_OPC) +
-+ (LDLM_LAST_OPC - LDLM_FIRST_OPC) +
-+ (MDS_LAST_OPC - MDS_FIRST_OPC) +
-+ (OST_LAST_OPC - OST_FIRST_OPC));
-+ } else if (opc < LLOG_LAST_OPC) {
-+ /* LLOG Opcode */
-+ return (opc - LLOG_FIRST_OPC +
-+ (OBD_LAST_OPC - OBD_FIRST_OPC) +
-+ (MGS_LAST_OPC - MGS_FIRST_OPC) +
-+ (LDLM_LAST_OPC - LDLM_FIRST_OPC) +
-+ (MDS_LAST_OPC - MDS_FIRST_OPC) +
-+ (OST_LAST_OPC - OST_FIRST_OPC));
-+ } else if (opc < QUOTA_LAST_OPC) {
-+ /* LQUOTA Opcode */
-+ return (opc - QUOTA_FIRST_OPC +
-+ (LLOG_LAST_OPC - LLOG_FIRST_OPC) +
-+ (OBD_LAST_OPC - OBD_FIRST_OPC) +
-+ (MGS_LAST_OPC - MGS_FIRST_OPC) +
-+ (LDLM_LAST_OPC - LDLM_FIRST_OPC) +
-+ (MDS_LAST_OPC - MDS_FIRST_OPC) +
-+ (OST_LAST_OPC - OST_FIRST_OPC));
-+ } else {
-+ /* Unknown Opcode */
-+ return -1;
-+ }
-+}
-+
-+#define LUSTRE_MAX_OPCODES ((OST_LAST_OPC - OST_FIRST_OPC) + \
-+ (MDS_LAST_OPC - MDS_FIRST_OPC) + \
-+ (LDLM_LAST_OPC - LDLM_FIRST_OPC) + \
-+ (MGS_LAST_OPC - MGS_FIRST_OPC) + \
-+ (OBD_LAST_OPC - OBD_FIRST_OPC) + \
-+ (LLOG_LAST_OPC - LLOG_FIRST_OPC) + \
-+ (QUOTA_LAST_OPC - QUOTA_FIRST_OPC))
-+
-+#define EXTRA_MAX_OPCODES ((PTLRPC_LAST_CNTR - PTLRPC_FIRST_CNTR) + \
-+ (EXTRA_LAST_OPC - EXTRA_FIRST_OPC))
-+
-+enum {
-+ PTLRPC_REQWAIT_CNTR = 0,
-+ PTLRPC_REQQDEPTH_CNTR,
-+ PTLRPC_REQACTIVE_CNTR,
-+ PTLRPC_TIMEOUT,
-+ PTLRPC_REQBUF_AVAIL_CNTR,
-+ PTLRPC_LAST_CNTR
-+};
-+
-+#define PTLRPC_FIRST_CNTR PTLRPC_REQWAIT_CNTR
-+
-+enum {
-+ LDLM_GLIMPSE_ENQUEUE = 0,
-+ LDLM_PLAIN_ENQUEUE,
-+ LDLM_EXTENT_ENQUEUE,
-+ LDLM_FLOCK_ENQUEUE,
-+ LDLM_IBITS_ENQUEUE,
-+ MDS_REINT_SETATTR,
-+ MDS_REINT_CREATE,
-+ MDS_REINT_LINK,
-+ MDS_REINT_UNLINK,
-+ MDS_REINT_RENAME,
-+ MDS_REINT_OPEN,
-+ BRW_READ_BYTES,
-+ BRW_WRITE_BYTES,
-+ EXTRA_LAST_OPC
-+};
-+
-+#define EXTRA_FIRST_OPC LDLM_GLIMPSE_ENQUEUE
-+/* class_obd.c */
-+extern cfs_proc_dir_entry_t *proc_lustre_root;
-+
-+struct obd_device;
-+struct file;
-+struct obd_histogram;
-+
-+/* Days / hours / mins / seconds format */
-+struct dhms {
-+ int d,h,m,s;
-+};
-+static inline void s2dhms(struct dhms *ts, time_t secs)
-+{
-+ ts->d = secs / 86400;
-+ secs = secs % 86400;
-+ ts->h = secs / 3600;
-+ secs = secs % 3600;
-+ ts->m = secs / 60;
-+ ts->s = secs % 60;
-+}
-+#define DHMS_FMT "%dd%dh%02dm%02ds"
-+#define DHMS_VARS(x) (x)->d, (x)->h, (x)->m, (x)->s
-+
-+
-+#ifdef LPROCFS
-+
-+static inline int lprocfs_stats_lock(struct lprocfs_stats *stats, int type)
-+{
-+ int rc = 0;
-+
-+ if (stats->ls_flags & LPROCFS_STATS_FLAG_NOPERCPU) {
-+ if (type & LPROCFS_GET_NUM_CPU)
-+ rc = 1;
-+ if (type & LPROCFS_GET_SMP_ID)
-+ rc = 0;
-+ spin_lock(&stats->ls_lock);
-+ } else {
-+ if (type & LPROCFS_GET_NUM_CPU)
-+ rc = num_possible_cpus();
-+ if (type & LPROCFS_GET_SMP_ID)
-+ rc = smp_processor_id();
-+ }
-+ return rc;
-+}
-+
-+static inline void lprocfs_stats_unlock(struct lprocfs_stats *stats)
-+{
-+ if (stats->ls_flags & LPROCFS_STATS_FLAG_NOPERCPU)
-+ spin_unlock(&stats->ls_lock);
-+}
-+
-+/* Two optimized LPROCFS counter increment functions are provided:
-+ * lprocfs_counter_incr(cntr, value) - optimized for by-one counters
-+ * lprocfs_counter_add(cntr) - use for multi-valued counters
-+ * Counter data layout allows config flag, counter lock and the
-+ * count itself to reside within a single cache line.
-+ */
-+
-+extern void lprocfs_counter_add(struct lprocfs_stats *stats, int idx,
-+ long amount);
-+extern void lprocfs_counter_sub(struct lprocfs_stats *stats, int idx,
-+ long amount);
-+
-+#define lprocfs_counter_incr(stats, idx) \
-+ lprocfs_counter_add(stats, idx, 1)
-+#define lprocfs_counter_decr(stats, idx) \
-+ lprocfs_counter_sub(stats, idx, 1)
-+
-+extern __s64 lprocfs_read_helper(struct lprocfs_counter *lc,
-+ enum lprocfs_fields_flags field);
-+
-+static inline __u64 lprocfs_stats_collector(struct lprocfs_stats *stats,
-+ int idx,
-+ enum lprocfs_fields_flags field)
-+{
-+ __u64 ret = 0;
-+ int i;
-+
-+ LASSERT(stats != NULL);
-+ for (i = 0; i < num_possible_cpus(); i++)
-+ ret += lprocfs_read_helper(&(stats->ls_percpu[i]->lp_cntr[idx]),
-+ field);
-+ return ret;
-+}
-+
-+extern struct lprocfs_stats *lprocfs_alloc_stats(unsigned int num,
-+ enum lprocfs_stats_flags flags);
-+extern void lprocfs_clear_stats(struct lprocfs_stats *stats);
-+extern void lprocfs_free_stats(struct lprocfs_stats **stats);
-+extern void lprocfs_init_ops_stats(int num_private_stats,
-+ struct lprocfs_stats *stats);
-+extern void lprocfs_init_ldlm_stats(struct lprocfs_stats *ldlm_stats);
-+extern int lprocfs_alloc_obd_stats(struct obd_device *obddev,
-+ unsigned int num_private_stats);
-+extern void lprocfs_counter_init(struct lprocfs_stats *stats, int index,
-+ unsigned conf, const char *name,
-+ const char *units);
-+extern void lprocfs_free_obd_stats(struct obd_device *obddev);
-+struct obd_export;
-+extern int lprocfs_add_clear_entry(struct obd_device * obd,
-+ cfs_proc_dir_entry_t *entry);
-+extern int lprocfs_exp_setup(struct obd_export *exp,
-+ lnet_nid_t *peer_nid, int *newnid);
-+extern int lprocfs_exp_cleanup(struct obd_export *exp);
-+extern int lprocfs_add_simple(struct proc_dir_entry *root,
-+ char *name, read_proc_t *read_proc,
-+ write_proc_t *write_proc, void *data);
-+extern int lprocfs_register_stats(cfs_proc_dir_entry_t *root, const char *name,
-+ struct lprocfs_stats *stats);
-+
-+/* lprocfs_status.c */
-+extern int lprocfs_add_vars(cfs_proc_dir_entry_t *root,
-+ struct lprocfs_vars *var,
-+ void *data);
-+
-+extern cfs_proc_dir_entry_t *lprocfs_register(const char *name,
-+ cfs_proc_dir_entry_t *parent,
-+ struct lprocfs_vars *list,
-+ void *data);
-+
-+extern void lprocfs_remove(cfs_proc_dir_entry_t **root);
-+
-+extern cfs_proc_dir_entry_t *lprocfs_srch(cfs_proc_dir_entry_t *root,
-+ const char *name);
-+
-+extern int lprocfs_obd_setup(struct obd_device *obd, struct lprocfs_vars *list);
-+extern int lprocfs_obd_cleanup(struct obd_device *obd);
-+extern int lprocfs_add_simple(struct proc_dir_entry *root, char *name,
-+ read_proc_t *read_proc, write_proc_t *write_proc,
-+ void *data);
-+struct nid_stat;
-+extern void lprocfs_free_per_client_stats(struct obd_device *obd);
-+extern int lprocfs_nid_stats_clear_write(struct file *file, const char *buffer,
-+ unsigned long count, void *data);
-+extern int lprocfs_nid_stats_clear_read(char *page, char **start, off_t off,
-+ int count, int *eof, void *data);
-+
-+
-+extern struct file_operations lprocfs_evict_client_fops;
-+
-+extern int lprocfs_seq_create(cfs_proc_dir_entry_t *parent, char *name,
-+ mode_t mode, struct file_operations *seq_fops,
-+ void *data);
-+extern int lprocfs_obd_seq_create(struct obd_device *dev, char *name,
-+ mode_t mode, struct file_operations *seq_fops,
-+ void *data);
-+
-+/* Generic callbacks */
-+
-+extern int lprocfs_rd_u64(char *page, char **start, off_t off,
-+ int count, int *eof, void *data);
-+extern int lprocfs_rd_atomic(char *page, char **start, off_t off,
-+ int count, int *eof, void *data);
-+extern int lprocfs_wr_atomic(struct file *file, const char *buffer,
-+ unsigned long count, void *data);
-+extern int lprocfs_rd_uint(char *page, char **start, off_t off,
-+ int count, int *eof, void *data);
-+extern int lprocfs_wr_uint(struct file *file, const char *buffer,
-+ unsigned long count, void *data);
-+extern int lprocfs_rd_uuid(char *page, char **start, off_t off,
-+ int count, int *eof, void *data);
-+extern int lprocfs_rd_name(char *page, char **start, off_t off,
-+ int count, int *eof, void *data);
-+extern int lprocfs_rd_fstype(char *page, char **start, off_t off,
-+ int count, int *eof, void *data);
-+extern int lprocfs_rd_server_uuid(char *page, char **start, off_t off,
-+ int count, int *eof, void *data);
-+extern int lprocfs_rd_conn_uuid(char *page, char **start, off_t off,
-+ int count, int *eof, void *data);
-+extern int lprocfs_rd_import(char *page, char **start, off_t off, int count,
-+ int *eof, void *data);
-+extern int lprocfs_rd_connect_flags(char *page, char **start, off_t off,
-+ int count, int *eof, void *data);
-+extern int lprocfs_rd_num_exports(char *page, char **start, off_t off,
-+ int count, int *eof, void *data);
-+extern int lprocfs_rd_numrefs(char *page, char **start, off_t off,
-+ int count, int *eof, void *data);
-+struct adaptive_timeout;
-+extern int lprocfs_at_hist_helper(char *page, int count, int rc,
-+ struct adaptive_timeout *at);
-+extern int lprocfs_rd_timeouts(char *page, char **start, off_t off,
-+ int count, int *eof, void *data);
-+extern int lprocfs_wr_timeouts(struct file *file, const char *buffer,
-+ unsigned long count, void *data);
-+extern int lprocfs_wr_evict_client(struct file *file, const char *buffer,
-+ unsigned long count, void *data);
-+extern int lprocfs_wr_ping(struct file *file, const char *buffer,
-+ unsigned long count, void *data);
-+
-+/* Statfs helpers */
-+extern int lprocfs_rd_blksize(char *page, char **start, off_t off,
-+ int count, int *eof, void *data);
-+extern int lprocfs_rd_kbytestotal(char *page, char **start, off_t off,
-+ int count, int *eof, void *data);
-+extern int lprocfs_rd_kbytesfree(char *page, char **start, off_t off,
-+ int count, int *eof, void *data);
-+extern int lprocfs_rd_kbytesavail(char *page, char **start, off_t off,
-+ int count, int *eof, void *data);
-+extern int lprocfs_rd_filestotal(char *page, char **start, off_t off,
-+ int count, int *eof, void *data);
-+extern int lprocfs_rd_filesfree(char *page, char **start, off_t off,
-+ int count, int *eof, void *data);
-+extern int lprocfs_rd_filegroups(char *page, char **start, off_t off,
-+ int count, int *eof, void *data);
-+
-+extern int lprocfs_write_helper(const char *buffer, unsigned long count,
-+ int *val);
-+extern int lprocfs_write_frac_helper(const char *buffer, unsigned long count,
-+ int *val, int mult);
-+extern int lprocfs_read_frac_helper(char *buffer, unsigned long count,
-+ long val, int mult);
-+extern int lprocfs_write_u64_helper(const char *buffer, unsigned long count,
-+ __u64 *val);
-+extern int lprocfs_write_frac_u64_helper(const char *buffer, unsigned long count,
-+ __u64 *val, int mult);
-+void lprocfs_oh_tally(struct obd_histogram *oh, unsigned int value);
-+void lprocfs_oh_tally_log2(struct obd_histogram *oh, unsigned int value);
-+void lprocfs_oh_clear(struct obd_histogram *oh);
-+unsigned long lprocfs_oh_sum(struct obd_histogram *oh);
-+
-+/* lprocfs_status.c: counter read/write functions */
-+extern int lprocfs_counter_read(char *page, char **start, off_t off,
-+ int count, int *eof, void *data);
-+extern int lprocfs_counter_write(struct file *file, const char *buffer,
-+ unsigned long count, void *data);
-+
-+/* lprocfs_status.c: recovery status */
-+int lprocfs_obd_rd_recovery_status(char *page, char **start, off_t off,
-+ int count, int *eof, void *data);
-+
-+/* lprocfs_statuc.c: hash statistics */
-+int lprocfs_obd_rd_hash(char *page, char **start, off_t off,
-+ int count, int *eof, void *data);
-+
-+extern int lprocfs_seq_release(struct inode *, struct file *);
-+
-+/* in lprocfs_stat.c, to protect the private data for proc entries */
-+extern struct rw_semaphore _lprocfs_lock;
-+#define LPROCFS_ENTRY() do { \
-+ down_read(&_lprocfs_lock); \
-+} while(0)
-+#define LPROCFS_EXIT() do { \
-+ up_read(&_lprocfs_lock); \
-+} while(0)
-+#define LPROCFS_ENTRY_AND_CHECK(dp) do { \
-+ typecheck(struct proc_dir_entry *, dp); \
-+ LPROCFS_ENTRY(); \
-+ if ((dp)->deleted) { \
-+ LPROCFS_EXIT(); \
-+ return -ENODEV; \
-+ } \
-+} while(0)
-+#define LPROCFS_WRITE_ENTRY() do { \
-+ down_write(&_lprocfs_lock); \
-+} while(0)
-+#define LPROCFS_WRITE_EXIT() do { \
-+ up_write(&_lprocfs_lock); \
-+} while(0)
-+
-+/* You must use these macros when you want to refer to
-+ * the import in a client obd_device for a lprocfs entry */
-+#define LPROCFS_CLIMP_CHECK(obd) do { \
-+ typecheck(struct obd_device *, obd); \
-+ down_read(&(obd)->u.cli.cl_sem); \
-+ if ((obd)->u.cli.cl_import == NULL) { \
-+ up_read(&(obd)->u.cli.cl_sem); \
-+ return -ENODEV; \
-+ } \
-+} while(0)
-+#define LPROCFS_CLIMP_EXIT(obd) \
-+ up_read(&(obd)->u.cli.cl_sem);
-+
-+
-+/* write the name##_seq_show function, call LPROC_SEQ_FOPS_RO for read-only
-+ proc entries; otherwise, you will define name##_seq_write function also for
-+ a read-write proc entry, and then call LPROC_SEQ_SEQ instead. Finally,
-+ call lprocfs_obd_seq_create(obd, filename, 0444, &name#_fops, data); */
-+#define __LPROC_SEQ_FOPS(name, custom_seq_write) \
-+static int name##_seq_open(struct inode *inode, struct file *file) { \
-+ struct proc_dir_entry *dp = PDE(inode); \
-+ int rc; \
-+ LPROCFS_ENTRY_AND_CHECK(dp); \
-+ rc = single_open(file, name##_seq_show, dp->data); \
-+ if (rc) { \
-+ LPROCFS_EXIT(); \
-+ return rc; \
-+ } \
-+ return 0; \
-+} \
-+struct file_operations name##_fops = { \
-+ .owner = THIS_MODULE, \
-+ .open = name##_seq_open, \
-+ .read = seq_read, \
-+ .write = custom_seq_write, \
-+ .llseek = seq_lseek, \
-+ .release = lprocfs_seq_release, \
-+}
-+
-+#define LPROC_SEQ_FOPS_RO(name) __LPROC_SEQ_FOPS(name, NULL)
-+#define LPROC_SEQ_FOPS(name) __LPROC_SEQ_FOPS(name, name##_seq_write)
-+
-+/* lproc_ptlrpc.c */
-+struct ptlrpc_request;
-+extern void target_print_req(void *seq_file, struct ptlrpc_request *req);
-+
-+#ifdef CRAY_XT3
-+/* lprocfs_status.c: read recovery max time bz13079 */
-+int lprocfs_obd_rd_recovery_maxtime(char *page, char **start, off_t off,
-+ int count, int *eof, void *data);
-+
-+/* lprocfs_status.c: write recovery max time bz13079 */
-+int lprocfs_obd_wr_recovery_maxtime(struct file *file, const char *buffer,
-+ unsigned long count, void *data);
-+#endif
-+
-+/* all quota proc functions */
-+extern int lprocfs_quota_rd_bunit(char *page, char **start, off_t off, int count,
-+ int *eof, void *data);
-+extern int lprocfs_quota_wr_bunit(struct file *file, const char *buffer,
-+ unsigned long count, void *data);
-+extern int lprocfs_quota_rd_btune(char *page, char **start, off_t off, int count,
-+ int *eof, void *data);
-+extern int lprocfs_quota_wr_btune(struct file *file, const char *buffer,
-+ unsigned long count, void *data);
-+extern int lprocfs_quota_rd_iunit(char *page, char **start, off_t off, int count,
-+ int *eof, void *data);
-+extern int lprocfs_quota_wr_iunit(struct file *file, const char *buffer,
-+ unsigned long count, void *data);
-+extern int lprocfs_quota_rd_itune(char *page, char **start, off_t off, int count,
-+ int *eof, void *data);
-+extern int lprocfs_quota_wr_itune(struct file *file, const char *buffer,
-+ unsigned long count, void *data);
-+extern int lprocfs_quota_rd_type(char *page, char **start, off_t off, int count,
-+ int *eof, void *data);
-+extern int lprocfs_quota_wr_type(struct file *file, const char *buffer,
-+ unsigned long count, void *data);
-+extern int lprocfs_quota_rd_switch_seconds(char *page, char **start, off_t off,
-+ int count, int *eof, void *data);
-+extern int lprocfs_quota_wr_switch_seconds(struct file *file, const char *buffer,
-+ unsigned long count, void *data);
-+extern int lprocfs_quota_rd_sync_blk(char *page, char **start, off_t off,
-+ int count, int *eof, void *data);
-+extern int lprocfs_quota_wr_sync_blk(struct file *file, const char *buffer,
-+ unsigned long count, void *data);
-+extern int lprocfs_quota_rd_switch_qs(char *page, char **start, off_t off,
-+ int count, int *eof, void *data);
-+extern int lprocfs_quota_wr_switch_qs(struct file *file, const char *buffer,
-+ unsigned long count, void *data);
-+extern int lprocfs_quota_rd_boundary_factor(char *page, char **start, off_t off,
-+ int count, int *eof, void *data);
-+extern int lprocfs_quota_wr_boundary_factor(struct file *file, const char *buffer,
-+ unsigned long count, void *data);
-+extern int lprocfs_quota_rd_least_bunit(char *page, char **start, off_t off,
-+ int count, int *eof, void *data);
-+extern int lprocfs_quota_wr_least_bunit(struct file *file, const char *buffer,
-+ unsigned long count, void *data);
-+extern int lprocfs_quota_rd_least_iunit(char *page, char **start, off_t off,
-+ int count, int *eof, void *data);
-+extern int lprocfs_quota_wr_least_iunit(struct file *file, const char *buffer,
-+ unsigned long count, void *data);
-+extern int lprocfs_quota_rd_qs_factor(char *page, char **start, off_t off,
-+ int count, int *eof, void *data);
-+extern int lprocfs_quota_wr_qs_factor(struct file *file, const char *buffer,
-+ unsigned long count, void *data);
-+
-+#else
-+/* LPROCFS is not defined */
-+static inline void lprocfs_counter_add(struct lprocfs_stats *stats,
-+ int index, long amount) { return; }
-+static inline void lprocfs_counter_incr(struct lprocfs_stats *stats,
-+ int index) { return; }
-+static inline void lprocfs_counter_sub(struct lprocfs_stats *stats,
-+ int index, long amount) { return; }
-+static inline void lprocfs_counter_init(struct lprocfs_stats *stats,
-+ int index, unsigned conf,
-+ const char *name, const char *units)
-+{ return; }
-+
-+static inline __u64 lc_read_helper(struct lprocfs_counter *lc,
-+ enum lprocfs_fields_flags field)
-+{ return 0; }
-+
-+static inline struct lprocfs_stats* lprocfs_alloc_stats(unsigned int num,
-+ enum lprocfs_stats_flags flags)
-+{ return NULL; }
-+static inline void lprocfs_clear_stats(struct lprocfs_stats *stats)
-+{ return; }
-+static inline void lprocfs_free_stats(struct lprocfs_stats **stats)
-+{ return; }
-+static inline int lprocfs_register_stats(cfs_proc_dir_entry_t *root,
-+ const char *name,
-+ struct lprocfs_stats *stats)
-+{ return 0; }
-+static inline void lprocfs_init_ops_stats(int num_private_stats,
-+ struct lprocfs_stats *stats)
-+{ return; }
-+static inline void lprocfs_init_ldlm_stats(struct lprocfs_stats *ldlm_stats)
-+{ return; }
-+static inline int lprocfs_alloc_obd_stats(struct obd_device *obddev,
-+ unsigned int num_private_stats)
-+{ return 0; }
-+static inline void lprocfs_free_obd_stats(struct obd_device *obddev)
-+{ return; }
-+
-+struct obd_export;
-+static inline int lprocfs_add_clear_entry(struct obd_export *exp)
-+{ return 0; }
-+static inline int lprocfs_exp_setup(struct obd_export *exp,
-+ lnet_nid_t *peer_nid, int *newnid)
-+{ return 0; }
-+static inline int lprocfs_exp_cleanup(struct obd_export *exp)
-+{ return 0; }
-+static inline int lprocfs_add_simple(struct proc_dir_entry *root,
-+ char *name,
-+ read_proc_t *read_proc,
-+ write_proc_t *write_proc,
-+ void *data)
-+{return 0; }
-+struct nid_stat;
-+static inline void lprocfs_free_per_client_stats(struct obd_device *obd)
-+{}
-+static inline
-+int lprocfs_nid_stats_clear_write(struct file *file, const char *buffer,
-+ unsigned long count, void *data)
-+{return count;}
-+static inline
-+int lprocfs_nid_stats_clear_read(char *page, char **start, off_t off,
-+ int count, int *eof, void *data)
-+{return count;}
-+
-+
-+static inline cfs_proc_dir_entry_t *
-+lprocfs_register(const char *name, cfs_proc_dir_entry_t *parent,
-+ struct lprocfs_vars *list, void *data) { return NULL; }
-+static inline int lprocfs_add_vars(cfs_proc_dir_entry_t *root,
-+ struct lprocfs_vars *var,
-+ void *data) { return 0; }
-+static inline void lprocfs_remove(cfs_proc_dir_entry_t **root) {};
-+static inline cfs_proc_dir_entry_t *lprocfs_srch(cfs_proc_dir_entry_t *head,
-+ const char *name) {return 0;}
-+static inline int lprocfs_obd_setup(struct obd_device *dev,
-+ struct lprocfs_vars *list) { return 0; }
-+static inline int lprocfs_obd_cleanup(struct obd_device *dev) { return 0; }
-+static inline int lprocfs_rd_u64(char *page, char **start, off_t off,
-+ int count, int *eof, void *data) { return 0; }
-+static inline int lprocfs_rd_uuid(char *page, char **start, off_t off,
-+ int count, int *eof, void *data) { return 0; }
-+static inline int lprocfs_rd_name(char *page, char **start, off_t off,
-+ int count, int *eof, void *data) { return 0; }
-+static inline int lprocfs_rd_server_uuid(char *page, char **start, off_t off,
-+ int count, int *eof, void *data)
-+{ return 0; }
-+static inline int lprocfs_rd_conn_uuid(char *page, char **start, off_t off,
-+ int count, int *eof, void *data)
-+{ return 0; }
-+static inline int lprocfs_rd_import(char *page, char **start, off_t off, int count,
-+ int *eof, void *data) { return 0; }
-+static inline int lprocfs_rd_connect_flags(char *page, char **start, off_t off,
-+ int count, int *eof, void *data)
-+{ return 0; }
-+static inline int lprocfs_rd_num_exports(char *page, char **start, off_t off,
-+ int count, int *eof, void *data)
-+{ return 0; }
-+static inline int lprocfs_rd_numrefs(char *page, char **start, off_t off,
-+ int count, int *eof, void *data)
-+{ return 0; }
-+struct adaptive_timeout;
-+static inline int lprocfs_at_hist_helper(char *page, int count, int rc,
-+ struct adaptive_timeout *at)
-+{ return 0; }
-+static inline int lprocfs_rd_timeouts(char *page, char **start, off_t off,
-+ int count, int *eof, void *data)
-+{ return 0; }
-+static inline int lprocfs_wr_timeouts(struct file *file, const char *buffer,
-+ unsigned long count, void *data)
-+{ return 0; }
-+static inline int lprocfs_wr_evict_client(struct file *file, const char *buffer,
-+ unsigned long count, void *data)
-+{ return 0; }
-+static inline int lprocfs_wr_ping(struct file *file, const char *buffer,
-+ unsigned long count, void *data)
-+{ return 0; }
-+
-+
-+/* Statfs helpers */
-+static inline
-+int lprocfs_rd_blksize(char *page, char **start, off_t off,
-+ int count, int *eof, void *data) { return 0; }
-+static inline
-+int lprocfs_rd_kbytestotal(char *page, char **start, off_t off,
-+ int count, int *eof, void *data) { return 0; }
-+static inline
-+int lprocfs_rd_kbytesfree(char *page, char **start, off_t off,
-+ int count, int *eof, void *data) { return 0; }
-+static inline
-+int lprocfs_rd_kbytesavail(char *page, char **start, off_t off,
-+ int count, int *eof, void *data) { return 0; }
-+static inline
-+int lprocfs_rd_filestotal(char *page, char **start, off_t off,
-+ int count, int *eof, void *data) { return 0; }
-+static inline
-+int lprocfs_rd_filesfree(char *page, char **start, off_t off,
-+ int count, int *eof, void *data) { return 0; }
-+static inline
-+int lprocfs_rd_filegroups(char *page, char **start, off_t off,
-+ int count, int *eof, void *data) { return 0; }
-+static inline
-+void lprocfs_oh_tally(struct obd_histogram *oh, unsigned int value) {}
-+static inline
-+void lprocfs_oh_tally_log2(struct obd_histogram *oh, unsigned int value) {}
-+static inline
-+void lprocfs_oh_clear(struct obd_histogram *oh) {}
-+static inline
-+unsigned long lprocfs_oh_sum(struct obd_histogram *oh) { return 0; }
-+static inline
-+int lprocfs_counter_read(char *page, char **start, off_t off,
-+ int count, int *eof, void *data) { return 0; }
-+static inline
-+int lprocfs_counter_write(struct file *file, const char *buffer,
-+ unsigned long count, void *data) { return 0; }
-+
-+static inline
-+__u64 lprocfs_stats_collector(struct lprocfs_stats *stats, int idx,
-+ enum lprocfs_fields_flags field)
-+{ return (__u64)0; }
-+
-+#define LPROCFS_ENTRY()
-+#define LPROCFS_EXIT()
-+#define LPROCFS_ENTRY_AND_CHECK(dp)
-+#define LPROC_SEQ_FOPS_RO(name)
-+#define LPROC_SEQ_FOPS(name)
-+
-+/* lproc_ptlrpc.c */
-+#define target_print_req NULL
-+
-+#endif /* LPROCFS */
-+
-+#endif /* LPROCFS_SNMP_H */
diff -urNad lustre~/lustre/include/lustre/ll_fiemap.h lustre/lustre/include/lustre/ll_fiemap.h
--- lustre~/lustre/include/lustre/ll_fiemap.h 2009-08-19 09:51:09.000000000 +0200
-+++ lustre/lustre/include/lustre/ll_fiemap.h 2009-08-19 14:10:45.000000000 +0200
-@@ -109,8 +109,45 @@
++++ lustre/lustre/include/lustre/ll_fiemap.h 2009-08-20 10:25:20.000000000 +0200
+@@ -86,7 +86,8 @@
+ #define FIEMAP_EXTENT_UNKNOWN 0x00000002 /* Data location unknown. */
+ #define FIEMAP_EXTENT_DELALLOC 0x00000004 /* Location still pending.
+ * Sets EXTENT_UNKNOWN. */
+-#define FIEMAP_EXTENT_NO_DIRECT 0x00000008 /* Data mapping undefined */
++#define FIEMAP_EXTENT_ENCODED 0x00000008 /* Data can not be read
++ * while fs is unmounted */
+ #define FIEMAP_EXTENT_SECONDARY 0x00000010 /* Data copied offline. May
+ * set EXTENT_NO_DIRECT. */
+ #define FIEMAP_EXTENT_NET 0x00000020 /* Data stored remotely.
+@@ -106,11 +107,60 @@
+ #define FIEMAP_EXTENT_MERGED 0x00001000 /* File does not natively
+ * support extents. Result
+ * merged for efficiency. */
++#define FIEMAP_EXTENT_NO_DIRECT 0x00002000 /* Data mapping undefined */
#else
++# include <linux/fiemap.h>
++
++/* dirty hack, but obviously lustre requires and serves those flags... */
++
+# ifndef FIEMAP_FLAG_DEVICE_ORDER
-+/* dirty hack, but obviously lustre requires and serves FIEMAP_FLAG_DEVICE_ORDER */
-+# define FIEMAP_FLAG_DEVICE_ORDER 0x40000000 /* return device ordered mapping */
++# define FIEMAP_FLAG_DEVICE_ORDER 0x40000000
+# undef FIEMAP_FLAGS_COMPAT
-+# define FIEMAP_FLAGS_COMPAT (FIEMAP_FLAG_SYNC | FIEMAP_FLAG_XATTR | \
-+ FIEMAP_FLAG_DEVICE_ORDER)
++# define FIEMAP_FLAGS_COMPAT (FIEMAP_FLAG_SYNC | FIEMAP_FLAG_XATTR | FIEMAP_FLAG_DEVICE_ORDER)
++# endif
++# ifndef FIEMAP_EXTENT_NO_DIRECT
++# define FIEMAP_EXTENT_NO_DIRECT 0x00002000
++# endif
++# ifndef FIEMAP_EXTENT_SECONDARY
++# define FIEMAP_EXTENT_SECONDARY 0x00000010
++# endif
++# ifndef FIEMAP_EXTENT_DATA_COMPRESSED
++# define FIEMAP_EXTENT_DATA_COMPRESSED 0x00000040
++# endif
++# ifndef FIEMAP_EXTENT_NET
++# define FIEMAP_EXTENT_NET 0x00000020
+# endif
-+
-+#define FIEMAP_EXTENT_NET 0x00000020 /* Data stored remotely.
-+ * Sets EXTENT_NO_DIRECT. */
+
+struct ll_fiemap_extent {
+ __u64 fe_logical; /* logical offset in bytes for the start of
@@ -5439,187 +1259,168 @@ diff -urNad lustre~/lustre/include/lustre/ll_fiemap.h lustre/lustre/include/lust
#endif /* HAVE_LINUX_FIEMAP_H */
+diff -urNad lustre~/lustre/include/lustre_lite.h lustre/lustre/include/lustre_lite.h
+--- lustre~/lustre/include/lustre_lite.h 2009-08-19 09:51:09.000000000 +0200
++++ lustre/lustre/include/lustre_lite.h 2009-08-20 10:25:20.000000000 +0200
+@@ -53,6 +53,13 @@
+ #include <lustre_mds.h>
+ #include <lustre_ha.h>
+
++#ifdef HAVE_CRED_IN_STRUCT_TASK_STRUCT
++# define CRED(ts) ts->cred
++# define CREDENTIALS(ts,x) CRED(ts)->x
++#endif
++
++
++
+ #ifdef __KERNEL__
+
+ /* careful, this is easy to screw up */
diff -urNad lustre~/lustre/liblustre/lutil.c lustre/lustre/liblustre/lutil.c
--- lustre~/lustre/liblustre/lutil.c 2009-08-11 12:37:07.000000000 +0200
-+++ lustre/lustre/liblustre/lutil.c 2009-08-19 14:10:45.000000000 +0200
-@@ -184,7 +184,11 @@
++++ lustre/lustre/liblustre/lutil.c 2009-08-20 10:25:20.000000000 +0200
+@@ -176,7 +176,7 @@
+ #define FAKE_ROOT_CAP 0x1ffffeff
+ #define FAKE_USER_CAP 0
+
+- *res = (current->fsuid == 0) ? FAKE_ROOT_CAP: FAKE_USER_CAP;
++ *res = (CREDENTIALS(current,fsuid) == 0) ? FAKE_ROOT_CAP: FAKE_USER_CAP;
+ #endif
+ }
+
+@@ -184,7 +184,7 @@
{
int i;
-+#ifdef HAS_STRUCT_CRED
-+ if (gid == current->real_cred->fsgid)
-+#else
- if (gid == current->fsgid)
-+#endif
+- if (gid == current->fsgid)
++ if (gid == CREDENTIALS(current,fsgid))
return 1;
for (i = 0; i < current->ngroups; i++) {
-@@ -205,8 +209,13 @@
+@@ -205,8 +205,8 @@
strncpy(current->comm, comm, sizeof(current->comm));
current->pid = getpid();
-+#ifdef HAS_STRUCT_CRED
-+ current->real_cred->fsuid = geteuid();
-+ current->real_cred->fsgid = getegid();
-+#else
- current->fsuid = geteuid();
- current->fsgid = getegid();
-+#endif
+- current->fsuid = geteuid();
+- current->fsgid = getegid();
++ CREDENTIALS(current,fsuid) = geteuid();
++ CREDENTIALS(current,fsgid) = getegid();
memset(¤t->pending, 0, sizeof(current->pending));
current->max_groups = sysconf(_SC_NGROUPS_MAX);
-@@ -221,24 +230,40 @@
+@@ -221,24 +221,24 @@
return -EINVAL;
}
-+#ifdef HAS_STRUCT_CRED
-+ init_capability(¤t->real_cred->cap_effective);
-+#else
- init_capability(¤t->cap_effective);
-+#endif
+- init_capability(¤t->cap_effective);
++ init_capability(&CREDENTIALS(current,cap_effective));
return 0;
}
void cfs_cap_raise(cfs_cap_t cap)
{
-+#ifdef HAS_STRUCT_CRED
-+ current->real_cred->cap_effective |= (1 << cap);
-+#else
- current->cap_effective |= (1 << cap);
-+#endif
+- current->cap_effective |= (1 << cap);
++ CREDENTIALS(current,cap_effective) |= (1 << cap);
}
void cfs_cap_lower(cfs_cap_t cap)
{
-+#ifdef HAS_STRUCT_CRED
-+ current->real_cred->cap_effective &= ~(1 << cap);
-+#else
- current->cap_effective &= ~(1 << cap);
-+#endif
+- current->cap_effective &= ~(1 << cap);
++ CREDENTIALS(current,cap_effective) &= ~(1 << cap);
}
int cfs_cap_raised(cfs_cap_t cap)
{
-+#ifdef HAS_STRUCT_CRED
-+ return current->real_cred->cap_effective & (1 << cap);
-+#else
- return current->cap_effective & (1 << cap);
-+#endif
+- return current->cap_effective & (1 << cap);
++ return CREDENTIALS(current,cap_effective) & (1 << cap);
}
void cfs_kernel_cap_pack(cfs_kernel_cap_t kcap, cfs_cap_t *cap)
-@@ -253,12 +278,20 @@
+@@ -253,12 +253,12 @@
cfs_cap_t cfs_curproc_cap_pack(void) {
cfs_cap_t cap;
-+#ifdef HAS_STRUCT_CRED
-+ cfs_kernel_cap_pack(cfs_current()->real_cred->cap_effective, &cap);
-+#else
- cfs_kernel_cap_pack(cfs_current()->cap_effective, &cap);
-+#endif
+- cfs_kernel_cap_pack(cfs_current()->cap_effective, &cap);
++ cfs_kernel_cap_pack(CREDENTIALS(cfs_current(),cap_effective), &cap);
return cap;
}
void cfs_curproc_cap_unpack(cfs_cap_t cap) {
-+#ifdef HAS_STRUCT_CRED
-+ cfs_kernel_cap_unpack(&cfs_current()->real_cred->cap_effective, cap);
-+#else
- cfs_kernel_cap_unpack(&cfs_current()->cap_effective, cap);
-+#endif
+- cfs_kernel_cap_unpack(&cfs_current()->cap_effective, cap);
++ cfs_kernel_cap_unpack(&CREDENTIALS(cfs_current(),cap_effective), cap);
}
int cfs_capable(cfs_cap_t cap)
diff -urNad lustre~/lustre/liblustre/super.c lustre/lustre/liblustre/super.c
--- lustre~/lustre/liblustre/super.c 2009-08-19 09:51:09.000000000 +0200
-+++ lustre/lustre/liblustre/super.c 2009-08-19 14:10:45.000000000 +0200
-@@ -82,7 +82,11 @@
++++ lustre/lustre/liblustre/super.c 2009-08-20 10:25:20.000000000 +0200
+@@ -82,7 +82,7 @@
struct intnl_stat *st = llu_i2stat(inode);
mode_t mode = st->st_mode;
-+#ifdef HAS_STRUCT_CRED
-+ if (current->real_cred->fsuid == st->st_uid)
-+#else
- if (current->fsuid == st->st_uid)
-+#endif
+- if (current->fsuid == st->st_uid)
++ if (CREDENTIALS(current,fsuid) == st->st_uid)
mode >>= 6;
else if (in_group_p(st->st_gid))
mode >>= 3;
-@@ -731,12 +735,20 @@
+@@ -731,12 +731,12 @@
if (ia_valid & (ATTR_MTIME | ATTR_ATIME)) {
/* from sys_utime() */
if (!(ia_valid & (ATTR_MTIME_SET | ATTR_ATIME_SET))) {
-+#ifdef HAS_STRUCT_CRED
-+ if (current->real_cred->fsuid != st->st_uid &&
-+#else
- if (current->fsuid != st->st_uid &&
-+#endif
+- if (current->fsuid != st->st_uid &&
++ if (CREDENTIALS(current,fsuid) != st->st_uid &&
(rc = ll_permission(inode, MAY_WRITE)) != 0)
RETURN(rc);
} else {
/* from inode_change_ok() */
-+#ifdef HAS_STRUCT_CRED
-+ if (current->real_cred->fsuid != st->st_uid &&
-+#else
- if (current->fsuid != st->st_uid &&
-+#endif
+- if (current->fsuid != st->st_uid &&
++ if (CREDENTIALS(current,fsuid) != st->st_uid &&
!cfs_capable(CFS_CAP_FOWNER))
RETURN(-EPERM);
}
-@@ -889,7 +901,11 @@
+@@ -889,7 +889,7 @@
llu_prepare_mdc_op_data(&op_data, dir, NULL, name, len, 0);
err = mdc_create(sbi->ll_mdc_exp, &op_data, tgt, strlen(tgt) + 1,
-+#ifdef HAS_STRUCT_CRED
-+ S_IFLNK | S_IRWXUGO, current->real_cred->fsuid, current->real_cred->fsgid,
-+#else
- S_IFLNK | S_IRWXUGO, current->fsuid, current->fsgid,
-+#endif
+- S_IFLNK | S_IRWXUGO, current->fsuid, current->fsgid,
++ S_IFLNK | S_IRWXUGO, CREDENTIALS(current,fsuid), CREDENTIALS(current,fsgid),
cfs_curproc_cap_pack(), 0, &request);
ptlrpc_req_finished(request);
liblustre_wait_event(0);
-@@ -1018,7 +1034,11 @@
+@@ -1018,7 +1018,7 @@
pno->p_base->pb_name.len,
0);
err = mdc_create(sbi->ll_mdc_exp, &op_data, NULL, 0, mode,
-+#ifdef HAS_STRUCT_CRED
-+ current->real_cred->fsuid, current->real_cred->fsgid,
-+#else
- current->fsuid, current->fsgid,
-+#endif
+- current->fsuid, current->fsgid,
++ CREDENTIALS(current,fsuid), CREDENTIALS(current,fsgid),
cfs_curproc_cap_pack(), dev, &request);
ptlrpc_req_finished(request);
break;
-@@ -1248,7 +1268,11 @@
+@@ -1248,7 +1248,7 @@
llu_prepare_mdc_op_data(&op_data, dir, NULL, name, len, 0);
err = mdc_create(llu_i2sbi(dir)->ll_mdc_exp, &op_data, NULL, 0,
-+#ifdef HAS_STRUCT_CRED
-+ mode | S_IFDIR, current->real_cred->fsuid, current->real_cred->fsgid,
-+#else
- mode | S_IFDIR, current->fsuid, current->fsgid,
-+#endif
+- mode | S_IFDIR, current->fsuid, current->fsgid,
++ mode | S_IFDIR, CREDENTIALS(current,fsuid), CREDENTIALS(current,fsgid),
cfs_curproc_cap_pack(), 0, &request);
ptlrpc_req_finished(request);
liblustre_wait_event(0);
diff -urNad lustre~/lustre/llite/dir.c lustre/lustre/llite/dir.c
--- lustre~/lustre/llite/dir.c 2009-08-19 09:51:09.000000000 +0200
-+++ lustre/lustre/llite/dir.c 2009-08-19 14:10:45.000000000 +0200
-@@ -955,7 +955,11 @@
++++ lustre/lustre/llite/dir.c 2009-08-20 10:25:20.000000000 +0200
+@@ -955,7 +955,7 @@
GOTO(out_quotactl, rc = -EPERM);
break;
case Q_GETQUOTA:
-+#ifdef HAS_STRUCT_CRED
-+ if (((type == USRQUOTA && current->real_cred->euid != id) ||
-+#else
- if (((type == USRQUOTA && current->euid != id) ||
-+#endif
+- if (((type == USRQUOTA && current->euid != id) ||
++ if (((type == USRQUOTA && CREDENTIALS(current,euid) != id) ||
(type == GRPQUOTA && !in_egroup_p(id))) &&
!cfs_capable(CFS_CAP_SYS_ADMIN))
GOTO(out_quotactl, rc = -EPERM);
diff -urNad lustre~/lustre/llite/file.c lustre/lustre/llite/file.c
---- lustre~/lustre/llite/file.c 2009-08-19 14:10:44.000000000 +0200
-+++ lustre/lustre/llite/file.c 2009-08-19 14:10:45.000000000 +0200
+--- lustre~/lustre/llite/file.c 2009-08-20 10:25:20.000000000 +0200
++++ lustre/lustre/llite/file.c 2009-08-20 10:25:20.000000000 +0200
@@ -1817,11 +1817,12 @@
#endif
}
@@ -5850,3353 +1651,14 @@ diff -urNad lustre~/lustre/llite/file.c lustre/lustre/llite/file.c
.fsync = ll_fsync,
#ifdef HAVE_F_OP_FLOCK
.flock = ll_file_noflock,
-diff -urNad lustre~/lustre/llite/file.c.orig lustre/lustre/llite/file.c.orig
---- lustre~/lustre/llite/file.c.orig 1970-01-01 01:00:00.000000000 +0100
-+++ lustre/lustre/llite/file.c.orig 2009-08-19 14:10:45.000000000 +0200
-@@ -0,0 +1,3335 @@
-+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-+ * vim:expandtab:shiftwidth=8:tabstop=8:
-+ *
-+ * GPL HEADER START
-+ *
-+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
-+ *
-+ * This program is free software; you can redistribute it and/or modify
-+ * it under the terms of the GNU General Public License version 2 only,
-+ * as published by the Free Software Foundation.
-+ *
-+ * This program is distributed in the hope that it will be useful, but
-+ * WITHOUT ANY WARRANTY; without even the implied warranty of
-+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-+ * General Public License version 2 for more details (a copy is included
-+ * in the LICENSE file that accompanied this code).
-+ *
-+ * You should have received a copy of the GNU General Public License
-+ * version 2 along with this program; If not, see
-+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
-+ *
-+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
-+ * CA 95054 USA or visit www.sun.com if you need additional information or
-+ * have any questions.
-+ *
-+ * GPL HEADER END
-+ */
-+/*
-+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved
-+ * Use is subject to license terms.
-+ */
-+/*
-+ * This file is part of Lustre, http://www.lustre.org/
-+ * Lustre is a trademark of Sun Microsystems, Inc.
-+ *
-+ * lustre/llite/file.c
-+ *
-+ * Author: Peter Braam <braam at clusterfs.com>
-+ * Author: Phil Schwan <phil at clusterfs.com>
-+ * Author: Andreas Dilger <adilger at clusterfs.com>
-+ */
-+
-+#define DEBUG_SUBSYSTEM S_LLITE
-+#include <lustre_dlm.h>
-+#include <lustre_lite.h>
-+#include <linux/pagemap.h>
-+#include <linux/file.h>
-+#include <linux/posix_acl.h>
-+#include "llite_internal.h"
-+#include <lustre/ll_fiemap.h>
-+
-+/* also used by llite/special.c:ll_special_open() */
-+struct ll_file_data *ll_file_data_get(void)
-+{
-+ struct ll_file_data *fd;
-+
-+ OBD_SLAB_ALLOC_PTR(fd, ll_file_data_slab);
-+ return fd;
-+}
-+
-+static void ll_file_data_put(struct ll_file_data *fd)
-+{
-+ if (fd != NULL)
-+ OBD_SLAB_FREE_PTR(fd, ll_file_data_slab);
-+}
-+
-+static int ll_close_inode_openhandle(struct inode *inode,
-+ struct obd_client_handle *och)
-+{
-+ struct ptlrpc_request *req = NULL;
-+ struct obd_device *obd;
-+ struct obdo *oa;
-+ int rc;
-+ ENTRY;
-+
-+ obd = class_exp2obd(ll_i2mdcexp(inode));
-+ if (obd == NULL) {
-+ CERROR("Invalid MDC connection handle "LPX64"\n",
-+ ll_i2mdcexp(inode)->exp_handle.h_cookie);
-+ GOTO(out, rc = 0);
-+ }
-+
-+ /*
-+ * here we check if this is forced umount. If so this is called on
-+ * canceling "open lock" and we do not call mdc_close() in this case, as
-+ * it will not be successful, as import is already deactivated.
-+ */
-+ if (obd->obd_force)
-+ GOTO(out, rc = 0);
-+
-+ OBDO_ALLOC(oa);
-+ if (!oa)
-+ RETURN(-ENOMEM); // XXX We leak openhandle and request here.
-+
-+ oa->o_id = inode->i_ino;
-+ oa->o_valid = OBD_MD_FLID;
-+ obdo_from_inode(oa, inode, OBD_MD_FLTYPE | OBD_MD_FLMODE |
-+ OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
-+ OBD_MD_FLATIME | OBD_MD_FLMTIME |
-+ OBD_MD_FLCTIME);
-+ if (ll_is_inode_dirty(inode)) {
-+ oa->o_flags = MDS_BFLAG_UNCOMMITTED_WRITES;
-+ oa->o_valid |= OBD_MD_FLFLAGS;
-+ }
-+
-+ rc = mdc_close(ll_i2mdcexp(inode), oa, och, &req);
-+ if (rc == EAGAIN) {
-+ /* We are the last writer, so the MDS has instructed us to get
-+ * the file size and any write cookies, then close again. */
-+ ll_queue_done_writing(inode);
-+ rc = 0;
-+ } else if (rc) {
-+ CERROR("inode %lu mdc close failed: rc = %d\n",
-+ inode->i_ino, rc);
-+ }
-+
-+ OBDO_FREE(oa);
-+
-+ if (rc == 0) {
-+ rc = ll_objects_destroy(req, inode);
-+ if (rc)
-+ CERROR("inode %lu ll_objects destroy: rc = %d\n",
-+ inode->i_ino, rc);
-+ }
-+
-+ ptlrpc_req_finished(req); /* This is close request */
-+ EXIT;
-+out:
-+ mdc_clear_open_replay_data(och);
-+
-+ return rc;
-+}
-+
-+int ll_mdc_real_close(struct inode *inode, int flags)
-+{
-+ struct ll_inode_info *lli = ll_i2info(inode);
-+ int rc = 0;
-+ struct obd_client_handle **och_p;
-+ struct obd_client_handle *och;
-+ __u64 *och_usecount;
-+
-+ ENTRY;
-+
-+ if (flags & FMODE_WRITE) {
-+ och_p = &lli->lli_mds_write_och;
-+ och_usecount = &lli->lli_open_fd_write_count;
-+ } else if (flags & FMODE_EXEC) {
-+ och_p = &lli->lli_mds_exec_och;
-+ och_usecount = &lli->lli_open_fd_exec_count;
-+ } else {
-+ LASSERT(flags & FMODE_READ);
-+ och_p = &lli->lli_mds_read_och;
-+ och_usecount = &lli->lli_open_fd_read_count;
-+ }
-+
-+ down(&lli->lli_och_sem);
-+ if (*och_usecount) { /* There are still users of this handle, so
-+ skip freeing it. */
-+ up(&lli->lli_och_sem);
-+ RETURN(0);
-+ }
-+ och=*och_p;
-+ *och_p = NULL;
-+ up(&lli->lli_och_sem);
-+
-+ if (och) { /* There might be a race and somebody have freed this och
-+ already */
-+ rc = ll_close_inode_openhandle(inode, och);
-+ och->och_fh.cookie = DEAD_HANDLE_MAGIC;
-+ OBD_FREE(och, sizeof *och);
-+ }
-+
-+ RETURN(rc);
-+}
-+
-+int ll_mdc_close(struct obd_export *mdc_exp, struct inode *inode,
-+ struct file *file)
-+{
-+ struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
-+ struct ll_inode_info *lli = ll_i2info(inode);
-+ int rc = 0;
-+ ENTRY;
-+
-+ /* clear group lock, if present */
-+ if (unlikely(fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
-+ struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
-+ fd->fd_flags &= ~(LL_FILE_GROUP_LOCKED|LL_FILE_IGNORE_LOCK);
-+ rc = ll_extent_unlock(fd, inode, lsm, LCK_GROUP,
-+ &fd->fd_cwlockh);
-+ }
-+
-+ /* Let's see if we have good enough OPEN lock on the file and if
-+ we can skip talking to MDS */
-+ if (file->f_dentry->d_inode) { /* Can this ever be false? */
-+ int lockmode;
-+ int flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_TEST_LOCK;
-+ struct lustre_handle lockh;
-+ struct inode *inode = file->f_dentry->d_inode;
-+ struct ldlm_res_id file_res_id = {.name={inode->i_ino,
-+ inode->i_generation}};
-+ ldlm_policy_data_t policy = {.l_inodebits={MDS_INODELOCK_OPEN}};
-+
-+ down(&lli->lli_och_sem);
-+ if (fd->fd_omode & FMODE_WRITE) {
-+ lockmode = LCK_CW;
-+ LASSERT(lli->lli_open_fd_write_count);
-+ lli->lli_open_fd_write_count--;
-+ } else if (fd->fd_omode & FMODE_EXEC) {
-+ lockmode = LCK_PR;
-+ LASSERT(lli->lli_open_fd_exec_count);
-+ lli->lli_open_fd_exec_count--;
-+ } else {
-+ lockmode = LCK_CR;
-+ LASSERT(lli->lli_open_fd_read_count);
-+ lli->lli_open_fd_read_count--;
-+ }
-+ up(&lli->lli_och_sem);
-+
-+ if (!ldlm_lock_match(mdc_exp->exp_obd->obd_namespace, flags,
-+ &file_res_id, LDLM_IBITS, &policy,lockmode,
-+ &lockh)) {
-+ rc = ll_mdc_real_close(file->f_dentry->d_inode,
-+ fd->fd_omode);
-+ }
-+ } else {
-+ CERROR("Releasing a file %p with negative dentry %p. Name %s",
-+ file, file->f_dentry, file->f_dentry->d_name.name);
-+ }
-+
-+ LUSTRE_FPRIVATE(file) = NULL;
-+ ll_file_data_put(fd);
-+
-+ RETURN(rc);
-+}
-+
-+int lov_test_and_clear_async_rc(struct lov_stripe_md *lsm);
-+
-+/* While this returns an error code, fput() the caller does not, so we need
-+ * to make every effort to clean up all of our state here. Also, applications
-+ * rarely check close errors and even if an error is returned they will not
-+ * re-try the close call.
-+ */
-+int ll_file_release(struct inode *inode, struct file *file)
-+{
-+ struct ll_file_data *fd;
-+ struct ll_sb_info *sbi = ll_i2sbi(inode);
-+ struct ll_inode_info *lli = ll_i2info(inode);
-+ struct lov_stripe_md *lsm = lli->lli_smd;
-+ int rc;
-+ ENTRY;
-+
-+ CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
-+ inode->i_generation, inode);
-+
-+
-+ if (inode->i_sb->s_root != file->f_dentry)
-+ ll_stats_ops_tally(sbi, LPROC_LL_RELEASE, 1);
-+ fd = LUSTRE_FPRIVATE(file);
-+ LASSERT(fd != NULL);
-+
-+ /* The last ref on @file, maybe not the the owner pid of statahead.
-+ * Different processes can open the same dir, "ll_opendir_key" means:
-+ * it is me that should stop the statahead thread. */
-+ if (lli->lli_opendir_key == fd && lli->lli_opendir_pid != 0)
-+ ll_stop_statahead(inode, lli->lli_opendir_key);
-+
-+ if (inode->i_sb->s_root == file->f_dentry) {
-+ LUSTRE_FPRIVATE(file) = NULL;
-+ ll_file_data_put(fd);
-+ RETURN(0);
-+ }
-+
-+ if (lsm)
-+ lov_test_and_clear_async_rc(lsm);
-+ lli->lli_async_rc = 0;
-+
-+ /* Ensure that dirty pages are flushed out with the right creds */
-+ if (file->f_mode & FMODE_WRITE)
-+ filemap_fdatawrite(file->f_mapping);
-+
-+ rc = ll_mdc_close(sbi->ll_mdc_exp, inode, file);
-+ RETURN(rc);
-+}
-+
-+static int ll_intent_file_open(struct file *file, void *lmm,
-+ int lmmsize, struct lookup_intent *itp)
-+{
-+ struct ll_sb_info *sbi = ll_i2sbi(file->f_dentry->d_inode);
-+ struct mdc_op_data data;
-+ struct dentry *parent = file->f_dentry->d_parent;
-+ const char *name = file->f_dentry->d_name.name;
-+ const int len = file->f_dentry->d_name.len;
-+ struct inode *inode = file->f_dentry->d_inode;
-+ struct ptlrpc_request *req;
-+ int rc;
-+ ENTRY;
-+
-+ if (!parent)
-+ RETURN(-ENOENT);
-+
-+ ll_prepare_mdc_op_data(&data, parent->d_inode, inode,
-+ name, len, O_RDWR, NULL);
-+
-+ /* Usually we come here only for NFSD, and we want open lock.
-+ But we can also get here with pre 2.6.15 patchless kernels, and in
-+ that case that lock is also ok */
-+ /* We can also get here if there was cached open handle in revalidate_it
-+ * but it disappeared while we were getting from there to ll_file_open.
-+ * But this means this file was closed and immediatelly opened which
-+ * makes a good candidate for using OPEN lock */
-+ /* If lmmsize & lmm are not 0, we are just setting stripe info
-+ * parameters. No need for the open lock */
-+ if (!lmm && !lmmsize)
-+ itp->it_flags |= MDS_OPEN_LOCK;
-+
-+ rc = mdc_intent_lock(sbi->ll_mdc_exp, &data, lmm, lmmsize, itp,
-+ 0 /*unused */, &req, ll_mdc_blocking_ast, 0);
-+ if (rc == -ESTALE) {
-+ /* reason for keep own exit path - don`t flood log
-+ * with messages with -ESTALE errors.
-+ */
-+ if (!it_disposition(itp, DISP_OPEN_OPEN) ||
-+ it_open_error(DISP_OPEN_OPEN, itp))
-+ GOTO(out, rc);
-+ ll_release_openhandle(file->f_dentry, itp);
-+ GOTO(out, rc);
-+ }
-+
-+ if (rc != 0 || it_open_error(DISP_OPEN_OPEN, itp)) {
-+ rc = rc ? rc : it_open_error(DISP_OPEN_OPEN, itp);
-+ CDEBUG(D_VFSTRACE, "lock enqueue: err: %d\n", rc);
-+ GOTO(out, rc);
-+ }
-+
-+ if (itp->d.lustre.it_lock_mode)
-+ mdc_set_lock_data(&itp->d.lustre.it_lock_handle,
-+ inode);
-+
-+ rc = ll_prep_inode(sbi->ll_osc_exp, &file->f_dentry->d_inode,
-+ req, DLM_REPLY_REC_OFF, NULL);
-+out:
-+ ptlrpc_req_finished(itp->d.lustre.it_data);
-+ it_clear_disposition(itp, DISP_ENQ_COMPLETE);
-+ ll_intent_drop_lock(itp);
-+
-+ RETURN(rc);
-+}
-+
-+
-+static void ll_och_fill(struct ll_inode_info *lli, struct lookup_intent *it,
-+ struct obd_client_handle *och)
-+{
-+ struct ptlrpc_request *req = it->d.lustre.it_data;
-+ struct mds_body *body;
-+
-+ LASSERT(och);
-+
-+ body = lustre_msg_buf(req->rq_repmsg, DLM_REPLY_REC_OFF, sizeof(*body));
-+ LASSERT(body != NULL); /* reply already checked out */
-+ /* and swabbed in mdc_enqueue */
-+ LASSERT(lustre_rep_swabbed(req, DLM_REPLY_REC_OFF));
-+
-+ memcpy(&och->och_fh, &body->handle, sizeof(body->handle));
-+ och->och_magic = OBD_CLIENT_HANDLE_MAGIC;
-+ lli->lli_io_epoch = body->io_epoch;
-+
-+ mdc_set_open_replay_data(och, it->d.lustre.it_data);
-+}
-+
-+int ll_local_open(struct file *file, struct lookup_intent *it,
-+ struct ll_file_data *fd, struct obd_client_handle *och)
-+{
-+ ENTRY;
-+
-+ LASSERT(!LUSTRE_FPRIVATE(file));
-+
-+ LASSERT(fd != NULL);
-+
-+ if (och)
-+ ll_och_fill(ll_i2info(file->f_dentry->d_inode), it, och);
-+ LUSTRE_FPRIVATE(file) = fd;
-+ ll_readahead_init(file->f_dentry->d_inode, &fd->fd_ras);
-+ fd->fd_omode = it->it_flags;
-+
-+ RETURN(0);
-+}
-+
-+/* Open a file, and (for the very first open) create objects on the OSTs at
-+ * this time. If opened with O_LOV_DELAY_CREATE, then we don't do the object
-+ * creation or open until ll_lov_setstripe() ioctl is called. We grab
-+ * lli_open_sem to ensure no other process will create objects, send the
-+ * stripe MD to the MDS, or try to destroy the objects if that fails.
-+ *
-+ * If we already have the stripe MD locally then we don't request it in
-+ * mdc_open(), by passing a lmm_size = 0.
-+ *
-+ * It is up to the application to ensure no other processes open this file
-+ * in the O_LOV_DELAY_CREATE case, or the default striping pattern will be
-+ * used. We might be able to avoid races of that sort by getting lli_open_sem
-+ * before returning in the O_LOV_DELAY_CREATE case and dropping it here
-+ * or in ll_file_release(), but I'm not sure that is desirable/necessary.
-+ */
-+int ll_file_open(struct inode *inode, struct file *file)
-+{
-+ struct ll_inode_info *lli = ll_i2info(inode);
-+ struct lookup_intent *it, oit = { .it_op = IT_OPEN,
-+ .it_flags = file->f_flags };
-+ struct lov_stripe_md *lsm;
-+ struct ptlrpc_request *req = NULL;
-+ struct obd_client_handle **och_p;
-+ __u64 *och_usecount;
-+ struct ll_file_data *fd;
-+ int rc = 0, opendir_set = 0;
-+ ENTRY;
-+
-+ CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), flags %o\n", inode->i_ino,
-+ inode->i_generation, inode, file->f_flags);
-+
-+#ifdef HAVE_VFS_INTENT_PATCHES
-+ it = file->f_it;
-+#else
-+ it = file->private_data; /* XXX: compat macro */
-+ file->private_data = NULL; /* prevent ll_local_open assertion */
-+#endif
-+
-+ fd = ll_file_data_get();
-+ if (fd == NULL)
-+ RETURN(-ENOMEM);
-+
-+ if (S_ISDIR(inode->i_mode)) {
-+again:
-+ spin_lock(&lli->lli_lock);
-+ if (lli->lli_opendir_key == NULL && lli->lli_opendir_pid == 0) {
-+ LASSERT(lli->lli_sai == NULL);
-+ lli->lli_opendir_key = fd;
-+ lli->lli_opendir_pid = cfs_curproc_pid();
-+ opendir_set = 1;
-+ } else if (unlikely(lli->lli_opendir_pid == cfs_curproc_pid() &&
-+ lli->lli_opendir_key != NULL)) {
-+ /* Two cases for this:
-+ * (1) The same process open such directory many times.
-+ * (2) The old process opened the directory, and exited
-+ * before its children processes. Then new process
-+ * with the same pid opens such directory before the
-+ * old process's children processes exit.
-+ * reset stat ahead for such cases. */
-+ spin_unlock(&lli->lli_lock);
-+ CDEBUG(D_INFO, "Conflict statahead for %.*s %lu/%u"
-+ " reset it.\n", file->f_dentry->d_name.len,
-+ file->f_dentry->d_name.name,
-+ inode->i_ino, inode->i_generation);
-+ ll_stop_statahead(inode, lli->lli_opendir_key);
-+ goto again;
-+ }
-+ spin_unlock(&lli->lli_lock);
-+ }
-+
-+ if (inode->i_sb->s_root == file->f_dentry) {
-+ LUSTRE_FPRIVATE(file) = fd;
-+ RETURN(0);
-+ }
-+
-+ if (!it || !it->d.lustre.it_disposition) {
-+ /* Convert f_flags into access mode. We cannot use file->f_mode,
-+ * because everything but O_ACCMODE mask was stripped from it */
-+ if ((oit.it_flags + 1) & O_ACCMODE)
-+ oit.it_flags++;
-+ if (file->f_flags & O_TRUNC)
-+ oit.it_flags |= FMODE_WRITE;
-+
-+ /* kernel only call f_op->open in dentry_open. filp_open calls
-+ * dentry_open after call to open_namei that checks permissions.
-+ * Only nfsd_open call dentry_open directly without checking
-+ * permissions and because of that this code below is safe. */
-+ if (oit.it_flags & FMODE_WRITE)
-+ oit.it_flags |= MDS_OPEN_OWNEROVERRIDE;
-+
-+ /* We do not want O_EXCL here, presumably we opened the file
-+ * already? XXX - NFS implications? */
-+ oit.it_flags &= ~O_EXCL;
-+
-+ it = &oit;
-+ }
-+
-+restart:
-+ /* Let's see if we have file open on MDS already. */
-+ if (it->it_flags & FMODE_WRITE) {
-+ och_p = &lli->lli_mds_write_och;
-+ och_usecount = &lli->lli_open_fd_write_count;
-+ } else if (it->it_flags & FMODE_EXEC) {
-+ och_p = &lli->lli_mds_exec_och;
-+ och_usecount = &lli->lli_open_fd_exec_count;
-+ } else {
-+ och_p = &lli->lli_mds_read_och;
-+ och_usecount = &lli->lli_open_fd_read_count;
-+ }
-+
-+ LASSERTF(it->it_flags != 0, "it %p dist %d \n", it,
-+ it->d.lustre.it_disposition);
-+
-+ down(&lli->lli_och_sem);
-+ if (*och_p) { /* Open handle is present */
-+ if (it_disposition(it, DISP_OPEN_OPEN)) {
-+ /* Well, there's extra open request that we do not need,
-+ let's close it somehow. This will decref request. */
-+ rc = it_open_error(DISP_OPEN_OPEN, it);
-+ if (rc) {
-+ up(&lli->lli_och_sem);
-+ ll_file_data_put(fd);
-+ GOTO(out_openerr, rc);
-+ }
-+ ll_release_openhandle(file->f_dentry, it);
-+ lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats,
-+ LPROC_LL_OPEN);
-+ }
-+ (*och_usecount)++;
-+
-+ rc = ll_local_open(file, it, fd, NULL);
-+
-+ LASSERTF(rc == 0, "rc = %d\n", rc);
-+ } else {
-+ LASSERT(*och_usecount == 0);
-+ if (!it->d.lustre.it_disposition) {
-+ /* We cannot just request lock handle now, new ELC code
-+ means that one of other OPEN locks for this file
-+ could be cancelled, and since blocking ast handler
-+ would attempt to grab och_sem as well, that would
-+ result in a deadlock */
-+ up(&lli->lli_och_sem);
-+ rc = ll_intent_file_open(file, NULL, 0, it);
-+ if (rc) {
-+ ll_file_data_put(fd);
-+ GOTO(out_openerr, rc);
-+ }
-+
-+ mdc_set_lock_data(&it->d.lustre.it_lock_handle,
-+ file->f_dentry->d_inode);
-+ goto restart;
-+ }
-+
-+ OBD_ALLOC(*och_p, sizeof (struct obd_client_handle));
-+ if (!*och_p) {
-+ ll_file_data_put(fd);
-+ GOTO(out_och_free, rc = -ENOMEM);
-+ }
-+ (*och_usecount)++;
-+ req = it->d.lustre.it_data;
-+
-+ /* mdc_intent_lock() didn't get a request ref if there was an
-+ * open error, so don't do cleanup on the request here
-+ * (bug 3430) */
-+ /* XXX (green): Should not we bail out on any error here, not
-+ * just open error? */
-+ rc = it_open_error(DISP_OPEN_OPEN, it);
-+ if (rc) {
-+ ll_file_data_put(fd);
-+ GOTO(out_och_free, rc);
-+ }
-+
-+ ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_OPEN, 1);
-+ rc = ll_local_open(file, it, fd, *och_p);
-+ LASSERTF(rc == 0, "rc = %d\n", rc);
-+ }
-+ up(&lli->lli_och_sem);
-+
-+ /* Must do this outside lli_och_sem lock to prevent deadlock where
-+ different kind of OPEN lock for this same inode gets cancelled
-+ by ldlm_cancel_lru */
-+ if (!S_ISREG(inode->i_mode))
-+ GOTO(out, rc);
-+
-+ lsm = lli->lli_smd;
-+ if (lsm == NULL) {
-+ if (file->f_flags & O_LOV_DELAY_CREATE ||
-+ !(file->f_mode & FMODE_WRITE)) {
-+ CDEBUG(D_INODE, "object creation was delayed\n");
-+ GOTO(out, rc);
-+ }
-+ }
-+ file->f_flags &= ~O_LOV_DELAY_CREATE;
-+ GOTO(out, rc);
-+ out:
-+ ptlrpc_req_finished(req);
-+ if (req)
-+ it_clear_disposition(it, DISP_ENQ_OPEN_REF);
-+ if (rc == 0) {
-+ ll_open_complete(inode);
-+ } else {
-+out_och_free:
-+ if (*och_p) {
-+ OBD_FREE(*och_p, sizeof (struct obd_client_handle));
-+ *och_p = NULL; /* OBD_FREE writes some magic there */
-+ (*och_usecount)--;
-+ }
-+ up(&lli->lli_och_sem);
-+out_openerr:
-+ if (opendir_set != 0)
-+ ll_stop_statahead(inode, lli->lli_opendir_key);
-+ }
-+
-+ return rc;
-+}
-+
-+/* Fills the obdo with the attributes for the inode defined by lsm */
-+int ll_lsm_getattr(struct obd_export *exp, struct lov_stripe_md *lsm,
-+ struct obdo *oa)
-+{
-+ struct ptlrpc_request_set *set;
-+ struct obd_info oinfo = { { { 0 } } };
-+ int rc;
-+ ENTRY;
-+
-+ LASSERT(lsm != NULL);
-+
-+ memset(oa, 0, sizeof *oa);
-+ oinfo.oi_md = lsm;
-+ oinfo.oi_oa = oa;
-+ oa->o_id = lsm->lsm_object_id;
-+ oa->o_mode = S_IFREG;
-+ oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLSIZE |
-+ OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ | OBD_MD_FLMTIME |
-+ OBD_MD_FLCTIME;
-+
-+ set = ptlrpc_prep_set();
-+ if (set == NULL) {
-+ rc = -ENOMEM;
-+ } else {
-+ rc = obd_getattr_async(exp, &oinfo, set);
-+ if (rc == 0)
-+ rc = ptlrpc_set_wait(set);
-+ ptlrpc_set_destroy(set);
-+ }
-+ if (rc)
-+ RETURN(rc);
-+
-+ oa->o_valid &= (OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ | OBD_MD_FLMTIME |
-+ OBD_MD_FLCTIME | OBD_MD_FLSIZE);
-+ RETURN(0);
-+}
-+
-+static int ll_lock_to_stripe_offset(struct inode *inode, struct ldlm_lock *lock)
-+{
-+ struct ll_inode_info *lli = ll_i2info(inode);
-+ struct lov_stripe_md *lsm = lli->lli_smd;
-+ struct obd_export *exp = ll_i2obdexp(inode);
-+ struct {
-+ char name[16];
-+ struct ldlm_lock *lock;
-+ } key = { .name = KEY_LOCK_TO_STRIPE, .lock = lock };
-+ __u32 stripe, vallen = sizeof(stripe);
-+ int rc;
-+ ENTRY;
-+
-+ if (lsm->lsm_stripe_count == 1)
-+ GOTO(check, stripe = 0);
-+
-+ /* get our offset in the lov */
-+ rc = obd_get_info(exp, sizeof(key), &key, &vallen, &stripe, lsm);
-+ if (rc != 0) {
-+ CERROR("obd_get_info: rc = %d\n", rc);
-+ RETURN(rc);
-+ }
-+ LASSERT(stripe < lsm->lsm_stripe_count);
-+
-+check:
-+ if (lsm->lsm_oinfo[stripe]->loi_id != lock->l_resource->lr_name.name[0]||
-+ lsm->lsm_oinfo[stripe]->loi_gr != lock->l_resource->lr_name.name[1]){
-+ LDLM_ERROR(lock, "resource doesn't match object "LPU64"/"LPU64,
-+ lsm->lsm_oinfo[stripe]->loi_id,
-+ lsm->lsm_oinfo[stripe]->loi_gr);
-+ RETURN(-ELDLM_NO_LOCK_DATA);
-+ }
-+
-+ RETURN(stripe);
-+}
-+
-+/* Get extra page reference to ensure it is not going away */
-+void ll_pin_extent_cb(void *data)
-+{
-+ struct page *page = data;
-+
-+ page_cache_get(page);
-+
-+ return;
-+}
-+/* Flush the page from page cache for an extent as its canceled.
-+ * Page to remove is delivered as @data.
-+ *
-+ * No one can dirty the extent until we've finished our work and they cannot
-+ * enqueue another lock. The DLM protects us from ll_file_read/write here,
-+ * but other kernel actors could have pages locked.
-+ *
-+ * If @discard is set, there is no need to write the page if it is dirty.
-+ *
-+ * Called with the DLM lock held. */
-+int ll_page_removal_cb(void *data, int discard)
-+{
-+ int rc;
-+ struct page *page = data;
-+ struct address_space *mapping;
-+
-+ ENTRY;
-+
-+ /* We have page reference already from ll_pin_page */
-+ lock_page(page);
-+
-+ /* Already truncated by somebody */
-+ if (!page->mapping)
-+ GOTO(out, rc = 0);
-+
-+ mapping = page->mapping;
-+
-+ ll_teardown_mmaps(mapping,
-+ (__u64)page->index << PAGE_CACHE_SHIFT,
-+ ((__u64)page->index<<PAGE_CACHE_SHIFT)|
-+ ~PAGE_CACHE_MASK);
-+ LL_CDEBUG_PAGE(D_PAGE, page, "removing page\n");
-+ if (!discard && PageWriteback(page))
-+ wait_on_page_writeback(page);
-+
-+ if (!discard && clear_page_dirty_for_io(page)) {
-+ rc = ll_call_writepage(page->mapping->host, page);
-+ /* either waiting for io to complete or reacquiring
-+ * the lock that the failed writepage released */
-+ lock_page(page);
-+ wait_on_page_writeback(page);
-+ if (rc < 0) {
-+ CERROR("writepage inode %lu(%p) of page %p "
-+ "failed: %d\n", mapping->host->i_ino,
-+ mapping->host, page, rc);
-+ if (rc == -ENOSPC)
-+ set_bit(AS_ENOSPC, &mapping->flags);
-+ else
-+ set_bit(AS_EIO, &mapping->flags);
-+ }
-+ }
-+ if (page->mapping != NULL) {
-+ struct ll_async_page *llap = llap_cast_private(page);
-+ // checking again to account for writeback's lock_page()
-+ LL_CDEBUG_PAGE(D_PAGE, page, "truncating\n");
-+ if (llap)
-+ ll_ra_accounting(llap, page->mapping);
-+ ll_truncate_complete_page(page);
-+ }
-+ EXIT;
-+out:
-+ LASSERT(!PageWriteback(page));
-+ unlock_page(page);
-+ page_cache_release(page);
-+
-+ return 0;
-+}
-+
-+int ll_extent_lock_cancel_cb(struct ldlm_lock *lock, struct ldlm_lock_desc *new,
-+ void *data, int flag)
-+{
-+ struct inode *inode;
-+ struct ll_inode_info *lli;
-+ struct lov_stripe_md *lsm;
-+ int stripe;
-+ __u64 kms;
-+
-+ ENTRY;
-+
-+ if ((unsigned long)data > 0 && (unsigned long)data < 0x1000) {
-+ LDLM_ERROR(lock, "cancelling lock with bad data %p", data);
-+ LBUG();
-+ }
-+
-+ inode = ll_inode_from_lock(lock);
-+ if (inode == NULL)
-+ RETURN(0);
-+ lli = ll_i2info(inode);
-+ if (lli == NULL)
-+ GOTO(iput, 0);
-+ if (lli->lli_smd == NULL)
-+ GOTO(iput, 0);
-+ lsm = lli->lli_smd;
-+
-+ stripe = ll_lock_to_stripe_offset(inode, lock);
-+ if (stripe < 0)
-+ GOTO(iput, 0);
-+
-+ lov_stripe_lock(lsm);
-+ lock_res_and_lock(lock);
-+ kms = ldlm_extent_shift_kms(lock,
-+ lsm->lsm_oinfo[stripe]->loi_kms);
-+
-+ if (lsm->lsm_oinfo[stripe]->loi_kms != kms)
-+ LDLM_DEBUG(lock, "updating kms from "LPU64" to "LPU64,
-+ lsm->lsm_oinfo[stripe]->loi_kms, kms);
-+ lsm->lsm_oinfo[stripe]->loi_kms = kms;
-+ unlock_res_and_lock(lock);
-+ lov_stripe_unlock(lsm);
-+ ll_try_done_writing(inode);
-+ EXIT;
-+iput:
-+ iput(inode);
-+
-+ return 0;
-+}
-+
-+#if 0
-+int ll_async_completion_ast(struct ldlm_lock *lock, int flags, void *data)
-+{
-+ /* XXX ALLOCATE - 160 bytes */
-+ struct inode *inode = ll_inode_from_lock(lock);
-+ struct ll_inode_info *lli = ll_i2info(inode);
-+ struct lustre_handle lockh = { 0 };
-+ struct ost_lvb *lvb;
-+ int stripe;
-+ ENTRY;
-+
-+ if (flags & (LDLM_FL_BLOCK_WAIT | LDLM_FL_BLOCK_GRANTED |
-+ LDLM_FL_BLOCK_CONV)) {
-+ LBUG(); /* not expecting any blocked async locks yet */
-+ LDLM_DEBUG(lock, "client-side async enqueue returned a blocked "
-+ "lock, returning");
-+ ldlm_lock_dump(D_OTHER, lock, 0);
-+ ldlm_reprocess_all(lock->l_resource);
-+ RETURN(0);
-+ }
-+
-+ LDLM_DEBUG(lock, "client-side async enqueue: granted/glimpsed");
-+
-+ stripe = ll_lock_to_stripe_offset(inode, lock);
-+ if (stripe < 0)
-+ goto iput;
-+
-+ if (lock->l_lvb_len) {
-+ struct lov_stripe_md *lsm = lli->lli_smd;
-+ __u64 kms;
-+ lvb = lock->l_lvb_data;
-+ lsm->lsm_oinfo[stripe].loi_rss = lvb->lvb_size;
-+
-+ lock_res_and_lock(lock);
-+ ll_inode_size_lock(inode, 1);
-+ kms = MAX(lsm->lsm_oinfo[stripe].loi_kms, lvb->lvb_size);
-+ kms = ldlm_extent_shift_kms(NULL, kms);
-+ if (lsm->lsm_oinfo[stripe].loi_kms != kms)
-+ LDLM_DEBUG(lock, "updating kms from "LPU64" to "LPU64,
-+ lsm->lsm_oinfo[stripe].loi_kms, kms);
-+ lsm->lsm_oinfo[stripe].loi_kms = kms;
-+ ll_inode_size_unlock(inode, 1);
-+ unlock_res_and_lock(lock);
-+ }
-+
-+iput:
-+ iput(inode);
-+ wake_up(&lock->l_waitq);
-+
-+ ldlm_lock2handle(lock, &lockh);
-+ ldlm_lock_decref(&lockh, LCK_PR);
-+ RETURN(0);
-+}
-+#endif
-+
-+static int ll_glimpse_callback(struct ldlm_lock *lock, void *reqp)
-+{
-+ struct ptlrpc_request *req = reqp;
-+ struct inode *inode = ll_inode_from_lock(lock);
-+ struct ll_inode_info *lli;
-+ struct lov_stripe_md *lsm;
-+ struct ost_lvb *lvb;
-+ int rc, stripe;
-+ int size[2] = { sizeof(struct ptlrpc_body), sizeof(*lvb) };
-+ ENTRY;
-+
-+ if (inode == NULL)
-+ GOTO(out, rc = -ELDLM_NO_LOCK_DATA);
-+ lli = ll_i2info(inode);
-+ if (lli == NULL)
-+ GOTO(iput, rc = -ELDLM_NO_LOCK_DATA);
-+ lsm = lli->lli_smd;
-+ if (lsm == NULL)
-+ GOTO(iput, rc = -ELDLM_NO_LOCK_DATA);
-+
-+ /* First, find out which stripe index this lock corresponds to. */
-+ stripe = ll_lock_to_stripe_offset(inode, lock);
-+ if (stripe < 0)
-+ GOTO(iput, rc = -ELDLM_NO_LOCK_DATA);
-+
-+ rc = lustre_pack_reply(req, 2, size, NULL);
-+ if (rc)
-+ GOTO(iput, rc);
-+
-+ lvb = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF, sizeof(*lvb));
-+ lvb->lvb_size = lli->lli_smd->lsm_oinfo[stripe]->loi_kms;
-+ lvb->lvb_mtime = LTIME_S(inode->i_mtime);
-+ lvb->lvb_atime = LTIME_S(inode->i_atime);
-+ lvb->lvb_ctime = LTIME_S(inode->i_ctime);
-+
-+ LDLM_DEBUG(lock, "i_size: %llu -> stripe number %u -> kms "LPU64
-+ " atime "LPU64", mtime "LPU64", ctime "LPU64,
-+ i_size_read(inode), stripe, lvb->lvb_size, lvb->lvb_mtime,
-+ lvb->lvb_atime, lvb->lvb_ctime);
-+ iput:
-+ iput(inode);
-+
-+ out:
-+ /* These errors are normal races, so we don't want to fill the console
-+ * with messages by calling ptlrpc_error() */
-+ if (rc == -ELDLM_NO_LOCK_DATA)
-+ lustre_pack_reply(req, 1, NULL, NULL);
-+
-+ req->rq_status = rc;
-+ return rc;
-+}
-+
-+int ll_glimpse_ioctl(struct ll_sb_info *sbi, struct lov_stripe_md *lsm,
-+ lstat_t *st)
-+{
-+ struct lustre_handle lockh = { 0 };
-+ struct ldlm_enqueue_info einfo = { 0 };
-+ struct obd_info oinfo = { { { 0 } } };
-+ struct ost_lvb lvb;
-+ int rc;
-+
-+ ENTRY;
-+
-+ einfo.ei_type = LDLM_EXTENT;
-+ einfo.ei_mode = LCK_PR;
-+ einfo.ei_cb_bl = osc_extent_blocking_cb;
-+ einfo.ei_cb_cp = ldlm_completion_ast;
-+ einfo.ei_cb_gl = ll_glimpse_callback;
-+ einfo.ei_cbdata = NULL;
-+
-+ oinfo.oi_policy.l_extent.end = OBD_OBJECT_EOF;
-+ oinfo.oi_lockh = &lockh;
-+ oinfo.oi_md = lsm;
-+ oinfo.oi_flags = LDLM_FL_HAS_INTENT;
-+
-+ rc = obd_enqueue_rqset(sbi->ll_osc_exp, &oinfo, &einfo);
-+ if (rc == -ENOENT)
-+ RETURN(rc);
-+ if (rc != 0) {
-+ CERROR("obd_enqueue returned rc %d, "
-+ "returning -EIO\n", rc);
-+ RETURN(rc > 0 ? -EIO : rc);
-+ }
-+
-+ lov_stripe_lock(lsm);
-+ memset(&lvb, 0, sizeof(lvb));
-+ obd_merge_lvb(sbi->ll_osc_exp, lsm, &lvb, 0);
-+ st->st_size = lvb.lvb_size;
-+ st->st_blocks = lvb.lvb_blocks;
-+ st->st_mtime = lvb.lvb_mtime;
-+ st->st_atime = lvb.lvb_atime;
-+ st->st_ctime = lvb.lvb_ctime;
-+ lov_stripe_unlock(lsm);
-+
-+ RETURN(rc);
-+}
-+
-+/* NB: obd_merge_lvb will prefer locally cached writes if they extend the
-+ * file (because it prefers KMS over RSS when larger) */
-+int ll_glimpse_size(struct inode *inode, int ast_flags)
-+{
-+ struct ll_inode_info *lli = ll_i2info(inode);
-+ struct ll_sb_info *sbi = ll_i2sbi(inode);
-+ struct lustre_handle lockh = { 0 };
-+ struct ldlm_enqueue_info einfo = { 0 };
-+ struct obd_info oinfo = { { { 0 } } };
-+ struct ost_lvb lvb;
-+ int rc;
-+ ENTRY;
-+
-+ CDEBUG(D_DLMTRACE, "Glimpsing inode %lu\n", inode->i_ino);
-+
-+ if (!lli->lli_smd) {
-+ CDEBUG(D_DLMTRACE, "No objects for inode %lu\n", inode->i_ino);
-+ RETURN(0);
-+ }
-+
-+ /* NOTE: this looks like DLM lock request, but it may not be one. Due
-+ * to LDLM_FL_HAS_INTENT flag, this is glimpse request, that
-+ * won't revoke any conflicting DLM locks held. Instead,
-+ * ll_glimpse_callback() will be called on each client
-+ * holding a DLM lock against this file, and resulting size
-+ * will be returned for each stripe. DLM lock on [0, EOF] is
-+ * acquired only if there were no conflicting locks. */
-+ einfo.ei_type = LDLM_EXTENT;
-+ einfo.ei_mode = LCK_PR;
-+ einfo.ei_cb_bl = osc_extent_blocking_cb;
-+ einfo.ei_cb_cp = ldlm_completion_ast;
-+ einfo.ei_cb_gl = ll_glimpse_callback;
-+ einfo.ei_cbdata = inode;
-+
-+ oinfo.oi_policy.l_extent.end = OBD_OBJECT_EOF;
-+ oinfo.oi_lockh = &lockh;
-+ oinfo.oi_md = lli->lli_smd;
-+ oinfo.oi_flags = ast_flags | LDLM_FL_HAS_INTENT;
-+
-+ rc = obd_enqueue_rqset(sbi->ll_osc_exp, &oinfo, &einfo);
-+ if (rc == -ENOENT)
-+ RETURN(rc);
-+ if (rc != 0) {
-+ CERROR("obd_enqueue returned rc %d, returning -EIO\n", rc);
-+ RETURN(rc > 0 ? -EIO : rc);
-+ }
-+
-+ ll_inode_size_lock(inode, 1);
-+ inode_init_lvb(inode, &lvb);
-+ rc = obd_merge_lvb(sbi->ll_osc_exp, lli->lli_smd, &lvb, 0);
-+ i_size_write(inode, lvb.lvb_size);
-+ inode->i_blocks = lvb.lvb_blocks;
-+ LTIME_S(inode->i_mtime) = lvb.lvb_mtime;
-+ LTIME_S(inode->i_atime) = lvb.lvb_atime;
-+ LTIME_S(inode->i_ctime) = lvb.lvb_ctime;
-+ ll_inode_size_unlock(inode, 1);
-+
-+ CDEBUG(D_DLMTRACE, "glimpse: size: %llu, blocks: %llu\n",
-+ i_size_read(inode), (long long)inode->i_blocks);
-+
-+ RETURN(rc);
-+}
-+
-+int ll_extent_lock(struct ll_file_data *fd, struct inode *inode,
-+ struct lov_stripe_md *lsm, int mode,
-+ ldlm_policy_data_t *policy, struct lustre_handle *lockh,
-+ int ast_flags)
-+{
-+ struct ll_sb_info *sbi = ll_i2sbi(inode);
-+ struct ost_lvb lvb;
-+ struct ldlm_enqueue_info einfo = { 0 };
-+ struct obd_info oinfo = { { { 0 } } };
-+ int rc;
-+ ENTRY;
-+
-+ LASSERT(!lustre_handle_is_used(lockh));
-+ LASSERT(lsm != NULL);
-+
-+ /* don't drop the mmapped file to LRU */
-+ if (mapping_mapped(inode->i_mapping))
-+ ast_flags |= LDLM_FL_NO_LRU;
-+
-+ /* XXX phil: can we do this? won't it screw the file size up? */
-+ if ((fd && (fd->fd_flags & LL_FILE_IGNORE_LOCK)) ||
-+ (sbi->ll_flags & LL_SBI_NOLCK))
-+ RETURN(0);
-+
-+ CDEBUG(D_DLMTRACE, "Locking inode %lu, start "LPU64" end "LPU64"\n",
-+ inode->i_ino, policy->l_extent.start, policy->l_extent.end);
-+
-+ einfo.ei_type = LDLM_EXTENT;
-+ einfo.ei_mode = mode;
-+ einfo.ei_cb_bl = osc_extent_blocking_cb;
-+ einfo.ei_cb_cp = ldlm_completion_ast;
-+ einfo.ei_cb_gl = ll_glimpse_callback;
-+ einfo.ei_cbdata = inode;
-+
-+ oinfo.oi_policy = *policy;
-+ oinfo.oi_lockh = lockh;
-+ oinfo.oi_md = lsm;
-+ oinfo.oi_flags = ast_flags;
-+
-+ rc = obd_enqueue(sbi->ll_osc_exp, &oinfo, &einfo, NULL);
-+ *policy = oinfo.oi_policy;
-+ if (rc > 0)
-+ rc = -EIO;
-+
-+ ll_inode_size_lock(inode, 1);
-+ inode_init_lvb(inode, &lvb);
-+ obd_merge_lvb(sbi->ll_osc_exp, lsm, &lvb, 1);
-+
-+ if (policy->l_extent.start == 0 &&
-+ policy->l_extent.end == OBD_OBJECT_EOF) {
-+ /* vmtruncate()->ll_truncate() first sets the i_size and then
-+ * the kms under both a DLM lock and the
-+ * ll_inode_size_lock(). If we don't get the
-+ * ll_inode_size_lock() here we can match the DLM lock and
-+ * reset i_size from the kms before the truncating path has
-+ * updated the kms. generic_file_write can then trust the
-+ * stale i_size when doing appending writes and effectively
-+ * cancel the result of the truncate. Getting the
-+ * ll_inode_size_lock() after the enqueue maintains the DLM
-+ * -> ll_inode_size_lock() acquiring order. */
-+ i_size_write(inode, lvb.lvb_size);
-+ CDEBUG(D_INODE, "inode=%lu, updating i_size %llu\n",
-+ inode->i_ino, i_size_read(inode));
-+ }
-+
-+ if (rc == 0) {
-+ LTIME_S(inode->i_mtime) = lvb.lvb_mtime;
-+ LTIME_S(inode->i_atime) = lvb.lvb_atime;
-+ LTIME_S(inode->i_ctime) = lvb.lvb_ctime;
-+ }
-+ ll_inode_size_unlock(inode, 1);
-+
-+ RETURN(rc);
-+}
-+
-+int ll_extent_unlock(struct ll_file_data *fd, struct inode *inode,
-+ struct lov_stripe_md *lsm, int mode,
-+ struct lustre_handle *lockh)
-+{
-+ struct ll_sb_info *sbi = ll_i2sbi(inode);
-+ int rc;
-+ ENTRY;
-+
-+ /* XXX phil: can we do this? won't it screw the file size up? */
-+ if ((fd && (fd->fd_flags & LL_FILE_IGNORE_LOCK)) ||
-+ (sbi->ll_flags & LL_SBI_NOLCK))
-+ RETURN(0);
-+
-+ rc = obd_cancel(sbi->ll_osc_exp, lsm, mode, lockh);
-+
-+ RETURN(rc);
-+}
-+
-+static void ll_set_file_contended(struct inode *inode)
-+{
-+ struct ll_inode_info *lli = ll_i2info(inode);
-+
-+ lli->lli_contention_time = cfs_time_current();
-+ set_bit(LLI_F_CONTENDED, &lli->lli_flags);
-+}
-+
-+void ll_clear_file_contended(struct inode *inode)
-+{
-+ struct ll_inode_info *lli = ll_i2info(inode);
-+
-+ clear_bit(LLI_F_CONTENDED, &lli->lli_flags);
-+}
-+
-+static int ll_is_file_contended(struct file *file)
-+{
-+ struct inode *inode = file->f_dentry->d_inode;
-+ struct ll_inode_info *lli = ll_i2info(inode);
-+ struct ll_sb_info *sbi = ll_i2sbi(inode);
-+ struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
-+ ENTRY;
-+
-+ if (!(sbi->ll_lco.lco_flags & OBD_CONNECT_SRVLOCK)) {
-+ CDEBUG(D_INFO, "the server does not support SRVLOCK feature,"
-+ " osc connect flags = 0x"LPX64"\n",
-+ sbi->ll_lco.lco_flags);
-+ RETURN(0);
-+ }
-+ if (fd && (fd->fd_flags & LL_FILE_IGNORE_LOCK))
-+ RETURN(1);
-+ if (test_bit(LLI_F_CONTENDED, &lli->lli_flags)) {
-+ cfs_time_t cur_time = cfs_time_current();
-+ cfs_time_t retry_time;
-+
-+ retry_time = cfs_time_add(
-+ lli->lli_contention_time,
-+ cfs_time_seconds(sbi->ll_contention_time));
-+ if (cfs_time_after(cur_time, retry_time)) {
-+ ll_clear_file_contended(inode);
-+ RETURN(0);
-+ }
-+ RETURN(1);
-+ }
-+ RETURN(0);
-+}
-+
-+static int ll_file_get_tree_lock_iov(struct ll_lock_tree *tree,
-+ struct file *file, const struct iovec *iov,
-+ unsigned long nr_segs,
-+ loff_t start, loff_t end, int rw)
-+{
-+ int append;
-+ int tree_locked = 0;
-+ int rc;
-+ struct inode * inode = file->f_dentry->d_inode;
-+
-+ append = (rw == OBD_BRW_WRITE) && (file->f_flags & O_APPEND);
-+
-+ if (append || !ll_is_file_contended(file)) {
-+ struct ll_lock_tree_node *node;
-+ int ast_flags;
-+
-+ ast_flags = append ? 0 : LDLM_FL_DENY_ON_CONTENTION;
-+ if (file->f_flags & O_NONBLOCK)
-+ ast_flags |= LDLM_FL_BLOCK_NOWAIT;
-+ node = ll_node_from_inode(inode, start, end,
-+ (rw == OBD_BRW_WRITE) ? LCK_PW : LCK_PR);
-+ if (IS_ERR(node)) {
-+ rc = PTR_ERR(node);
-+ GOTO(out, rc);
-+ }
-+ tree->lt_fd = LUSTRE_FPRIVATE(file);
-+ rc = ll_tree_lock_iov(tree, node, iov, nr_segs, ast_flags);
-+ if (rc == 0)
-+ tree_locked = 1;
-+ else if (rc == -EUSERS)
-+ ll_set_file_contended(inode);
-+ else
-+ GOTO(out, rc);
-+ }
-+ RETURN(tree_locked);
-+out:
-+ return rc;
-+}
-+
-+/* XXX: exact copy from kernel code (__generic_file_aio_write_nolock from rhel4)
-+ */
-+static size_t ll_file_get_iov_count(const struct iovec *iov,
-+ unsigned long *nr_segs)
-+{
-+ size_t count = 0;
-+ unsigned long seg;
-+
-+ for (seg = 0; seg < *nr_segs; seg++) {
-+ const struct iovec *iv = &iov[seg];
-+
-+ /*
-+ * If any segment has a negative length, or the cumulative
-+ * length ever wraps negative then return -EINVAL.
-+ */
-+ count += iv->iov_len;
-+ if (unlikely((ssize_t)(count|iv->iov_len) < 0))
-+ return -EINVAL;
-+ if (access_ok(VERIFY_WRITE, iv->iov_base, iv->iov_len))
-+ continue;
-+ if (seg == 0)
-+ return -EFAULT;
-+ *nr_segs = seg;
-+ count -= iv->iov_len; /* This segment is no good */
-+ break;
-+ }
-+ return count;
-+}
-+
-+static int iov_copy_update(unsigned long *nr_segs, const struct iovec **iov_out,
-+ unsigned long *nrsegs_copy,
-+ struct iovec *iov_copy, size_t *offset,
-+ size_t size)
-+{
-+ int i;
-+ const struct iovec *iov = *iov_out;
-+ for (i = 0; i < *nr_segs;
-+ i++) {
-+ const struct iovec *iv = &iov[i];
-+ struct iovec *ivc = &iov_copy[i];
-+ *ivc = *iv;
-+ if (i == 0) {
-+ ivc->iov_len -= *offset;
-+ ivc->iov_base += *offset;
-+ }
-+ if (ivc->iov_len >= size) {
-+ ivc->iov_len = size;
-+ if (i == 0)
-+ *offset += size;
-+ else
-+ *offset = size;
-+ break;
-+ }
-+ size -= ivc->iov_len;
-+ }
-+ *iov_out += i;
-+ *nr_segs -= i;
-+ *nrsegs_copy = i + 1;
-+
-+ return 0;
-+}
-+
-+static int ll_reget_short_lock(struct page *page, int rw,
-+ obd_off start, obd_off end,
-+ void **cookie)
-+{
-+ struct ll_async_page *llap;
-+ struct obd_export *exp;
-+ struct inode *inode = page->mapping->host;
-+
-+ ENTRY;
-+
-+ exp = ll_i2obdexp(inode);
-+ if (exp == NULL)
-+ RETURN(0);
-+
-+ llap = llap_cast_private(page);
-+ if (llap == NULL)
-+ RETURN(0);
-+
-+ RETURN(obd_reget_short_lock(exp, ll_i2info(inode)->lli_smd,
-+ &llap->llap_cookie, rw, start, end,
-+ cookie));
-+}
-+
-+static void ll_release_short_lock(struct inode *inode, obd_off end,
-+ void *cookie, int rw)
-+{
-+ struct obd_export *exp;
-+ int rc;
-+
-+ exp = ll_i2obdexp(inode);
-+ if (exp == NULL)
-+ return;
-+
-+ rc = obd_release_short_lock(exp, ll_i2info(inode)->lli_smd, end,
-+ cookie, rw);
-+ if (rc < 0)
-+ CERROR("unlock failed (%d)\n", rc);
-+}
-+
-+static inline int ll_file_get_fast_lock(struct file *file,
-+ obd_off ppos, obd_off end,
-+ const struct iovec *iov,
-+ unsigned long nr_segs,
-+ void **cookie, int rw)
-+{
-+ int rc = 0, seg;
-+ struct page *page;
-+
-+ ENTRY;
-+
-+ /* we would like this read request to be lockfree */
-+ for (seg = 0; seg < nr_segs; seg++) {
-+ const struct iovec *iv = &iov[seg];
-+ if (ll_region_mapped((unsigned long)iv->iov_base, iv->iov_len))
-+ GOTO(out, rc);
-+ }
-+
-+ page = find_lock_page(file->f_dentry->d_inode->i_mapping,
-+ ppos >> CFS_PAGE_SHIFT);
-+ if (page) {
-+ if (ll_reget_short_lock(page, rw, ppos, end, cookie))
-+ rc = 1;
-+
-+ unlock_page(page);
-+ page_cache_release(page);
-+ }
-+
-+out:
-+ RETURN(rc);
-+}
-+
-+static inline void ll_file_put_fast_lock(struct inode *inode, obd_off end,
-+ void *cookie, int rw)
-+{
-+ ll_release_short_lock(inode, end, cookie, rw);
-+}
-+
-+enum ll_lock_style {
-+ LL_LOCK_STYLE_NOLOCK = 0,
-+ LL_LOCK_STYLE_FASTLOCK = 1,
-+ LL_LOCK_STYLE_TREELOCK = 2
-+};
-+
-+static inline int ll_file_get_lock(struct file *file, obd_off ppos,
-+ obd_off end, const struct iovec *iov,
-+ unsigned long nr_segs, void **cookie,
-+ struct ll_lock_tree *tree, int rw)
-+{
-+ int rc;
-+
-+ ENTRY;
-+
-+ if (ll_file_get_fast_lock(file, ppos, end, iov, nr_segs, cookie, rw))
-+ RETURN(LL_LOCK_STYLE_FASTLOCK);
-+
-+ rc = ll_file_get_tree_lock_iov(tree, file, iov, nr_segs,
-+ ppos, end, rw);
-+ /* rc: 1 for tree lock, 0 for no lock, <0 for error */
-+ switch (rc) {
-+ case 1:
-+ RETURN(LL_LOCK_STYLE_TREELOCK);
-+ case 0:
-+ RETURN(LL_LOCK_STYLE_NOLOCK);
-+ }
-+
-+ /* an error happened if we reached this point, rc = -errno here */
-+ RETURN(rc);
-+}
-+
-+static inline void ll_file_put_lock(struct inode *inode, obd_off end,
-+ enum ll_lock_style lock_style,
-+ void *cookie, struct ll_lock_tree *tree,
-+ int rw)
-+
-+{
-+ switch (lock_style) {
-+ case LL_LOCK_STYLE_TREELOCK:
-+ ll_tree_unlock(tree);
-+ break;
-+ case LL_LOCK_STYLE_FASTLOCK:
-+ ll_file_put_fast_lock(inode, end, cookie, rw);
-+ break;
-+ default:
-+ CERROR("invalid locking style (%d)\n", lock_style);
-+ }
-+}
-+
-+#ifdef HAVE_FILE_READV
-+static ssize_t ll_file_readv(struct file *file, const struct iovec *iov,
-+ unsigned long nr_segs, loff_t *ppos)
-+{
-+#else
-+static ssize_t ll_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
-+ unsigned long nr_segs, loff_t pos)
-+{
-+ struct file *file = iocb->ki_filp;
-+ loff_t *ppos = &iocb->ki_pos;
-+#endif
-+ struct inode *inode = file->f_dentry->d_inode;
-+ struct ll_inode_info *lli = ll_i2info(inode);
-+ struct lov_stripe_md *lsm = lli->lli_smd;
-+ struct ll_sb_info *sbi = ll_i2sbi(inode);
-+ struct ll_lock_tree tree;
-+ struct ost_lvb lvb;
-+ struct ll_ra_read bead;
-+ int ra = 0;
-+ obd_off end;
-+ ssize_t retval, chunk, sum = 0;
-+ int lock_style;
-+ struct iovec *iov_copy = NULL;
-+ unsigned long nrsegs_copy, nrsegs_orig = 0;
-+ size_t count, iov_offset = 0;
-+ __u64 kms;
-+ void *cookie;
-+ ENTRY;
-+
-+ count = ll_file_get_iov_count(iov, &nr_segs);
-+ CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),size="LPSZ",offset=%Ld\n",
-+ inode->i_ino, inode->i_generation, inode, count, *ppos);
-+ /* "If nbyte is 0, read() will return 0 and have no other results."
-+ * -- Single Unix Spec */
-+ if (count == 0)
-+ RETURN(0);
-+
-+ ll_stats_ops_tally(sbi, LPROC_LL_READ_BYTES, count);
-+
-+ if (!lsm) {
-+ /* Read on file with no objects should return zero-filled
-+ * buffers up to file size (we can get non-zero sizes with
-+ * mknod + truncate, then opening file for read. This is a
-+ * common pattern in NFS case, it seems). Bug 6243 */
-+ int notzeroed;
-+ /* Since there are no objects on OSTs, we have nothing to get
-+ * lock on and so we are forced to access inode->i_size
-+ * unguarded */
-+
-+ /* Read beyond end of file */
-+ if (*ppos >= i_size_read(inode))
-+ RETURN(0);
-+
-+ if (count > i_size_read(inode) - *ppos)
-+ count = i_size_read(inode) - *ppos;
-+ /* Make sure to correctly adjust the file pos pointer for
-+ * EFAULT case */
-+ for (nrsegs_copy = 0; nrsegs_copy < nr_segs; nrsegs_copy++) {
-+ const struct iovec *iv = &iov[nrsegs_copy];
-+
-+ if (count < iv->iov_len)
-+ chunk = count;
-+ else
-+ chunk = iv->iov_len;
-+ notzeroed = clear_user(iv->iov_base, chunk);
-+ sum += (chunk - notzeroed);
-+ count -= (chunk - notzeroed);
-+ if (notzeroed || !count)
-+ break;
-+ }
-+ *ppos += sum;
-+ if (!sum)
-+ RETURN(-EFAULT);
-+ RETURN(sum);
-+ }
-+
-+repeat:
-+ if (sbi->ll_max_rw_chunk != 0) {
-+ /* first, let's know the end of the current stripe */
-+ end = *ppos;
-+ obd_extent_calc(sbi->ll_osc_exp, lsm, OBD_CALC_STRIPE_END,
-+ (obd_off *)&end);
-+
-+ /* correct, the end is beyond the request */
-+ if (end > *ppos + count - 1)
-+ end = *ppos + count - 1;
-+
-+ /* and chunk shouldn't be too large even if striping is wide */
-+ if (end - *ppos > sbi->ll_max_rw_chunk)
-+ end = *ppos + sbi->ll_max_rw_chunk - 1;
-+
-+ chunk = end - *ppos + 1;
-+ if ((count == chunk) && (iov_offset == 0)) {
-+ if (iov_copy)
-+ OBD_FREE(iov_copy, sizeof(*iov) * nrsegs_orig);
-+
-+ iov_copy = (struct iovec *)iov;
-+ nrsegs_copy = nr_segs;
-+ } else {
-+ if (!iov_copy) {
-+ nrsegs_orig = nr_segs;
-+ OBD_ALLOC(iov_copy, sizeof(*iov) * nr_segs);
-+ if (!iov_copy)
-+ GOTO(out, retval = -ENOMEM);
-+ }
-+
-+ iov_copy_update(&nr_segs, &iov, &nrsegs_copy, iov_copy,
-+ &iov_offset, chunk);
-+ }
-+ } else {
-+ end = *ppos + count - 1;
-+ iov_copy = (struct iovec *)iov;
-+ nrsegs_copy = nr_segs;
-+ }
-+
-+ lock_style = ll_file_get_lock(file, (obd_off)(*ppos), end,
-+ iov_copy, nrsegs_copy, &cookie, &tree,
-+ OBD_BRW_READ);
-+ if (lock_style < 0)
-+ GOTO(out, retval = lock_style);
-+
-+ ll_inode_size_lock(inode, 1);
-+ /*
-+ * Consistency guarantees: following possibilities exist for the
-+ * relation between region being read and real file size at this
-+ * moment:
-+ *
-+ * (A): the region is completely inside of the file;
-+ *
-+ * (B-x): x bytes of region are inside of the file, the rest is
-+ * outside;
-+ *
-+ * (C): the region is completely outside of the file.
-+ *
-+ * This classification is stable under DLM lock acquired by
-+ * ll_tree_lock() above, because to change class, other client has to
-+ * take DLM lock conflicting with our lock. Also, any updates to
-+ * ->i_size by other threads on this client are serialized by
-+ * ll_inode_size_lock(). This guarantees that short reads are handled
-+ * correctly in the face of concurrent writes and truncates.
-+ */
-+ inode_init_lvb(inode, &lvb);
-+ obd_merge_lvb(ll_i2sbi(inode)->ll_osc_exp, lsm, &lvb, 1);
-+ kms = lvb.lvb_size;
-+ if (*ppos + count - 1 > kms) {
-+ /* A glimpse is necessary to determine whether we return a
-+ * short read (B) or some zeroes at the end of the buffer (C) */
-+ ll_inode_size_unlock(inode, 1);
-+ retval = ll_glimpse_size(inode, LDLM_FL_BLOCK_GRANTED);
-+ if (retval) {
-+ if (lock_style != LL_LOCK_STYLE_NOLOCK)
-+ ll_file_put_lock(inode, end, lock_style,
-+ cookie, &tree, OBD_BRW_READ);
-+ goto out;
-+ }
-+ } else {
-+ /* region is within kms and, hence, within real file size (A).
-+ * We need to increase i_size to cover the read region so that
-+ * generic_file_read() will do its job, but that doesn't mean
-+ * the kms size is _correct_, it is only the _minimum_ size.
-+ * If someone does a stat they will get the correct size which
-+ * will always be >= the kms value here. b=11081 */
-+ if (i_size_read(inode) < kms)
-+ i_size_write(inode, kms);
-+ ll_inode_size_unlock(inode, 1);
-+ }
-+
-+ chunk = end - *ppos + 1;
-+ CDEBUG(D_INODE,"Read ino %lu, "LPSZ" bytes, offset %lld, i_size %llu\n",
-+ inode->i_ino, chunk, *ppos, i_size_read(inode));
-+
-+ /* turn off the kernel's read-ahead */
-+ if (lock_style != LL_LOCK_STYLE_NOLOCK) {
-+ file->f_ra.ra_pages = 0;
-+ /* initialize read-ahead window once per syscall */
-+ if (ra == 0) {
-+ ra = 1;
-+ bead.lrr_start = *ppos >> CFS_PAGE_SHIFT;
-+ bead.lrr_count = (count + CFS_PAGE_SIZE - 1) >> CFS_PAGE_SHIFT;
-+ ll_ra_read_in(file, &bead);
-+ }
-+
-+ /* BUG: 5972 */
-+ file_accessed(file);
-+#ifdef HAVE_FILE_READV
-+ retval = generic_file_readv(file, iov_copy, nrsegs_copy, ppos);
-+#else
-+ retval = generic_file_aio_read(iocb, iov_copy, nrsegs_copy,
-+ *ppos);
-+#endif
-+ ll_file_put_lock(inode, end, lock_style, cookie,
-+ &tree, OBD_BRW_READ);
-+ } else {
-+ retval = ll_file_lockless_io(file, iov_copy, nrsegs_copy, ppos,
-+ READ, chunk);
-+ }
-+ ll_rw_stats_tally(sbi, current->pid, file, count, 0);
-+ if (retval > 0) {
-+ count -= retval;
-+ sum += retval;
-+ if (retval == chunk && count > 0)
-+ goto repeat;
-+ }
-+
-+ out:
-+ if (ra != 0)
-+ ll_ra_read_ex(file, &bead);
-+ retval = (sum > 0) ? sum : retval;
-+
-+ if (iov_copy && iov_copy != iov)
-+ OBD_FREE(iov_copy, sizeof(*iov) * nrsegs_orig);
-+
-+ RETURN(retval);
-+}
-+
-+static ssize_t ll_file_read(struct file *file, char *buf, size_t count,
-+ loff_t *ppos)
-+{
-+ struct iovec local_iov = { .iov_base = (void __user *)buf,
-+ .iov_len = count };
-+#ifdef HAVE_FILE_READV
-+ return ll_file_readv(file, &local_iov, 1, ppos);
-+#else
-+ struct kiocb kiocb;
-+ ssize_t ret;
-+
-+ init_sync_kiocb(&kiocb, file);
-+ kiocb.ki_pos = *ppos;
-+ kiocb.ki_left = count;
-+
-+ ret = ll_file_aio_read(&kiocb, &local_iov, 1, kiocb.ki_pos);
-+ *ppos = kiocb.ki_pos;
-+ return ret;
-+#endif
-+}
-+
-+/*
-+ * Write to a file (through the page cache).
-+ */
-+#ifdef HAVE_FILE_WRITEV
-+static ssize_t ll_file_writev(struct file *file, const struct iovec *iov,
-+ unsigned long nr_segs, loff_t *ppos)
-+{
-+#else /* AIO stuff */
-+static ssize_t ll_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
-+ unsigned long nr_segs, loff_t pos)
-+{
-+ struct file *file = iocb->ki_filp;
-+ loff_t *ppos = &iocb->ki_pos;
-+#endif
-+ struct inode *inode = file->f_dentry->d_inode;
-+ struct ll_sb_info *sbi = ll_i2sbi(inode);
-+ struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
-+ struct ll_lock_tree tree;
-+ loff_t maxbytes = ll_file_maxbytes(inode);
-+ loff_t lock_start, lock_end, end;
-+ ssize_t retval, chunk, sum = 0;
-+ int tree_locked;
-+ struct iovec *iov_copy = NULL;
-+ unsigned long nrsegs_copy, nrsegs_orig = 0;
-+ size_t count, iov_offset = 0;
-+ ENTRY;
-+
-+ count = ll_file_get_iov_count(iov, &nr_segs);
-+
-+ CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),size="LPSZ",offset=%Ld\n",
-+ inode->i_ino, inode->i_generation, inode, count, *ppos);
-+
-+ SIGNAL_MASK_ASSERT(); /* XXX BUG 1511 */
-+
-+ /* POSIX, but surprised the VFS doesn't check this already */
-+ if (count == 0)
-+ RETURN(0);
-+
-+ /* If file was opened for LL_IOC_LOV_SETSTRIPE but the ioctl wasn't
-+ * called on the file, don't fail the below assertion (bug 2388). */
-+ if (file->f_flags & O_LOV_DELAY_CREATE &&
-+ ll_i2info(inode)->lli_smd == NULL)
-+ RETURN(-EBADF);
-+
-+ LASSERT(ll_i2info(inode)->lli_smd != NULL);
-+
-+ down(&ll_i2info(inode)->lli_write_sem);
-+
-+repeat:
-+ chunk = 0; /* just to fix gcc's warning */
-+ end = *ppos + count - 1;
-+
-+ if (file->f_flags & O_APPEND) {
-+ lock_start = 0;
-+ lock_end = OBD_OBJECT_EOF;
-+ iov_copy = (struct iovec *)iov;
-+ nrsegs_copy = nr_segs;
-+ } else if (sbi->ll_max_rw_chunk != 0) {
-+ /* first, let's know the end of the current stripe */
-+ end = *ppos;
-+ obd_extent_calc(sbi->ll_osc_exp, lsm, OBD_CALC_STRIPE_END,
-+ (obd_off *)&end);
-+
-+ /* correct, the end is beyond the request */
-+ if (end > *ppos + count - 1)
-+ end = *ppos + count - 1;
-+
-+ /* and chunk shouldn't be too large even if striping is wide */
-+ if (end - *ppos > sbi->ll_max_rw_chunk)
-+ end = *ppos + sbi->ll_max_rw_chunk - 1;
-+ lock_start = *ppos;
-+ lock_end = end;
-+ chunk = end - *ppos + 1;
-+ if ((count == chunk) && (iov_offset == 0)) {
-+ if (iov_copy)
-+ OBD_FREE(iov_copy, sizeof(*iov) * nrsegs_orig);
-+
-+ iov_copy = (struct iovec *)iov;
-+ nrsegs_copy = nr_segs;
-+ } else {
-+ if (!iov_copy) {
-+ nrsegs_orig = nr_segs;
-+ OBD_ALLOC(iov_copy, sizeof(*iov) * nr_segs);
-+ if (!iov_copy)
-+ GOTO(out, retval = -ENOMEM);
-+ }
-+ iov_copy_update(&nr_segs, &iov, &nrsegs_copy, iov_copy,
-+ &iov_offset, chunk);
-+ }
-+ } else {
-+ lock_start = *ppos;
-+ lock_end = end;
-+ iov_copy = (struct iovec *)iov;
-+ nrsegs_copy = nr_segs;
-+ }
-+
-+ tree_locked = ll_file_get_tree_lock_iov(&tree, file, iov_copy,
-+ nrsegs_copy,
-+ (obd_off)lock_start,
-+ (obd_off)lock_end,
-+ OBD_BRW_WRITE);
-+ if (tree_locked < 0)
-+ GOTO(out, retval = tree_locked);
-+
-+ /* This is ok, g_f_w will overwrite this under i_sem if it races
-+ * with a local truncate, it just makes our maxbyte checking easier.
-+ * The i_size value gets updated in ll_extent_lock() as a consequence
-+ * of the [0,EOF] extent lock we requested above. */
-+ if (file->f_flags & O_APPEND) {
-+ *ppos = i_size_read(inode);
-+ end = *ppos + count - 1;
-+ }
-+
-+ if (*ppos >= maxbytes) {
-+ send_sig(SIGXFSZ, current, 0);
-+ GOTO(out_unlock, retval = -EFBIG);
-+ }
-+ if (end > maxbytes - 1)
-+ end = maxbytes - 1;
-+
-+ /* generic_file_write handles O_APPEND after getting i_mutex */
-+ chunk = end - *ppos + 1;
-+ CDEBUG(D_INFO, "Writing inode %lu, "LPSZ" bytes, offset %Lu\n",
-+ inode->i_ino, chunk, *ppos);
-+ if (tree_locked)
-+#ifdef HAVE_FILE_WRITEV
-+ retval = generic_file_writev(file, iov_copy, nrsegs_copy, ppos);
-+#else
-+ retval = generic_file_aio_write(iocb, iov_copy, nrsegs_copy,
-+ *ppos);
-+#endif
-+ else
-+ retval = ll_file_lockless_io(file, iov_copy, nrsegs_copy,
-+ ppos, WRITE, chunk);
-+ ll_rw_stats_tally(ll_i2sbi(inode), current->pid, file, chunk, 1);
-+
-+out_unlock:
-+ if (tree_locked)
-+ ll_tree_unlock(&tree);
-+
-+out:
-+ if (retval > 0) {
-+ count -= retval;
-+ sum += retval;
-+ if (retval == chunk && count > 0)
-+ goto repeat;
-+ }
-+
-+ up(&ll_i2info(inode)->lli_write_sem);
-+
-+ if (iov_copy && iov_copy != iov)
-+ OBD_FREE(iov_copy, sizeof(*iov) * nrsegs_orig);
-+
-+ retval = (sum > 0) ? sum : retval;
-+ ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_WRITE_BYTES,
-+ retval > 0 ? retval : 0);
-+ RETURN(retval);
-+}
-+
-+static ssize_t ll_file_write(struct file *file, const char *buf, size_t count,
-+ loff_t *ppos)
-+{
-+ struct iovec local_iov = { .iov_base = (void __user *)buf,
-+ .iov_len = count };
-+
-+#ifdef HAVE_FILE_WRITEV
-+ return ll_file_writev(file, &local_iov, 1, ppos);
-+#else
-+ struct kiocb kiocb;
-+ ssize_t ret;
-+
-+ init_sync_kiocb(&kiocb, file);
-+ kiocb.ki_pos = *ppos;
-+ kiocb.ki_left = count;
-+
-+ ret = ll_file_aio_write(&kiocb, &local_iov, 1, kiocb.ki_pos);
-+ *ppos = kiocb.ki_pos;
-+
-+ return ret;
-+#endif
-+}
-+
-+/*
-+ * Send file content (through pagecache) somewhere with helper
-+ */
-+static ssize_t ll_file_sendfile(struct file *in_file, loff_t *ppos,size_t count,
-+ read_actor_t actor, void *target)
-+{
-+ struct inode *inode = in_file->f_dentry->d_inode;
-+ struct ll_inode_info *lli = ll_i2info(inode);
-+ struct lov_stripe_md *lsm = lli->lli_smd;
-+ struct ll_lock_tree tree;
-+ struct ll_lock_tree_node *node;
-+ struct ost_lvb lvb;
-+ struct ll_ra_read bead;
-+ int rc;
-+ ssize_t retval;
-+ __u64 kms;
-+ ENTRY;
-+ CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),size="LPSZ",offset=%Ld\n",
-+ inode->i_ino, inode->i_generation, inode, count, *ppos);
-+
-+ /* "If nbyte is 0, read() will return 0 and have no other results."
-+ * -- Single Unix Spec */
-+ if (count == 0)
-+ RETURN(0);
-+
-+ ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_READ_BYTES, count);
-+ /* turn off the kernel's read-ahead */
-+ in_file->f_ra.ra_pages = 0;
-+
-+ /* File with no objects, nothing to lock */
-+ if (!lsm)
-+ RETURN(generic_file_sendfile(in_file, ppos, count, actor, target));
-+
-+ node = ll_node_from_inode(inode, *ppos, *ppos + count - 1, LCK_PR);
-+ if (IS_ERR(node))
-+ RETURN(PTR_ERR(node));
-+
-+ tree.lt_fd = LUSTRE_FPRIVATE(in_file);
-+ rc = ll_tree_lock(&tree, node, NULL, count,
-+ in_file->f_flags & O_NONBLOCK?LDLM_FL_BLOCK_NOWAIT:0);
-+ if (rc != 0)
-+ RETURN(rc);
-+
-+ ll_clear_file_contended(inode);
-+ ll_inode_size_lock(inode, 1);
-+ /*
-+ * Consistency guarantees: following possibilities exist for the
-+ * relation between region being read and real file size at this
-+ * moment:
-+ *
-+ * (A): the region is completely inside of the file;
-+ *
-+ * (B-x): x bytes of region are inside of the file, the rest is
-+ * outside;
-+ *
-+ * (C): the region is completely outside of the file.
-+ *
-+ * This classification is stable under DLM lock acquired by
-+ * ll_tree_lock() above, because to change class, other client has to
-+ * take DLM lock conflicting with our lock. Also, any updates to
-+ * ->i_size by other threads on this client are serialized by
-+ * ll_inode_size_lock(). This guarantees that short reads are handled
-+ * correctly in the face of concurrent writes and truncates.
-+ */
-+ inode_init_lvb(inode, &lvb);
-+ obd_merge_lvb(ll_i2sbi(inode)->ll_osc_exp, lsm, &lvb, 1);
-+ kms = lvb.lvb_size;
-+ if (*ppos + count - 1 > kms) {
-+ /* A glimpse is necessary to determine whether we return a
-+ * short read (B) or some zeroes at the end of the buffer (C) */
-+ ll_inode_size_unlock(inode, 1);
-+ retval = ll_glimpse_size(inode, LDLM_FL_BLOCK_GRANTED);
-+ if (retval)
-+ goto out;
-+ } else {
-+ /* region is within kms and, hence, within real file size (A) */
-+ i_size_write(inode, kms);
-+ ll_inode_size_unlock(inode, 1);
-+ }
-+
-+ CDEBUG(D_INFO, "Send ino %lu, "LPSZ" bytes, offset %lld, i_size %llu\n",
-+ inode->i_ino, count, *ppos, i_size_read(inode));
-+
-+ bead.lrr_start = *ppos >> CFS_PAGE_SHIFT;
-+ bead.lrr_count = (count + CFS_PAGE_SIZE - 1) >> CFS_PAGE_SHIFT;
-+ ll_ra_read_in(in_file, &bead);
-+ /* BUG: 5972 */
-+ file_accessed(in_file);
-+ retval = generic_file_sendfile(in_file, ppos, count, actor, target);
-+ ll_ra_read_ex(in_file, &bead);
-+
-+ out:
-+ ll_tree_unlock(&tree);
-+ RETURN(retval);
-+}
-+
-+static int ll_lov_recreate_obj(struct inode *inode, struct file *file,
-+ unsigned long arg)
-+{
-+ struct ll_inode_info *lli = ll_i2info(inode);
-+ struct obd_export *exp = ll_i2obdexp(inode);
-+ struct ll_recreate_obj ucreatp;
-+ struct obd_trans_info oti = { 0 };
-+ struct obdo *oa = NULL;
-+ int lsm_size;
-+ int rc = 0;
-+ struct lov_stripe_md *lsm, *lsm2;
-+ ENTRY;
-+
-+ if (!cfs_capable(CFS_CAP_SYS_ADMIN))
-+ RETURN(-EPERM);
-+
-+ rc = copy_from_user(&ucreatp, (struct ll_recreate_obj *)arg,
-+ sizeof(struct ll_recreate_obj));
-+ if (rc) {
-+ RETURN(-EFAULT);
-+ }
-+ OBDO_ALLOC(oa);
-+ if (oa == NULL)
-+ RETURN(-ENOMEM);
-+
-+ down(&lli->lli_size_sem);
-+ lsm = lli->lli_smd;
-+ if (lsm == NULL)
-+ GOTO(out, rc = -ENOENT);
-+ lsm_size = sizeof(*lsm) + (sizeof(struct lov_oinfo) *
-+ (lsm->lsm_stripe_count));
-+
-+ OBD_ALLOC(lsm2, lsm_size);
-+ if (lsm2 == NULL)
-+ GOTO(out, rc = -ENOMEM);
-+
-+ oa->o_id = ucreatp.lrc_id;
-+ oa->o_nlink = ucreatp.lrc_ost_idx;
-+ oa->o_flags |= OBD_FL_RECREATE_OBJS;
-+ oa->o_valid = OBD_MD_FLID | OBD_MD_FLFLAGS;
-+ obdo_from_inode(oa, inode, OBD_MD_FLTYPE | OBD_MD_FLATIME |
-+ OBD_MD_FLMTIME | OBD_MD_FLCTIME);
-+
-+ memcpy(lsm2, lsm, lsm_size);
-+ rc = obd_create(exp, oa, &lsm2, &oti);
-+
-+ OBD_FREE(lsm2, lsm_size);
-+ GOTO(out, rc);
-+out:
-+ up(&lli->lli_size_sem);
-+ OBDO_FREE(oa);
-+ return rc;
-+}
-+
-+int ll_lov_setstripe_ea_info(struct inode *inode, struct file *file,
-+ int flags, struct lov_user_md *lum,
-+ int lum_size)
-+{
-+ struct ll_inode_info *lli = ll_i2info(inode);
-+ struct lov_stripe_md *lsm;
-+ struct lookup_intent oit = {.it_op = IT_OPEN, .it_flags = flags};
-+ int rc = 0;
-+ ENTRY;
-+
-+ down(&lli->lli_size_sem);
-+ lsm = lli->lli_smd;
-+ if (lsm) {
-+ up(&lli->lli_size_sem);
-+ CDEBUG(D_IOCTL, "stripe already exists for ino %lu\n",
-+ inode->i_ino);
-+ RETURN(-EEXIST);
-+ }
-+
-+ rc = ll_intent_file_open(file, lum, lum_size, &oit);
-+ if (rc)
-+ GOTO(out, rc);
-+ if (it_disposition(&oit, DISP_LOOKUP_NEG))
-+ GOTO(out_req_free, rc = -ENOENT);
-+ rc = oit.d.lustre.it_status;
-+ if (rc < 0)
-+ GOTO(out_req_free, rc);
-+
-+ ll_release_openhandle(file->f_dentry, &oit);
-+
-+ out:
-+ up(&lli->lli_size_sem);
-+ ll_intent_release(&oit);
-+ RETURN(rc);
-+out_req_free:
-+ ptlrpc_req_finished((struct ptlrpc_request *) oit.d.lustre.it_data);
-+ goto out;
-+}
-+
-+int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename,
-+ struct lov_mds_md **lmmp, int *lmm_size,
-+ struct ptlrpc_request **request)
-+{
-+ struct ll_sb_info *sbi = ll_i2sbi(inode);
-+ struct ll_fid fid;
-+ struct mds_body *body;
-+ struct lov_mds_md *lmm = NULL;
-+ struct ptlrpc_request *req = NULL;
-+ int rc, lmmsize;
-+
-+ ll_inode2fid(&fid, inode);
-+
-+ rc = ll_get_max_mdsize(sbi, &lmmsize);
-+ if (rc)
-+ RETURN(rc);
-+
-+ rc = mdc_getattr_name(sbi->ll_mdc_exp, &fid,
-+ filename, strlen(filename) + 1,
-+ OBD_MD_FLEASIZE | OBD_MD_FLDIREA,
-+ lmmsize, &req);
-+ if (rc < 0) {
-+ CDEBUG(D_INFO, "mdc_getattr_name failed "
-+ "on %s: rc %d\n", filename, rc);
-+ GOTO(out, rc);
-+ }
-+
-+ body = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF,
-+ sizeof(*body));
-+ LASSERT(body != NULL); /* checked by mdc_getattr_name */
-+ /* swabbed by mdc_getattr_name */
-+ LASSERT(lustre_rep_swabbed(req, REPLY_REC_OFF));
-+
-+ lmmsize = body->eadatasize;
-+
-+ if (!(body->valid & (OBD_MD_FLEASIZE | OBD_MD_FLDIREA)) ||
-+ lmmsize == 0) {
-+ GOTO(out, rc = -ENODATA);
-+ }
-+
-+ lmm = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF + 1,
-+ lmmsize);
-+ LASSERT(lmm != NULL);
-+ LASSERT(lustre_rep_swabbed(req, REPLY_REC_OFF + 1));
-+
-+ if ((lmm->lmm_magic != cpu_to_le32(LOV_MAGIC)) &&
-+ (lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_JOIN))) {
-+ GOTO(out, rc = -EPROTO);
-+ }
-+ /*
-+ * This is coming from the MDS, so is probably in
-+ * little endian. We convert it to host endian before
-+ * passing it to userspace.
-+ */
-+ if (LOV_MAGIC != cpu_to_le32(LOV_MAGIC)) {
-+ if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC)) {
-+ lustre_swab_lov_user_md((struct lov_user_md *)lmm);
-+ /* if function called for directory - we should be
-+ * avoid swab not existent lsm objects */
-+ if (S_ISREG(body->mode))
-+ lustre_swab_lov_user_md_objects((struct lov_user_md *)lmm);
-+ } else if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_JOIN)) {
-+ lustre_swab_lov_user_md_join((struct lov_user_md_join *)lmm);
-+ }
-+ }
-+
-+ if (lmm->lmm_magic == LOV_MAGIC_JOIN) {
-+ struct lov_stripe_md *lsm;
-+ struct lov_user_md_join *lmj;
-+ int lmj_size, i, aindex = 0;
-+
-+ rc = obd_unpackmd(sbi->ll_osc_exp, &lsm, lmm, lmmsize);
-+ if (rc < 0)
-+ GOTO(out, rc = -ENOMEM);
-+ rc = obd_checkmd(sbi->ll_osc_exp, sbi->ll_mdc_exp, lsm);
-+ if (rc)
-+ GOTO(out_free_memmd, rc);
-+
-+ lmj_size = sizeof(struct lov_user_md_join) +
-+ lsm->lsm_stripe_count *
-+ sizeof(struct lov_user_ost_data_join);
-+ OBD_ALLOC(lmj, lmj_size);
-+ if (!lmj)
-+ GOTO(out_free_memmd, rc = -ENOMEM);
-+
-+ memcpy(lmj, lmm, sizeof(struct lov_user_md_join));
-+ for (i = 0; i < lsm->lsm_stripe_count; i++) {
-+ struct lov_extent *lex =
-+ &lsm->lsm_array->lai_ext_array[aindex];
-+
-+ if (lex->le_loi_idx + lex->le_stripe_count <= i)
-+ aindex ++;
-+ CDEBUG(D_INFO, "aindex %d i %d l_extent_start "
-+ LPU64" len %d\n", aindex, i,
-+ lex->le_start, (int)lex->le_len);
-+ lmj->lmm_objects[i].l_extent_start =
-+ lex->le_start;
-+
-+ if ((int)lex->le_len == -1)
-+ lmj->lmm_objects[i].l_extent_end = -1;
-+ else
-+ lmj->lmm_objects[i].l_extent_end =
-+ lex->le_start + lex->le_len;
-+ lmj->lmm_objects[i].l_object_id =
-+ lsm->lsm_oinfo[i]->loi_id;
-+ lmj->lmm_objects[i].l_object_gr =
-+ lsm->lsm_oinfo[i]->loi_gr;
-+ lmj->lmm_objects[i].l_ost_gen =
-+ lsm->lsm_oinfo[i]->loi_ost_gen;
-+ lmj->lmm_objects[i].l_ost_idx =
-+ lsm->lsm_oinfo[i]->loi_ost_idx;
-+ }
-+ lmm = (struct lov_mds_md *)lmj;
-+ lmmsize = lmj_size;
-+out_free_memmd:
-+ obd_free_memmd(sbi->ll_osc_exp, &lsm);
-+ }
-+out:
-+ *lmmp = lmm;
-+ *lmm_size = lmmsize;
-+ *request = req;
-+ return rc;
-+}
-+static int ll_lov_setea(struct inode *inode, struct file *file,
-+ unsigned long arg)
-+{
-+ int flags = MDS_OPEN_HAS_OBJS | FMODE_WRITE;
-+ struct lov_user_md *lump;
-+ int lum_size = sizeof(struct lov_user_md) +
-+ sizeof(struct lov_user_ost_data);
-+ int rc;
-+ ENTRY;
-+
-+ if (!cfs_capable(CFS_CAP_SYS_ADMIN))
-+ RETURN(-EPERM);
-+
-+ OBD_ALLOC(lump, lum_size);
-+ if (lump == NULL) {
-+ RETURN(-ENOMEM);
-+ }
-+ rc = copy_from_user(lump, (struct lov_user_md *)arg, lum_size);
-+ if (rc) {
-+ OBD_FREE(lump, lum_size);
-+ RETURN(-EFAULT);
-+ }
-+
-+ rc = ll_lov_setstripe_ea_info(inode, file, flags, lump, lum_size);
-+
-+ OBD_FREE(lump, lum_size);
-+ RETURN(rc);
-+}
-+
-+static int ll_lov_setstripe(struct inode *inode, struct file *file,
-+ unsigned long arg)
-+{
-+ struct lov_user_md lum, *lump = (struct lov_user_md *)arg;
-+ int rc;
-+ int flags = FMODE_WRITE;
-+ ENTRY;
-+
-+ /* Bug 1152: copy properly when this is no longer true */
-+ LASSERT(sizeof(lum) == sizeof(*lump));
-+ LASSERT(sizeof(lum.lmm_objects[0]) == sizeof(lump->lmm_objects[0]));
-+ rc = copy_from_user(&lum, lump, sizeof(lum));
-+ if (rc)
-+ RETURN(-EFAULT);
-+
-+ rc = ll_lov_setstripe_ea_info(inode, file, flags, &lum, sizeof(lum));
-+ if (rc == 0) {
-+ put_user(0, &lump->lmm_stripe_count);
-+ rc = obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2obdexp(inode),
-+ 0, ll_i2info(inode)->lli_smd, lump);
-+ }
-+ RETURN(rc);
-+}
-+
-+static int ll_lov_getstripe(struct inode *inode, unsigned long arg)
-+{
-+ struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
-+
-+ if (!lsm)
-+ RETURN(-ENODATA);
-+
-+ return obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2obdexp(inode), 0, lsm,
-+ (void *)arg);
-+}
-+
-+static int ll_get_grouplock(struct inode *inode, struct file *file,
-+ unsigned long arg)
-+{
-+ struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
-+ ldlm_policy_data_t policy = { .l_extent = { .start = 0,
-+ .end = OBD_OBJECT_EOF}};
-+ struct lustre_handle lockh = { 0 };
-+ struct ll_inode_info *lli = ll_i2info(inode);
-+ struct lov_stripe_md *lsm = lli->lli_smd;
-+ int flags = 0, rc;
-+ ENTRY;
-+
-+ if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
-+ RETURN(-EINVAL);
-+ }
-+
-+ policy.l_extent.gid = arg;
-+ if (file->f_flags & O_NONBLOCK)
-+ flags = LDLM_FL_BLOCK_NOWAIT;
-+
-+ rc = ll_extent_lock(fd, inode, lsm, LCK_GROUP, &policy, &lockh, flags);
-+ if (rc)
-+ RETURN(rc);
-+
-+ fd->fd_flags |= LL_FILE_GROUP_LOCKED|LL_FILE_IGNORE_LOCK;
-+ fd->fd_gid = arg;
-+ memcpy(&fd->fd_cwlockh, &lockh, sizeof(lockh));
-+
-+ RETURN(0);
-+}
-+
-+static int ll_put_grouplock(struct inode *inode, struct file *file,
-+ unsigned long arg)
-+{
-+ struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
-+ struct ll_inode_info *lli = ll_i2info(inode);
-+ struct lov_stripe_md *lsm = lli->lli_smd;
-+ int rc;
-+ ENTRY;
-+
-+ if (!(fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
-+ /* Ugh, it's already unlocked. */
-+ RETURN(-EINVAL);
-+ }
-+
-+ if (fd->fd_gid != arg) /* Ugh? Unlocking with different gid? */
-+ RETURN(-EINVAL);
-+
-+ fd->fd_flags &= ~(LL_FILE_GROUP_LOCKED|LL_FILE_IGNORE_LOCK);
-+
-+ rc = ll_extent_unlock(fd, inode, lsm, LCK_GROUP, &fd->fd_cwlockh);
-+ if (rc)
-+ RETURN(rc);
-+
-+ fd->fd_gid = 0;
-+ memset(&fd->fd_cwlockh, 0, sizeof(fd->fd_cwlockh));
-+
-+ RETURN(0);
-+}
-+
-+#if LUSTRE_FIX >= 50
-+static int join_sanity_check(struct inode *head, struct inode *tail)
-+{
-+ ENTRY;
-+ if ((ll_i2sbi(head)->ll_flags & LL_SBI_JOIN) == 0) {
-+ CERROR("server do not support join \n");
-+ RETURN(-EINVAL);
-+ }
-+ if (!S_ISREG(tail->i_mode) || !S_ISREG(head->i_mode)) {
-+ CERROR("tail ino %lu and ino head %lu must be regular\n",
-+ head->i_ino, tail->i_ino);
-+ RETURN(-EINVAL);
-+ }
-+ if (head->i_ino == tail->i_ino) {
-+ CERROR("file %lu can not be joined to itself \n", head->i_ino);
-+ RETURN(-EINVAL);
-+ }
-+ if (i_size_read(head) % JOIN_FILE_ALIGN) {
-+ CERROR("hsize %llu must be times of 64K\n", i_size_read(head));
-+ RETURN(-EINVAL);
-+ }
-+ RETURN(0);
-+}
-+
-+static int join_file(struct inode *head_inode, struct file *head_filp,
-+ struct file *tail_filp)
-+{
-+ struct dentry *tail_dentry = tail_filp->f_dentry;
-+ struct lookup_intent oit = {.it_op = IT_OPEN,
-+ .it_flags = head_filp->f_flags|O_JOIN_FILE};
-+ struct ldlm_enqueue_info einfo = { LDLM_IBITS, LCK_PW,
-+ ll_mdc_blocking_ast, ldlm_completion_ast, NULL, NULL };
-+
-+ struct lustre_handle lockh;
-+ struct mdc_op_data *op_data;
-+ int rc;
-+ loff_t data;
-+ ENTRY;
-+
-+ tail_dentry = tail_filp->f_dentry;
-+
-+ OBD_ALLOC_PTR(op_data);
-+ if (op_data == NULL) {
-+ RETURN(-ENOMEM);
-+ }
-+
-+ data = i_size_read(head_inode);
-+ ll_prepare_mdc_op_data(op_data, head_inode,
-+ tail_dentry->d_parent->d_inode,
-+ tail_dentry->d_name.name,
-+ tail_dentry->d_name.len, 0, &data);
-+ rc = mdc_enqueue(ll_i2mdcexp(head_inode), &einfo, &oit,
-+ op_data, &lockh, NULL, 0, 0);
-+
-+ if (rc < 0)
-+ GOTO(out, rc);
-+
-+ rc = oit.d.lustre.it_status;
-+
-+ if (rc < 0 || it_open_error(DISP_OPEN_OPEN, &oit)) {
-+ rc = rc ? rc : it_open_error(DISP_OPEN_OPEN, &oit);
-+ ptlrpc_req_finished((struct ptlrpc_request *)
-+ oit.d.lustre.it_data);
-+ GOTO(out, rc);
-+ }
-+
-+ if (oit.d.lustre.it_lock_mode) { /* If we got lock - release it right
-+ * away */
-+ ldlm_lock_decref(&lockh, oit.d.lustre.it_lock_mode);
-+ oit.d.lustre.it_lock_mode = 0;
-+ }
-+ ptlrpc_req_finished((struct ptlrpc_request *) oit.d.lustre.it_data);
-+ it_clear_disposition(&oit, DISP_ENQ_COMPLETE);
-+ ll_release_openhandle(head_filp->f_dentry, &oit);
-+out:
-+ if (op_data)
-+ OBD_FREE_PTR(op_data);
-+ ll_intent_release(&oit);
-+ RETURN(rc);
-+}
-+
-+static int ll_file_join(struct inode *head, struct file *filp,
-+ char *filename_tail)
-+{
-+ struct inode *tail = NULL, *first = NULL, *second = NULL;
-+ struct dentry *tail_dentry;
-+ struct file *tail_filp, *first_filp, *second_filp;
-+ struct ll_lock_tree first_tree, second_tree;
-+ struct ll_lock_tree_node *first_node, *second_node;
-+ struct ll_inode_info *hlli = ll_i2info(head), *tlli;
-+ int rc = 0, cleanup_phase = 0;
-+ ENTRY;
-+
-+ CDEBUG(D_VFSTRACE, "VFS Op:head=%lu/%u(%p) tail %s\n",
-+ head->i_ino, head->i_generation, head, filename_tail);
-+
-+ tail_filp = filp_open(filename_tail, O_WRONLY, 0644);
-+ if (IS_ERR(tail_filp)) {
-+ CERROR("Can not open tail file %s", filename_tail);
-+ rc = PTR_ERR(tail_filp);
-+ GOTO(cleanup, rc);
-+ }
-+ tail = igrab(tail_filp->f_dentry->d_inode);
-+
-+ tlli = ll_i2info(tail);
-+ tail_dentry = tail_filp->f_dentry;
-+ LASSERT(tail_dentry);
-+ cleanup_phase = 1;
-+
-+ /*reorder the inode for lock sequence*/
-+ first = head->i_ino > tail->i_ino ? head : tail;
-+ second = head->i_ino > tail->i_ino ? tail : head;
-+ first_filp = head->i_ino > tail->i_ino ? filp : tail_filp;
-+ second_filp = head->i_ino > tail->i_ino ? tail_filp : filp;
-+
-+ CDEBUG(D_INFO, "reorder object from %lu:%lu to %lu:%lu \n",
-+ head->i_ino, tail->i_ino, first->i_ino, second->i_ino);
-+ first_node = ll_node_from_inode(first, 0, OBD_OBJECT_EOF, LCK_EX);
-+ if (IS_ERR(first_node)){
-+ rc = PTR_ERR(first_node);
-+ GOTO(cleanup, rc);
-+ }
-+ first_tree.lt_fd = first_filp->private_data;
-+ rc = ll_tree_lock(&first_tree, first_node, NULL, 0, 0);
-+ if (rc != 0)
-+ GOTO(cleanup, rc);
-+ cleanup_phase = 2;
-+
-+ second_node = ll_node_from_inode(second, 0, OBD_OBJECT_EOF, LCK_EX);
-+ if (IS_ERR(second_node)){
-+ rc = PTR_ERR(second_node);
-+ GOTO(cleanup, rc);
-+ }
-+ second_tree.lt_fd = second_filp->private_data;
-+ rc = ll_tree_lock(&second_tree, second_node, NULL, 0, 0);
-+ if (rc != 0)
-+ GOTO(cleanup, rc);
-+ cleanup_phase = 3;
-+
-+ rc = join_sanity_check(head, tail);
-+ if (rc)
-+ GOTO(cleanup, rc);
-+
-+ rc = join_file(head, filp, tail_filp);
-+ if (rc)
-+ GOTO(cleanup, rc);
-+cleanup:
-+ switch (cleanup_phase) {
-+ case 3:
-+ ll_tree_unlock(&second_tree);
-+ obd_cancel_unused(ll_i2obdexp(second),
-+ ll_i2info(second)->lli_smd, 0, NULL);
-+ case 2:
-+ ll_tree_unlock(&first_tree);
-+ obd_cancel_unused(ll_i2obdexp(first),
-+ ll_i2info(first)->lli_smd, 0, NULL);
-+ case 1:
-+ filp_close(tail_filp, 0);
-+ if (tail)
-+ iput(tail);
-+ if (head && rc == 0) {
-+ obd_free_memmd(ll_i2sbi(head)->ll_osc_exp,
-+ &hlli->lli_smd);
-+ hlli->lli_smd = NULL;
-+ }
-+ case 0:
-+ break;
-+ default:
-+ CERROR("invalid cleanup_phase %d\n", cleanup_phase);
-+ LBUG();
-+ }
-+ RETURN(rc);
-+}
-+#endif /* LUSTRE_FIX >= 50 */
-+
-+/**
-+ * Close inode open handle
-+ *
-+ * \param dentry [in] dentry which contains the inode
-+ * \param it [in,out] intent which contains open info and result
-+ *
-+ * \retval 0 success
-+ * \retval <0 failure
-+ */
-+int ll_release_openhandle(struct dentry *dentry, struct lookup_intent *it)
-+{
-+ struct inode *inode = dentry->d_inode;
-+ struct obd_client_handle *och;
-+ int rc;
-+ ENTRY;
-+
-+ LASSERT(inode);
-+
-+ /* Root ? Do nothing. */
-+ if (dentry->d_inode->i_sb->s_root == dentry)
-+ RETURN(0);
-+
-+ /* No open handle to close? Move away */
-+ if (!it_disposition(it, DISP_OPEN_OPEN))
-+ RETURN(0);
-+
-+ LASSERT(it_open_error(DISP_OPEN_OPEN, it) == 0);
-+
-+ OBD_ALLOC(och, sizeof(*och));
-+ if (!och)
-+ GOTO(out, rc = -ENOMEM);
-+
-+ ll_och_fill(ll_i2info(inode), it, och);
-+
-+ rc = ll_close_inode_openhandle(inode, och);
-+
-+ OBD_FREE(och, sizeof(*och));
-+ out:
-+ /* this one is in place of ll_file_open */
-+ if (it_disposition(it, DISP_ENQ_OPEN_REF))
-+ ptlrpc_req_finished(it->d.lustre.it_data);
-+ it_clear_disposition(it, DISP_ENQ_OPEN_REF);
-+ RETURN(rc);
-+}
-+
-+int ll_fiemap(struct inode *inode, struct ll_user_fiemap *fiemap,
-+ int num_bytes)
-+{
-+ struct obd_export *exp = ll_i2obdexp(inode);
-+ struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
-+ struct ll_fiemap_info_key fm_key = { .name = KEY_FIEMAP, };
-+ int vallen = num_bytes;
-+ int rc;
-+ ENTRY;
-+
-+ /* If the stripe_count > 1 and the application does not understand
-+ * DEVICE_ORDER flag, then it cannot interpret the extents correctly.
-+ */
-+ if (lsm->lsm_stripe_count > 1 &&
-+ !(fiemap->fm_flags & FIEMAP_FLAG_DEVICE_ORDER))
-+ return -EOPNOTSUPP;
-+
-+ fm_key.oa.o_id = lsm->lsm_object_id;
-+ fm_key.oa.o_valid = OBD_MD_FLID;
-+
-+ obdo_from_inode(&fm_key.oa, inode, OBD_MD_FLFID | OBD_MD_FLSIZE);
-+
-+ /* If filesize is 0, then there would be no objects for mapping */
-+ if (fm_key.oa.o_size == 0) {
-+ fiemap->fm_mapped_extents = 0;
-+ RETURN(0);
-+ }
-+
-+ memcpy(&fm_key.fiemap, fiemap, sizeof(*fiemap));
-+
-+ rc = obd_get_info(exp, sizeof(fm_key), &fm_key, &vallen, fiemap, lsm);
-+ if (rc)
-+ CERROR("obd_get_info failed: rc = %d\n", rc);
-+
-+ RETURN(rc);
-+}
-+
-+int ll_file_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
-+ unsigned long arg)
-+{
-+ struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
-+ int flags;
-+ ENTRY;
-+
-+ CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),cmd=%x\n", inode->i_ino,
-+ inode->i_generation, inode, cmd);
-+ ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_IOCTL, 1);
-+
-+ /* asm-ppc{,64} declares TCGETS, et. al. as type 't' not 'T' */
-+ if (_IOC_TYPE(cmd) == 'T' || _IOC_TYPE(cmd) == 't') /* tty ioctls */
-+ RETURN(-ENOTTY);
-+
-+ switch(cmd) {
-+ case LL_IOC_GETFLAGS:
-+ /* Get the current value of the file flags */
-+ return put_user(fd->fd_flags, (int *)arg);
-+ case LL_IOC_SETFLAGS:
-+ case LL_IOC_CLRFLAGS:
-+ /* Set or clear specific file flags */
-+ /* XXX This probably needs checks to ensure the flags are
-+ * not abused, and to handle any flag side effects.
-+ */
-+ if (get_user(flags, (int *) arg))
-+ RETURN(-EFAULT);
-+
-+ if (cmd == LL_IOC_SETFLAGS) {
-+ if ((flags & LL_FILE_IGNORE_LOCK) &&
-+ !(file->f_flags & O_DIRECT)) {
-+ CERROR("%s: unable to disable locking on "
-+ "non-O_DIRECT file\n", current->comm);
-+ RETURN(-EINVAL);
-+ }
-+
-+ fd->fd_flags |= flags;
-+ } else {
-+ fd->fd_flags &= ~flags;
-+ }
-+ RETURN(0);
-+ case LL_IOC_LOV_SETSTRIPE:
-+ RETURN(ll_lov_setstripe(inode, file, arg));
-+ case LL_IOC_LOV_SETEA:
-+ RETURN(ll_lov_setea(inode, file, arg));
-+ case LL_IOC_LOV_GETSTRIPE:
-+ RETURN(ll_lov_getstripe(inode, arg));
-+ case LL_IOC_RECREATE_OBJ:
-+ RETURN(ll_lov_recreate_obj(inode, file, arg));
-+ case EXT3_IOC_FIEMAP: {
-+ struct ll_user_fiemap *fiemap_s;
-+ size_t num_bytes, ret_bytes;
-+ unsigned int extent_count;
-+ int rc = 0;
-+
-+ /* Get the extent count so we can calculate the size of
-+ * required fiemap buffer */
-+ if (get_user(extent_count,
-+ &((struct ll_user_fiemap __user *)arg)->fm_extent_count))
-+ RETURN(-EFAULT);
-+ num_bytes = sizeof(*fiemap_s) + (extent_count *
-+ sizeof(struct ll_fiemap_extent));
-+ OBD_VMALLOC(fiemap_s, num_bytes);
-+ if (fiemap_s == NULL)
-+ RETURN(-ENOMEM);
-+
-+ if (copy_from_user(fiemap_s,(struct ll_user_fiemap __user *)arg,
-+ sizeof(*fiemap_s)))
-+ GOTO(error, rc = -EFAULT);
-+
-+ if (fiemap_s->fm_flags & ~LUSTRE_FIEMAP_FLAGS_COMPAT) {
-+ fiemap_s->fm_flags = fiemap_s->fm_flags &
-+ ~LUSTRE_FIEMAP_FLAGS_COMPAT;
-+ if (copy_to_user((char *)arg, fiemap_s,
-+ sizeof(*fiemap_s)))
-+ GOTO(error, rc = -EFAULT);
-+
-+ GOTO(error, rc = -EBADR);
-+ }
-+
-+ /* If fm_extent_count is non-zero, read the first extent since
-+ * it is used to calculate end_offset and device from previous
-+ * fiemap call. */
-+ if (extent_count) {
-+ if (copy_from_user(&fiemap_s->fm_extents[0],
-+ (char __user *)arg + sizeof(*fiemap_s),
-+ sizeof(struct ll_fiemap_extent)))
-+ GOTO(error, rc = -EFAULT);
-+ }
-+
-+ if (fiemap_s->fm_flags & FIEMAP_FLAG_SYNC) {
-+ int rc;
-+
-+ rc = filemap_fdatawrite(inode->i_mapping);
-+ if (rc)
-+ GOTO(error, rc);
-+ }
-+
-+ rc = ll_fiemap(inode, fiemap_s, num_bytes);
-+ if (rc)
-+ GOTO(error, rc);
-+
-+ ret_bytes = sizeof(struct ll_user_fiemap);
-+
-+ if (extent_count != 0)
-+ ret_bytes += (fiemap_s->fm_mapped_extents *
-+ sizeof(struct ll_fiemap_extent));
-+
-+ if (copy_to_user((void *)arg, fiemap_s, ret_bytes))
-+ rc = -EFAULT;
-+
-+error:
-+ OBD_VFREE(fiemap_s, num_bytes);
-+ RETURN(rc);
-+ }
-+ case EXT3_IOC_GETFLAGS:
-+ case EXT3_IOC_SETFLAGS:
-+ RETURN(ll_iocontrol(inode, file, cmd, arg));
-+ case EXT3_IOC_GETVERSION_OLD:
-+ case EXT3_IOC_GETVERSION:
-+ RETURN(put_user(inode->i_generation, (int *)arg));
-+ case LL_IOC_JOIN: {
-+#if LUSTRE_FIX >= 50
-+ /* Allow file join in beta builds to allow debuggging */
-+ char *ftail;
-+ int rc;
-+
-+ ftail = getname((const char *)arg);
-+ if (IS_ERR(ftail))
-+ RETURN(PTR_ERR(ftail));
-+ rc = ll_file_join(inode, file, ftail);
-+ putname(ftail);
-+ RETURN(rc);
-+#else
-+ CWARN("file join is not supported in this version of Lustre\n");
-+ RETURN(-ENOTTY);
-+#endif
-+ }
-+ case LL_IOC_GROUP_LOCK:
-+ RETURN(ll_get_grouplock(inode, file, arg));
-+ case LL_IOC_GROUP_UNLOCK:
-+ RETURN(ll_put_grouplock(inode, file, arg));
-+ case IOC_OBD_STATFS:
-+ RETURN(ll_obd_statfs(inode, (void *)arg));
-+ case OBD_IOC_GETNAME_OLD:
-+ case OBD_IOC_GETNAME: {
-+ struct obd_device *obd =
-+ class_exp2obd(ll_i2sbi(inode)->ll_osc_exp);
-+ if (!obd)
-+ RETURN(-EFAULT);
-+ if (copy_to_user((void *)arg, obd->obd_name,
-+ strlen(obd->obd_name) + 1))
-+ RETURN (-EFAULT);
-+ RETURN(0);
-+ }
-+
-+ /* We need to special case any other ioctls we want to handle,
-+ * to send them to the MDS/OST as appropriate and to properly
-+ * network encode the arg field.
-+ case EXT3_IOC_SETVERSION_OLD:
-+ case EXT3_IOC_SETVERSION:
-+ */
-+ default: {
-+ int err;
-+
-+ if (LLIOC_STOP ==
-+ ll_iocontrol_call(inode, file, cmd, arg, &err))
-+ RETURN(err);
-+
-+ RETURN(obd_iocontrol(cmd, ll_i2obdexp(inode), 0, NULL,
-+ (void *)arg));
-+ }
-+ }
-+}
-+
-+loff_t ll_file_seek(struct file *file, loff_t offset, int origin)
-+{
-+ struct inode *inode = file->f_dentry->d_inode;
-+ struct ll_inode_info *lli = ll_i2info(inode);
-+ struct lov_stripe_md *lsm = lli->lli_smd;
-+ loff_t retval;
-+ ENTRY;
-+ retval = offset + ((origin == 2) ? i_size_read(inode) :
-+ (origin == 1) ? file->f_pos : 0);
-+ CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), to=%Lu=%#Lx(%s)\n",
-+ inode->i_ino, inode->i_generation, inode, retval, retval,
-+ origin == 2 ? "SEEK_END": origin == 1 ? "SEEK_CUR" : "SEEK_SET");
-+ ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_LLSEEK, 1);
-+
-+ if (origin == 2) { /* SEEK_END */
-+ int nonblock = 0, rc;
-+
-+ if (file->f_flags & O_NONBLOCK)
-+ nonblock = LDLM_FL_BLOCK_NOWAIT;
-+
-+ if (lsm != NULL) {
-+ rc = ll_glimpse_size(inode, nonblock);
-+ if (rc != 0)
-+ RETURN(rc);
-+ }
-+
-+ ll_inode_size_lock(inode, 0);
-+ offset += i_size_read(inode);
-+ ll_inode_size_unlock(inode, 0);
-+ } else if (origin == 1) { /* SEEK_CUR */
-+ offset += file->f_pos;
-+ }
-+
-+ retval = -EINVAL;
-+ if (offset >= 0 && offset <= ll_file_maxbytes(inode)) {
-+ if (offset != file->f_pos) {
-+ file->f_pos = offset;
-+ file->f_version = 0;
-+ }
-+ retval = offset;
-+ }
-+
-+ RETURN(retval);
-+}
-+
-+int ll_fsync(struct file *file, struct dentry *dentry, int data)
-+{
-+ struct inode *inode = dentry->d_inode;
-+ struct ll_inode_info *lli = ll_i2info(inode);
-+ struct lov_stripe_md *lsm = lli->lli_smd;
-+ struct ll_fid fid;
-+ struct ptlrpc_request *req;
-+ int rc, err;
-+ ENTRY;
-+ CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
-+ inode->i_generation, inode);
-+ ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FSYNC, 1);
-+
-+ /* fsync's caller has already called _fdata{sync,write}, we want
-+ * that IO to finish before calling the osc and mdc sync methods */
-+ rc = filemap_fdatawait(inode->i_mapping);
-+
-+ /* catch async errors that were recorded back when async writeback
-+ * failed for pages in this mapping. */
-+ err = lli->lli_async_rc;
-+ lli->lli_async_rc = 0;
-+ if (rc == 0)
-+ rc = err;
-+ if (lsm) {
-+ err = lov_test_and_clear_async_rc(lsm);
-+ if (rc == 0)
-+ rc = err;
-+ }
-+
-+ ll_inode2fid(&fid, inode);
-+ err = mdc_sync(ll_i2sbi(inode)->ll_mdc_exp, &fid, &req);
-+ if (!rc)
-+ rc = err;
-+ if (!err)
-+ ptlrpc_req_finished(req);
-+
-+ if (data && lsm) {
-+ struct obdo *oa;
-+
-+ OBDO_ALLOC(oa);
-+ if (!oa)
-+ RETURN(rc ? rc : -ENOMEM);
-+
-+ oa->o_id = lsm->lsm_object_id;
-+ oa->o_valid = OBD_MD_FLID;
-+ obdo_from_inode(oa, inode, OBD_MD_FLTYPE | OBD_MD_FLATIME |
-+ OBD_MD_FLMTIME | OBD_MD_FLCTIME);
-+
-+ err = obd_sync(ll_i2sbi(inode)->ll_osc_exp, oa, lsm,
-+ 0, OBD_OBJECT_EOF);
-+ if (!rc)
-+ rc = err;
-+ OBDO_FREE(oa);
-+ }
-+
-+ RETURN(rc);
-+}
-+
-+int ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock)
-+{
-+ struct inode *inode = file->f_dentry->d_inode;
-+ struct ll_sb_info *sbi = ll_i2sbi(inode);
-+ struct ldlm_res_id res_id =
-+ { .name = {inode->i_ino, inode->i_generation, LDLM_FLOCK} };
-+ struct ldlm_enqueue_info einfo = { LDLM_FLOCK, 0, NULL,
-+ ldlm_flock_completion_ast, NULL, file_lock };
-+ struct lustre_handle lockh = {0};
-+ ldlm_policy_data_t flock;
-+ int flags = 0;
-+ int rc;
-+ ENTRY;
-+
-+ CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu file_lock=%p\n",
-+ inode->i_ino, file_lock);
-+ ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FLOCK, 1);
-+
-+ if (file_lock->fl_flags & FL_FLOCK) {
-+ LASSERT((cmd == F_SETLKW) || (cmd == F_SETLK));
-+ /* set missing params for flock() calls */
-+ file_lock->fl_end = OFFSET_MAX;
-+ file_lock->fl_pid = current->tgid;
-+ }
-+ flock.l_flock.pid = file_lock->fl_pid;
-+ flock.l_flock.start = file_lock->fl_start;
-+ flock.l_flock.end = file_lock->fl_end;
-+
-+ switch (file_lock->fl_type) {
-+ case F_RDLCK:
-+ einfo.ei_mode = LCK_PR;
-+ break;
-+ case F_UNLCK:
-+ /* An unlock request may or may not have any relation to
-+ * existing locks so we may not be able to pass a lock handle
-+ * via a normal ldlm_lock_cancel() request. The request may even
-+ * unlock a byte range in the middle of an existing lock. In
-+ * order to process an unlock request we need all of the same
-+ * information that is given with a normal read or write record
-+ * lock request. To avoid creating another ldlm unlock (cancel)
-+ * message we'll treat a LCK_NL flock request as an unlock. */
-+ einfo.ei_mode = LCK_NL;
-+ break;
-+ case F_WRLCK:
-+ einfo.ei_mode = LCK_PW;
-+ break;
-+ default:
-+ CERROR("unknown fcntl lock type: %d\n", file_lock->fl_type);
-+ RETURN (-EINVAL);
-+ }
-+
-+ switch (cmd) {
-+ case F_SETLKW:
-+#ifdef F_SETLKW64
-+ case F_SETLKW64:
-+#endif
-+ flags = 0;
-+ break;
-+ case F_SETLK:
-+#ifdef F_SETLK64
-+ case F_SETLK64:
-+#endif
-+ flags = LDLM_FL_BLOCK_NOWAIT;
-+ break;
-+ case F_GETLK:
-+#ifdef F_GETLK64
-+ case F_GETLK64:
-+#endif
-+ flags = LDLM_FL_TEST_LOCK;
-+ /* Save the old mode so that if the mode in the lock changes we
-+ * can decrement the appropriate reader or writer refcount. */
-+ file_lock->fl_type = einfo.ei_mode;
-+ break;
-+ default:
-+ CERROR("unknown fcntl lock command: %d\n", cmd);
-+ RETURN (-EINVAL);
-+ }
-+
-+ CDEBUG(D_DLMTRACE, "inode=%lu, pid=%u, flags=%#x, mode=%u, "
-+ "start="LPU64", end="LPU64"\n", inode->i_ino, flock.l_flock.pid,
-+ flags, einfo.ei_mode, flock.l_flock.start, flock.l_flock.end);
-+
-+ rc = ldlm_cli_enqueue(sbi->ll_mdc_exp, NULL, &einfo, res_id,
-+ &flock, &flags, NULL, 0, NULL, &lockh, 0);
-+ if ((file_lock->fl_flags & FL_FLOCK) &&
-+ (rc == 0 || file_lock->fl_type == F_UNLCK))
-+ ll_flock_lock_file_wait(file, file_lock, (cmd == F_SETLKW));
-+#ifdef HAVE_F_OP_FLOCK
-+ if ((file_lock->fl_flags & FL_POSIX) &&
-+ (rc == 0 || file_lock->fl_type == F_UNLCK) &&
-+ !(flags & LDLM_FL_TEST_LOCK))
-+ posix_lock_file_wait(file, file_lock);
-+#endif
-+
-+ RETURN(rc);
-+}
-+
-+int ll_file_noflock(struct file *file, int cmd, struct file_lock *file_lock)
-+{
-+ ENTRY;
-+
-+ RETURN(-ENOSYS);
-+}
-+
-+int ll_have_md_lock(struct inode *inode, __u64 bits)
-+{
-+ struct lustre_handle lockh;
-+ struct ldlm_res_id res_id = { .name = {0} };
-+ struct obd_device *obddev;
-+ ldlm_policy_data_t policy = { .l_inodebits = {bits}};
-+ int flags;
-+ ENTRY;
-+
-+ if (!inode)
-+ RETURN(0);
-+
-+ obddev = ll_i2mdcexp(inode)->exp_obd;
-+ res_id.name[0] = inode->i_ino;
-+ res_id.name[1] = inode->i_generation;
-+
-+ CDEBUG(D_INFO, "trying to match res "LPU64"\n", res_id.name[0]);
-+
-+ flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING | LDLM_FL_TEST_LOCK;
-+ if (ldlm_lock_match(obddev->obd_namespace, flags, &res_id, LDLM_IBITS,
-+ &policy, LCK_CR|LCK_CW|LCK_PR|LCK_PW, &lockh)) {
-+ RETURN(1);
-+ }
-+
-+ RETURN(0);
-+}
-+
-+static int ll_inode_revalidate_fini(struct inode *inode, int rc) {
-+ if (rc == -ENOENT) { /* Already unlinked. Just update nlink
-+ * and return success */
-+ inode->i_nlink = 0;
-+ /* This path cannot be hit for regular files unless in
-+ * case of obscure races, so no need to to validate
-+ * size. */
-+ if (!S_ISREG(inode->i_mode) &&
-+ !S_ISDIR(inode->i_mode))
-+ return 0;
-+ }
-+
-+ if (rc) {
-+ CERROR("failure %d inode %lu\n", rc, inode->i_ino);
-+ return -abs(rc);
-+
-+ }
-+
-+ return 0;
-+}
-+
-+int ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it)
-+{
-+ struct inode *inode = dentry->d_inode;
-+ struct ptlrpc_request *req = NULL;
-+ struct obd_export *exp;
-+ int rc;
-+ ENTRY;
-+
-+ if (!inode) {
-+ CERROR("REPORT THIS LINE TO PETER\n");
-+ RETURN(0);
-+ }
-+ CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),name=%s\n",
-+ inode->i_ino, inode->i_generation, inode, dentry->d_name.name);
-+
-+ exp = ll_i2mdcexp(inode);
-+
-+ if (exp->exp_connect_flags & OBD_CONNECT_ATTRFID) {
-+ struct lookup_intent oit = { .it_op = IT_GETATTR };
-+ struct mdc_op_data op_data;
-+
-+ /* Call getattr by fid, so do not provide name at all. */
-+ ll_prepare_mdc_op_data(&op_data, dentry->d_parent->d_inode,
-+ dentry->d_inode, NULL, 0, 0, NULL);
-+ rc = mdc_intent_lock(exp, &op_data, NULL, 0,
-+ /* we are not interested in name
-+ based lookup */
-+ &oit, 0, &req,
-+ ll_mdc_blocking_ast, 0);
-+ if (rc < 0) {
-+ rc = ll_inode_revalidate_fini(inode, rc);
-+ GOTO (out, rc);
-+ }
-+
-+ rc = revalidate_it_finish(req, DLM_REPLY_REC_OFF, &oit, dentry);
-+ if (rc != 0) {
-+ ll_intent_release(&oit);
-+ GOTO(out, rc);
-+ }
-+
-+ /* Unlinked? Unhash dentry, so it is not picked up later by
-+ do_lookup() -> ll_revalidate_it(). We cannot use d_drop
-+ here to preserve get_cwd functionality on 2.6.
-+ Bug 10503 */
-+ if (!dentry->d_inode->i_nlink) {
-+ spin_lock(&ll_lookup_lock);
-+ spin_lock(&dcache_lock);
-+ ll_drop_dentry(dentry);
-+ spin_unlock(&dcache_lock);
-+ spin_unlock(&ll_lookup_lock);
-+ }
-+
-+ ll_lookup_finish_locks(&oit, dentry);
-+ } else if (!ll_have_md_lock(dentry->d_inode,
-+ MDS_INODELOCK_UPDATE|MDS_INODELOCK_LOOKUP)) {
-+ struct ll_sb_info *sbi = ll_i2sbi(dentry->d_inode);
-+ struct ll_fid fid;
-+ obd_valid valid = OBD_MD_FLGETATTR;
-+ int ealen = 0;
-+
-+ if (S_ISREG(inode->i_mode)) {
-+ rc = ll_get_max_mdsize(sbi, &ealen);
-+ if (rc)
-+ RETURN(rc);
-+ valid |= OBD_MD_FLEASIZE | OBD_MD_FLMODEASIZE;
-+ }
-+ ll_inode2fid(&fid, inode);
-+ rc = mdc_getattr(sbi->ll_mdc_exp, &fid, valid, ealen, &req);
-+ if (rc) {
-+ rc = ll_inode_revalidate_fini(inode, rc);
-+ RETURN(rc);
-+ }
-+
-+ rc = ll_prep_inode(sbi->ll_osc_exp, &inode, req, REPLY_REC_OFF,
-+ NULL);
-+ if (rc)
-+ GOTO(out, rc);
-+ }
-+
-+ /* if object not yet allocated, don't validate size */
-+ if (ll_i2info(inode)->lli_smd == NULL)
-+ GOTO(out, rc = 0);
-+
-+ /* ll_glimpse_size will prefer locally cached writes if they extend
-+ * the file */
-+ rc = ll_glimpse_size(inode, 0);
-+
-+out:
-+ ptlrpc_req_finished(req);
-+ RETURN(rc);
-+}
-+
-+int ll_getattr_it(struct vfsmount *mnt, struct dentry *de,
-+ struct lookup_intent *it, struct kstat *stat)
-+{
-+ struct inode *inode = de->d_inode;
-+ int res = 0;
-+
-+ res = ll_inode_revalidate_it(de, it);
-+ ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_GETATTR, 1);
-+
-+ if (res)
-+ return res;
-+
-+ stat->dev = inode->i_sb->s_dev;
-+ stat->ino = inode->i_ino;
-+ stat->mode = inode->i_mode;
-+ stat->nlink = inode->i_nlink;
-+ stat->uid = inode->i_uid;
-+ stat->gid = inode->i_gid;
-+ stat->rdev = kdev_t_to_nr(inode->i_rdev);
-+ stat->atime = inode->i_atime;
-+ stat->mtime = inode->i_mtime;
-+ stat->ctime = inode->i_ctime;
-+#ifdef HAVE_INODE_BLKSIZE
-+ stat->blksize = inode->i_blksize;
-+#else
-+ stat->blksize = 1<<inode->i_blkbits;
-+#endif
-+
-+ ll_inode_size_lock(inode, 0);
-+ stat->size = i_size_read(inode);
-+ stat->blocks = inode->i_blocks;
-+ ll_inode_size_unlock(inode, 0);
-+
-+ return 0;
-+}
-+int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat)
-+{
-+ struct lookup_intent it = { .it_op = IT_GETATTR };
-+
-+ return ll_getattr_it(mnt, de, &it, stat);
-+}
-+
-+static
-+int lustre_check_acl(struct inode *inode, int mask)
-+{
-+#ifdef CONFIG_FS_POSIX_ACL
-+ struct ll_inode_info *lli = ll_i2info(inode);
-+ struct posix_acl *acl;
-+ int rc;
-+ ENTRY;
-+
-+ spin_lock(&lli->lli_lock);
-+ acl = posix_acl_dup(lli->lli_posix_acl);
-+ spin_unlock(&lli->lli_lock);
-+
-+ if (!acl)
-+ RETURN(-EAGAIN);
-+
-+ rc = posix_acl_permission(inode, acl, mask);
-+ posix_acl_release(acl);
-+
-+ RETURN(rc);
-+#else
-+ return -EAGAIN;
-+#endif
-+}
-+
-+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,10))
-+int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd)
-+{
-+ CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), mask %o\n",
-+ inode->i_ino, inode->i_generation, inode, mask);
-+
-+ ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_INODE_PERM, 1);
-+ return generic_permission(inode, mask, lustre_check_acl);
-+}
-+#else
-+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
-+int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd)
-+#else
-+int ll_inode_permission(struct inode *inode, int mask)
-+#endif
-+{
-+ int mode = inode->i_mode;
-+ int rc;
-+
-+ CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), mask %o\n",
-+ inode->i_ino, inode->i_generation, inode, mask);
-+ ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_INODE_PERM, 1);
-+
-+ if ((mask & MAY_WRITE) && IS_RDONLY(inode) &&
-+ (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
-+ return -EROFS;
-+ if ((mask & MAY_WRITE) && IS_IMMUTABLE(inode))
-+ return -EACCES;
-+ if (current->fsuid == inode->i_uid) {
-+ mode >>= 6;
-+ } else if (1) {
-+ if (((mode >> 3) & mask & S_IRWXO) != mask)
-+ goto check_groups;
-+ rc = lustre_check_acl(inode, mask);
-+ if (rc == -EAGAIN)
-+ goto check_groups;
-+ if (rc == -EACCES)
-+ goto check_capabilities;
-+ return rc;
-+ } else {
-+check_groups:
-+ if (in_group_p(inode->i_gid))
-+ mode >>= 3;
-+ }
-+ if ((mode & mask & S_IRWXO) == mask)
-+ return 0;
-+
-+check_capabilities:
-+ if (!(mask & MAY_EXEC) ||
-+ (inode->i_mode & S_IXUGO) || S_ISDIR(inode->i_mode))
-+ if (cfs_capable(CFS_CAP_DAC_OVERRIDE))
-+ return 0;
-+
-+ if (cfs_capable(CFS_CAP_DAC_READ_SEARCH) && ((mask == MAY_READ) ||
-+ (S_ISDIR(inode->i_mode) && !(mask & MAY_WRITE))))
-+ return 0;
-+
-+ return -EACCES;
-+}
-+#endif
-+
-+/* -o localflock - only provides locally consistent flock locks */
-+struct file_operations ll_file_operations = {
-+ .read = ll_file_read,
-+#ifdef HAVE_FILE_READV
-+ .readv = ll_file_readv,
-+#else
-+ .aio_read = ll_file_aio_read,
-+#endif
-+ .write = ll_file_write,
-+#ifdef HAVE_FILE_WRITEV
-+ .writev = ll_file_writev,
-+#else
-+ .aio_write = ll_file_aio_write,
-+#endif
-+ .ioctl = ll_file_ioctl,
-+ .open = ll_file_open,
-+ .release = ll_file_release,
-+ .mmap = ll_file_mmap,
-+ .llseek = ll_file_seek,
-+ .sendfile = ll_file_sendfile,
-+ .fsync = ll_fsync,
-+};
-+
-+struct file_operations ll_file_operations_flock = {
-+ .read = ll_file_read,
-+#ifdef HAVE_FILE_READV
-+ .readv = ll_file_readv,
-+#else
-+ .aio_read = ll_file_aio_read,
-+#endif
-+ .write = ll_file_write,
-+#ifdef HAVE_FILE_WRITEV
-+ .writev = ll_file_writev,
-+#else
-+ .aio_write = ll_file_aio_write,
-+#endif
-+ .ioctl = ll_file_ioctl,
-+ .open = ll_file_open,
-+ .release = ll_file_release,
-+ .mmap = ll_file_mmap,
-+ .llseek = ll_file_seek,
-+ .sendfile = ll_file_sendfile,
-+ .fsync = ll_fsync,
-+#ifdef HAVE_F_OP_FLOCK
-+ .flock = ll_file_flock,
-+#endif
-+ .lock = ll_file_flock
-+};
-+
-+/* These are for -o noflock - to return ENOSYS on flock calls */
-+struct file_operations ll_file_operations_noflock = {
-+ .read = ll_file_read,
-+#ifdef HAVE_FILE_READV
-+ .readv = ll_file_readv,
-+#else
-+ .aio_read = ll_file_aio_read,
-+#endif
-+ .write = ll_file_write,
-+#ifdef HAVE_FILE_WRITEV
-+ .writev = ll_file_writev,
-+#else
-+ .aio_write = ll_file_aio_write,
-+#endif
-+ .ioctl = ll_file_ioctl,
-+ .open = ll_file_open,
-+ .release = ll_file_release,
-+ .mmap = ll_file_mmap,
-+ .llseek = ll_file_seek,
-+ .sendfile = ll_file_sendfile,
-+ .fsync = ll_fsync,
-+#ifdef HAVE_F_OP_FLOCK
-+ .flock = ll_file_noflock,
-+#endif
-+ .lock = ll_file_noflock
-+};
-+
-+struct inode_operations ll_file_inode_operations = {
-+#ifdef HAVE_VFS_INTENT_PATCHES
-+ .setattr_raw = ll_setattr_raw,
-+#endif
-+ .setattr = ll_setattr,
-+ .truncate = ll_truncate,
-+ .getattr = ll_getattr,
-+ .permission = ll_inode_permission,
-+ .setxattr = ll_setxattr,
-+ .getxattr = ll_getxattr,
-+ .listxattr = ll_listxattr,
-+ .removexattr = ll_removexattr,
-+};
-+
-+/* dynamic ioctl number support routins */
-+static struct llioc_ctl_data {
-+ struct rw_semaphore ioc_sem;
-+ struct list_head ioc_head;
-+} llioc = {
-+ __RWSEM_INITIALIZER(llioc.ioc_sem),
-+ CFS_LIST_HEAD_INIT(llioc.ioc_head)
-+};
-+
-+
-+struct llioc_data {
-+ struct list_head iocd_list;
-+ unsigned int iocd_size;
-+ llioc_callback_t iocd_cb;
-+ unsigned int iocd_count;
-+ unsigned int iocd_cmd[0];
-+};
-+
-+void *ll_iocontrol_register(llioc_callback_t cb, int count, unsigned int *cmd)
-+{
-+ unsigned int size;
-+ struct llioc_data *in_data = NULL;
-+ ENTRY;
-+
-+ if (cb == NULL || cmd == NULL ||
-+ count > LLIOC_MAX_CMD || count < 0)
-+ RETURN(NULL);
-+
-+ size = sizeof(*in_data) + count * sizeof(unsigned int);
-+ OBD_ALLOC(in_data, size);
-+ if (in_data == NULL)
-+ RETURN(NULL);
-+
-+ memset(in_data, 0, sizeof(*in_data));
-+ in_data->iocd_size = size;
-+ in_data->iocd_cb = cb;
-+ in_data->iocd_count = count;
-+ memcpy(in_data->iocd_cmd, cmd, sizeof(unsigned int) * count);
-+
-+ down_write(&llioc.ioc_sem);
-+ list_add_tail(&in_data->iocd_list, &llioc.ioc_head);
-+ up_write(&llioc.ioc_sem);
-+
-+ RETURN(in_data);
-+}
-+
-+void ll_iocontrol_unregister(void *magic)
-+{
-+ struct llioc_data *tmp;
-+
-+ if (magic == NULL)
-+ return;
-+
-+ down_write(&llioc.ioc_sem);
-+ list_for_each_entry(tmp, &llioc.ioc_head, iocd_list) {
-+ if (tmp == magic) {
-+ unsigned int size = tmp->iocd_size;
-+
-+ list_del(&tmp->iocd_list);
-+ up_write(&llioc.ioc_sem);
-+
-+ OBD_FREE(tmp, size);
-+ return;
-+ }
-+ }
-+ up_write(&llioc.ioc_sem);
-+
-+ CWARN("didn't find iocontrol register block with magic: %p\n", magic);
-+}
-+
-+EXPORT_SYMBOL(ll_iocontrol_register);
-+EXPORT_SYMBOL(ll_iocontrol_unregister);
-+
-+enum llioc_iter ll_iocontrol_call(struct inode *inode, struct file *file,
-+ unsigned int cmd, unsigned long arg, int *rcp)
-+{
-+ enum llioc_iter ret = LLIOC_CONT;
-+ struct llioc_data *data;
-+ int rc = -EINVAL, i;
-+
-+ down_read(&llioc.ioc_sem);
-+ list_for_each_entry(data, &llioc.ioc_head, iocd_list) {
-+ for (i = 0; i < data->iocd_count; i++) {
-+ if (cmd != data->iocd_cmd[i])
-+ continue;
-+
-+ ret = data->iocd_cb(inode, file, cmd, arg, data, &rc);
-+ break;
-+ }
-+
-+ if (ret == LLIOC_STOP)
-+ break;
-+ }
-+ up_read(&llioc.ioc_sem);
-+
-+ if (rcp)
-+ *rcp = rc;
-+ return ret;
-+}
diff -urNad lustre~/lustre/llite/llite_internal.h lustre/lustre/llite/llite_internal.h
--- lustre~/lustre/llite/llite_internal.h 2009-08-19 09:51:09.000000000 +0200
-+++ lustre/lustre/llite/llite_internal.h 2009-08-19 14:10:45.000000000 +0200
++++ lustre/lustre/llite/llite_internal.h 2009-08-20 10:25:20.000000000 +0200
@@ -596,8 +596,13 @@
void ll_lookup_finish_locks(struct lookup_intent *it, struct dentry *dentry);
/* llite/rw.c */
-+#ifdef NO_PREPARE_WRITE
++#ifdef HAVE_WRITE_BEGIN_IN_STRUCT_ADDRESS_SPACE_OPERATIONS
+int ll_write_begin(struct file *, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata);
+int ll_write_end(struct file *, struct address_space *mapping, loff_t pos, unsigned len, unsigned copied, struct page *page, void *fsdata);
+#else
@@ -9225,1040 +1687,9 @@ diff -urNad lustre~/lustre/llite/llite_internal.h lustre/lustre/llite/llite_inte
/* llite/special.c */
extern struct inode_operations ll_special_inode_operations;
-diff -urNad lustre~/lustre/llite/llite_internal.h.orig lustre/lustre/llite/llite_internal.h.orig
---- lustre~/lustre/llite/llite_internal.h.orig 1970-01-01 01:00:00.000000000 +0100
-+++ lustre/lustre/llite/llite_internal.h.orig 2009-08-19 14:10:45.000000000 +0200
-@@ -0,0 +1,1027 @@
-+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-+ * vim:expandtab:shiftwidth=8:tabstop=8:
-+ *
-+ * GPL HEADER START
-+ *
-+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
-+ *
-+ * This program is free software; you can redistribute it and/or modify
-+ * it under the terms of the GNU General Public License version 2 only,
-+ * as published by the Free Software Foundation.
-+ *
-+ * This program is distributed in the hope that it will be useful, but
-+ * WITHOUT ANY WARRANTY; without even the implied warranty of
-+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-+ * General Public License version 2 for more details (a copy is included
-+ * in the LICENSE file that accompanied this code).
-+ *
-+ * You should have received a copy of the GNU General Public License
-+ * version 2 along with this program; If not, see
-+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
-+ *
-+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
-+ * CA 95054 USA or visit www.sun.com if you need additional information or
-+ * have any questions.
-+ *
-+ * GPL HEADER END
-+ */
-+/*
-+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved
-+ * Use is subject to license terms.
-+ */
-+/*
-+ * This file is part of Lustre, http://www.lustre.org/
-+ * Lustre is a trademark of Sun Microsystems, Inc.
-+ */
-+
-+#ifndef LLITE_INTERNAL_H
-+#define LLITE_INTERNAL_H
-+
-+#ifdef CONFIG_FS_POSIX_ACL
-+# include <linux/fs.h>
-+#ifdef HAVE_XATTR_ACL
-+# include <linux/xattr_acl.h>
-+#endif
-+#ifdef HAVE_LINUX_POSIX_ACL_XATTR_H
-+# include <linux/posix_acl_xattr.h>
-+#endif
-+#endif
-+
-+#include <lustre_debug.h>
-+#include <lustre_ver.h>
-+#include <linux/lustre_version.h>
-+#include <lustre_disk.h> /* for s2sbi */
-+
-+#ifndef HAVE_LE_TYPES
-+typedef __u16 __le16;
-+typedef __u32 __le32;
-+#endif
-+
-+/*
-+struct lustre_intent_data {
-+ __u64 it_lock_handle[2];
-+ __u32 it_disposition;
-+ __u32 it_status;
-+ __u32 it_lock_mode;
-+ }; */
-+
-+/* If there is no FMODE_EXEC defined, make it to match nothing */
-+#ifndef FMODE_EXEC
-+#define FMODE_EXEC 0
-+#endif
-+
-+#define LL_IT2STR(it) ((it) ? ldlm_it2str((it)->it_op) : "0")
-+#define LUSTRE_FPRIVATE(file) ((file)->private_data)
-+
-+#ifdef HAVE_VFS_INTENT_PATCHES
-+static inline struct lookup_intent *ll_nd2it(struct nameidata *nd)
-+{
-+ return &nd->intent;
-+}
-+#endif
-+
-+/*
-+ * Directory entries are currently in the same format as ext2/ext3, but will
-+ * be changed in the future to accomodate FIDs
-+ */
-+#define LL_DIR_NAME_LEN (255)
-+#define LL_DIR_PAD (4)
-+
-+struct ll_dir_entry {
-+ /* number of inode, referenced by this entry */
-+ __le32 lde_inode;
-+ /* total record length, multiple of LL_DIR_PAD */
-+ __le16 lde_rec_len;
-+ /* length of name */
-+ __u8 lde_name_len;
-+ /* file type: regular, directory, device, etc. */
-+ __u8 lde_file_type;
-+ /* name. NOT NUL-terminated */
-+ char lde_name[LL_DIR_NAME_LEN];
-+};
-+
-+struct ll_dentry_data {
-+ int lld_cwd_count;
-+ int lld_mnt_count;
-+ struct obd_client_handle lld_cwd_och;
-+ struct obd_client_handle lld_mnt_och;
-+#ifndef HAVE_VFS_INTENT_PATCHES
-+ struct lookup_intent *lld_it;
-+#endif
-+ unsigned int lld_sa_generation;
-+};
-+
-+#define ll_d2d(de) ((struct ll_dentry_data*)((de)->d_fsdata))
-+
-+extern struct file_operations ll_pgcache_seq_fops;
-+
-+#define LLI_INODE_MAGIC 0x111d0de5
-+#define LLI_INODE_DEAD 0xdeadd00d
-+#define LLI_F_HAVE_OST_SIZE_LOCK 0
-+#define LLI_F_HAVE_MDS_SIZE_LOCK 1
-+#define LLI_F_CONTENDED 2
-+#define LLI_F_SRVLOCK 3
-+
-+struct ll_inode_info {
-+ int lli_inode_magic;
-+ struct semaphore lli_size_sem; /* protect open and change size */
-+ void *lli_size_sem_owner;
-+ struct semaphore lli_write_sem;
-+ struct lov_stripe_md *lli_smd;
-+ char *lli_symlink_name;
-+ __u64 lli_maxbytes;
-+ __u64 lli_io_epoch;
-+ unsigned long lli_flags;
-+ cfs_time_t lli_contention_time;
-+
-+ /* this lock protects s_d_w and p_w_ll and mmap_cnt */
-+ spinlock_t lli_lock;
-+#ifdef HAVE_CLOSE_THREAD
-+ struct list_head lli_pending_write_llaps;
-+ struct list_head lli_close_item;
-+ int lli_send_done_writing;
-+#endif
-+ atomic_t lli_mmap_cnt;
-+
-+ /* for writepage() only to communicate to fsync */
-+ int lli_async_rc;
-+
-+ struct posix_acl *lli_posix_acl;
-+
-+ struct list_head lli_dead_list;
-+
-+ struct semaphore lli_och_sem; /* Protects access to och pointers
-+ and their usage counters */
-+ /* We need all three because every inode may be opened in different
-+ modes */
-+ struct obd_client_handle *lli_mds_read_och;
-+ __u64 lli_open_fd_read_count;
-+ struct obd_client_handle *lli_mds_write_och;
-+ __u64 lli_open_fd_write_count;
-+ struct obd_client_handle *lli_mds_exec_och;
-+ __u64 lli_open_fd_exec_count;
-+ struct inode lli_vfs_inode;
-+
-+ /* metadata stat-ahead */
-+ /*
-+ * "opendir_pid" is the token when lookup/revalid -- I am the owner of
-+ * dir statahead.
-+ */
-+ pid_t lli_opendir_pid;
-+ /*
-+ * since parent-child threads can share the same @file struct,
-+ * "opendir_key" is the token when dir close for case of parent exit
-+ * before child -- it is me should cleanup the dir readahead. */
-+ void *lli_opendir_key;
-+ struct ll_statahead_info *lli_sai;
-+};
-+
-+/*
-+ * Locking to guarantee consistency of non-atomic updates to long long i_size,
-+ * consistency between file size and KMS, and consistency within
-+ * ->lli_smd->lsm_oinfo[]'s.
-+ *
-+ * Implemented by ->lli_size_sem and ->lsm_sem, nested in that order.
-+ */
-+
-+void ll_inode_size_lock(struct inode *inode, int lock_lsm);
-+void ll_inode_size_unlock(struct inode *inode, int unlock_lsm);
-+
-+// FIXME: replace the name of this with LL_I to conform to kernel stuff
-+// static inline struct ll_inode_info *LL_I(struct inode *inode)
-+static inline struct ll_inode_info *ll_i2info(struct inode *inode)
-+{
-+ return container_of(inode, struct ll_inode_info, lli_vfs_inode);
-+}
-+
-+/* default to about 40meg of readahead on a given system. That much tied
-+ * up in 512k readahead requests serviced at 40ms each is about 1GB/s. */
-+#define SBI_DEFAULT_READAHEAD_MAX (40UL << (20 - CFS_PAGE_SHIFT))
-+
-+/* default to read-ahead full files smaller than 2MB on the second read */
-+#define SBI_DEFAULT_READAHEAD_WHOLE_MAX (2UL << (20 - CFS_PAGE_SHIFT))
-+
-+enum ra_stat {
-+ RA_STAT_HIT = 0,
-+ RA_STAT_MISS,
-+ RA_STAT_DISTANT_READPAGE,
-+ RA_STAT_MISS_IN_WINDOW,
-+ RA_STAT_FAILED_GRAB_PAGE,
-+ RA_STAT_FAILED_MATCH,
-+ RA_STAT_DISCARDED,
-+ RA_STAT_ZERO_LEN,
-+ RA_STAT_ZERO_WINDOW,
-+ RA_STAT_EOF,
-+ RA_STAT_MAX_IN_FLIGHT,
-+ RA_STAT_WRONG_GRAB_PAGE,
-+ _NR_RA_STAT,
-+};
-+
-+struct ll_ra_info {
-+ unsigned long ra_cur_pages;
-+ unsigned long ra_max_pages;
-+ unsigned long ra_max_read_ahead_whole_pages;
-+ unsigned long ra_stats[_NR_RA_STAT];
-+};
-+
-+/* LL_HIST_MAX=32 causes an overflow */
-+#define LL_HIST_MAX 28
-+#define LL_HIST_START 12 /* buckets start at 2^12 = 4k */
-+#define LL_PROCESS_HIST_MAX 10
-+struct per_process_info {
-+ pid_t pid;
-+ struct obd_histogram pp_r_hist;
-+ struct obd_histogram pp_w_hist;
-+};
-+
-+/* pp_extents[LL_PROCESS_HIST_MAX] will hold the combined process info */
-+struct ll_rw_extents_info {
-+ struct per_process_info pp_extents[LL_PROCESS_HIST_MAX + 1];
-+};
-+
-+#define LL_OFFSET_HIST_MAX 100
-+struct ll_rw_process_info {
-+ pid_t rw_pid;
-+ int rw_op;
-+ loff_t rw_range_start;
-+ loff_t rw_range_end;
-+ loff_t rw_last_file_pos;
-+ loff_t rw_offset;
-+ size_t rw_smallest_extent;
-+ size_t rw_largest_extent;
-+ struct file *rw_last_file;
-+};
-+
-+
-+enum stats_track_type {
-+ STATS_TRACK_ALL = 0, /* track all processes */
-+ STATS_TRACK_PID, /* track process with this pid */
-+ STATS_TRACK_PPID, /* track processes with this ppid */
-+ STATS_TRACK_GID, /* track processes with this gid */
-+ STATS_TRACK_LAST,
-+};
-+
-+/* flags for sbi->ll_flags */
-+#define LL_SBI_NOLCK 0x01 /* DLM locking disabled (directio-only) */
-+#define LL_SBI_DATA_CHECKSUM 0x02 /* checksum each page on the wire */
-+#define LL_SBI_FLOCK 0x04
-+#define LL_SBI_USER_XATTR 0x08 /* support user xattr */
-+#define LL_SBI_ACL 0x10 /* support ACL */
-+#define LL_SBI_JOIN 0x20 /* support JOIN */
-+#define LL_SBI_LOCALFLOCK 0x40 /* Local flocks support by kernel */
-+#define LL_SBI_LRU_RESIZE 0x80 /* support lru resize */
-+#define LL_SBI_LLITE_CHECKSUM 0x100 /* checksum each page in memory */
-+
-+/* default value for ll_sb_info->contention_time */
-+#define SBI_DEFAULT_CONTENTION_SECONDS 60
-+/* default value for lockless_truncate_enable */
-+#define SBI_DEFAULT_LOCKLESS_TRUNCATE_ENABLE 1
-+
-+struct ll_sb_info {
-+ struct list_head ll_list;
-+ /* this protects pglist and ra_info. It isn't safe to
-+ * grab from interrupt contexts */
-+ spinlock_t ll_lock;
-+ spinlock_t ll_pp_extent_lock; /* Lock for pp_extent entries */
-+ spinlock_t ll_process_lock; /* Lock for ll_rw_process_info */
-+ struct obd_uuid ll_sb_uuid;
-+ struct obd_export *ll_mdc_exp;
-+ struct obd_export *ll_osc_exp;
-+ struct proc_dir_entry *ll_proc_root;
-+ obd_id ll_rootino; /* number of root inode */
-+
-+ int ll_flags;
-+ struct list_head ll_conn_chain; /* per-conn chain of SBs */
-+ struct lustre_client_ocd ll_lco;
-+
-+ struct list_head ll_orphan_dentry_list; /*please don't ask -p*/
-+ struct ll_close_queue *ll_lcq;
-+
-+ struct lprocfs_stats *ll_stats; /* lprocfs stats counter */
-+
-+ unsigned long ll_async_page_max;
-+ unsigned long ll_async_page_count;
-+ unsigned long ll_pglist_gen;
-+ struct list_head ll_pglist; /* all pages (llap_pglist_item) */
-+
-+ unsigned ll_contention_time; /* seconds */
-+ unsigned ll_lockless_truncate_enable; /* true/false */
-+
-+ struct ll_ra_info ll_ra_info;
-+ unsigned int ll_namelen;
-+ struct file_operations *ll_fop;
-+
-+#ifdef HAVE_EXPORT___IGET
-+ struct list_head ll_deathrow; /* inodes to be destroyed (b1443) */
-+ spinlock_t ll_deathrow_lock;
-+#endif
-+ /* =0 - hold lock over whole read/write
-+ * >0 - max. chunk to be read/written w/o lock re-acquiring */
-+ unsigned long ll_max_rw_chunk;
-+
-+ /* Statistics */
-+ struct ll_rw_extents_info ll_rw_extents_info;
-+ int ll_extent_process_count;
-+ struct ll_rw_process_info ll_rw_process_info[LL_PROCESS_HIST_MAX];
-+ unsigned int ll_offset_process_count;
-+ struct ll_rw_process_info ll_rw_offset_info[LL_OFFSET_HIST_MAX];
-+ unsigned int ll_rw_offset_entry_count;
-+ enum stats_track_type ll_stats_track_type;
-+ int ll_stats_track_id;
-+ int ll_rw_stats_on;
-+ dev_t ll_sdev_orig; /* save s_dev before assign for
-+ * clustred nfs */
-+
-+ /* metadata stat-ahead */
-+ unsigned int ll_sa_max; /* max statahead RPCs */
-+ unsigned int ll_sa_wrong; /* statahead thread stopped for
-+ * low hit ratio */
-+ unsigned int ll_sa_total; /* statahead thread started
-+ * count */
-+ unsigned long long ll_sa_blocked; /* ls count waiting for
-+ * statahead */
-+ unsigned long long ll_sa_cached; /* ls count got in cache */
-+ unsigned long long ll_sa_hit; /* hit count */
-+ unsigned long long ll_sa_miss; /* miss count */
-+};
-+
-+#define LL_DEFAULT_MAX_RW_CHUNK (32 * 1024 * 1024)
-+
-+struct ll_ra_read {
-+ pgoff_t lrr_start;
-+ pgoff_t lrr_count;
-+ struct task_struct *lrr_reader;
-+ struct list_head lrr_linkage;
-+};
-+
-+/*
-+ * per file-descriptor read-ahead data.
-+ */
-+struct ll_readahead_state {
-+ spinlock_t ras_lock;
-+ /*
-+ * index of the last page that read(2) needed and that wasn't in the
-+ * cache. Used by ras_update() to detect seeks.
-+ *
-+ * XXX nikita: if access seeks into cached region, Lustre doesn't see
-+ * this.
-+ */
-+ unsigned long ras_last_readpage;
-+ /*
-+ * number of pages read after last read-ahead window reset. As window
-+ * is reset on each seek, this is effectively a number of consecutive
-+ * accesses. Maybe ->ras_accessed_in_window is better name.
-+ *
-+ * XXX nikita: window is also reset (by ras_update()) when Lustre
-+ * believes that memory pressure evicts read-ahead pages. In that
-+ * case, it probably doesn't make sense to expand window to
-+ * PTLRPC_MAX_BRW_PAGES on the third access.
-+ */
-+ unsigned long ras_consecutive_pages;
-+ /*
-+ * number of read requests after the last read-ahead window reset
-+ * As window is reset on each seek, this is effectively the number
-+ * on consecutive read request and is used to trigger read-ahead.
-+ */
-+ unsigned long ras_consecutive_requests;
-+ /*
-+ * Parameters of current read-ahead window. Handled by
-+ * ras_update(). On the initial access to the file or after a seek,
-+ * window is reset to 0. After 3 consecutive accesses, window is
-+ * expanded to PTLRPC_MAX_BRW_PAGES. Afterwards, window is enlarged by
-+ * PTLRPC_MAX_BRW_PAGES chunks up to ->ra_max_pages.
-+ */
-+ unsigned long ras_window_start, ras_window_len;
-+ /*
-+ * Where next read-ahead should start at. This lies within read-ahead
-+ * window. Read-ahead window is read in pieces rather than at once
-+ * because: 1. lustre limits total number of pages under read-ahead by
-+ * ->ra_max_pages (see ll_ra_count_get()), 2. client cannot read pages
-+ * not covered by DLM lock.
-+ */
-+ unsigned long ras_next_readahead;
-+ /*
-+ * Total number of ll_file_read requests issued, reads originating
-+ * due to mmap are not counted in this total. This value is used to
-+ * trigger full file read-ahead after multiple reads to a small file.
-+ */
-+ unsigned long ras_requests;
-+ /*
-+ * Page index with respect to the current request, these value
-+ * will not be accurate when dealing with reads issued via mmap.
-+ */
-+ unsigned long ras_request_index;
-+ /*
-+ * list of struct ll_ra_read's one per read(2) call current in
-+ * progress against this file descriptor. Used by read-ahead code,
-+ * protected by ->ras_lock.
-+ */
-+ struct list_head ras_read_beads;
-+ /*
-+ * The following 3 items are used for detecting the stride I/O
-+ * mode.
-+ * In stride I/O mode,
-+ * ...............|-----data-----|****gap*****|--------|******|....
-+ * offset |-stride_pages-|-stride_gap-|
-+ * ras_stride_offset = offset;
-+ * ras_stride_length = stride_pages + stride_gap;
-+ * ras_stride_pages = stride_pages;
-+ * Note: all these three items are counted by pages.
-+ */
-+ unsigned long ras_stride_length;
-+ unsigned long ras_stride_pages;
-+ pgoff_t ras_stride_offset;
-+ /*
-+ * number of consecutive stride request count, and it is similar as
-+ * ras_consecutive_requests, but used for stride I/O mode.
-+ * Note: only more than 2 consecutive stride request are detected,
-+ * stride read-ahead will be enable
-+ */
-+ unsigned long ras_consecutive_stride_requests;
-+};
-+
-+extern cfs_mem_cache_t *ll_file_data_slab;
-+struct lustre_handle;
-+struct ll_file_data {
-+ struct ll_readahead_state fd_ras;
-+ int fd_omode;
-+ struct lustre_handle fd_cwlockh;
-+ unsigned long fd_gid;
-+ __u32 fd_flags;
-+};
-+
-+struct lov_stripe_md;
-+
-+extern spinlock_t inode_lock;
-+
-+extern struct proc_dir_entry *proc_lustre_fs_root;
-+
-+static inline struct inode *ll_info2i(struct ll_inode_info *lli)
-+{
-+ return &lli->lli_vfs_inode;
-+}
-+
-+struct it_cb_data {
-+ struct inode *icbd_parent;
-+ struct dentry **icbd_childp;
-+ obd_id hash;
-+};
-+
-+void ll_i2gids(__u32 *suppgids, struct inode *i1,struct inode *i2);
-+
-+#define LLAP_MAGIC 98764321
-+
-+extern cfs_mem_cache_t *ll_async_page_slab;
-+extern size_t ll_async_page_slab_size;
-+struct ll_async_page {
-+ int llap_magic;
-+ /* only trust these if the page lock is providing exclusion */
-+ unsigned int llap_write_queued:1,
-+ llap_defer_uptodate:1,
-+ llap_origin:3,
-+ llap_ra_used:1,
-+ llap_ignore_quota:1,
-+ llap_nocache:1,
-+ llap_lockless_io_page:1;
-+ void *llap_cookie;
-+ struct page *llap_page;
-+ struct list_head llap_pending_write;
-+ struct list_head llap_pglist_item;
-+ /* checksum for paranoid I/O debugging */
-+ __u32 llap_checksum;
-+};
-+
-+/*
-+ * enumeration of llap_from_page() call-sites. Used to export statistics in
-+ * /proc/fs/lustre/llite/fsN/dump_page_cache.
-+ */
-+enum {
-+ LLAP_ORIGIN_UNKNOWN = 0,
-+ LLAP_ORIGIN_READPAGE,
-+ LLAP_ORIGIN_READAHEAD,
-+ LLAP_ORIGIN_COMMIT_WRITE,
-+ LLAP_ORIGIN_WRITEPAGE,
-+ LLAP_ORIGIN_REMOVEPAGE,
-+ LLAP_ORIGIN_LOCKLESS_IO,
-+ LLAP__ORIGIN_MAX,
-+};
-+extern char *llap_origins[];
-+
-+#ifdef HAVE_REGISTER_CACHE
-+#define ll_register_cache(cache) register_cache(cache)
-+#define ll_unregister_cache(cache) unregister_cache(cache)
-+#else
-+#define ll_register_cache(cache) do {} while (0)
-+#define ll_unregister_cache(cache) do {} while (0)
-+#endif
-+
-+void ll_ra_read_in(struct file *f, struct ll_ra_read *rar);
-+void ll_ra_read_ex(struct file *f, struct ll_ra_read *rar);
-+struct ll_ra_read *ll_ra_read_get(struct file *f);
-+
-+/* llite/lproc_llite.c */
-+#ifdef LPROCFS
-+int lprocfs_register_mountpoint(struct proc_dir_entry *parent,
-+ struct super_block *sb, char *osc, char *mdc);
-+void lprocfs_unregister_mountpoint(struct ll_sb_info *sbi);
-+void ll_stats_ops_tally(struct ll_sb_info *sbi, int op, int count);
-+void lprocfs_llite_init_vars(struct lprocfs_static_vars *lvars);
-+#else
-+static inline int lprocfs_register_mountpoint(struct proc_dir_entry *parent,
-+ struct super_block *sb, char *osc, char *mdc){return 0;}
-+static inline void lprocfs_unregister_mountpoint(struct ll_sb_info *sbi) {}
-+static void ll_stats_ops_tally(struct ll_sb_info *sbi, int op, int count) {}
-+static void lprocfs_llite_init_vars(struct lprocfs_static_vars *lvars)
-+{
-+ memset(lvars, 0, sizeof(*lvars));
-+}
-+#endif
-+
-+
-+/* llite/dir.c */
-+extern struct file_operations ll_dir_operations;
-+extern struct inode_operations ll_dir_inode_operations;
-+
-+struct page *ll_get_dir_page(struct inode *dir, unsigned long n);
-+
-+static inline unsigned ll_dir_rec_len(unsigned name_len)
-+{
-+ return (name_len + 8 + LL_DIR_PAD - 1) & ~(LL_DIR_PAD - 1);
-+}
-+
-+static inline struct ll_dir_entry *ll_entry_at(void *base, unsigned offset)
-+{
-+ return (struct ll_dir_entry *)((char *)base + offset);
-+}
-+
-+/*
-+ * p is at least 6 bytes before the end of page
-+ */
-+static inline struct ll_dir_entry *ll_dir_next_entry(struct ll_dir_entry *p)
-+{
-+ return ll_entry_at(p, le16_to_cpu(p->lde_rec_len));
-+}
-+
-+static inline void ll_put_page(struct page *page)
-+{
-+ kunmap(page);
-+ page_cache_release(page);
-+}
-+
-+static inline unsigned long dir_pages(struct inode *inode)
-+{
-+ return (inode->i_size + CFS_PAGE_SIZE - 1) >> CFS_PAGE_SHIFT;
-+}
-+
-+int ll_objects_destroy(struct ptlrpc_request *request, struct inode *dir);
-+struct inode *ll_iget(struct super_block *sb, ino_t hash,
-+ struct lustre_md *lic);
-+int ll_mdc_cancel_unused(struct lustre_handle *, struct inode *, int flags,
-+ void *opaque);
-+int ll_mdc_blocking_ast(struct ldlm_lock *, struct ldlm_lock_desc *,
-+ void *data, int flag);
-+int ll_prepare_mdc_op_data(struct mdc_op_data *,
-+ struct inode *i1, struct inode *i2,
-+ const char *name, int namelen, int mode, void *data);
-+#ifndef HAVE_VFS_INTENT_PATCHES
-+struct lookup_intent *ll_convert_intent(struct open_intent *oit,
-+ int lookup_flags);
-+#endif
-+void ll_pin_extent_cb(void *data);
-+int ll_page_removal_cb(void *data, int discard);
-+int ll_extent_lock_cancel_cb(struct ldlm_lock *lock, struct ldlm_lock_desc *new,
-+ void *data, int flag);
-+int lookup_it_finish(struct ptlrpc_request *request, int offset,
-+ struct lookup_intent *it, void *data);
-+void ll_lookup_finish_locks(struct lookup_intent *it, struct dentry *dentry);
-+
-+/* llite/rw.c */
-+int ll_prepare_write(struct file *, struct page *, unsigned from, unsigned to);
-+int ll_commit_write(struct file *, struct page *, unsigned from, unsigned to);
-+int ll_writepage(struct page *page);
-+void ll_inode_fill_obdo(struct inode *inode, int cmd, struct obdo *oa);
-+int ll_ap_completion(void *data, int cmd, struct obdo *oa, int rc);
-+int llap_shrink_cache(struct ll_sb_info *sbi, int shrink_fraction);
-+extern struct cache_definition ll_cache_definition;
-+void ll_removepage(struct page *page);
-+int ll_readpage(struct file *file, struct page *page);
-+struct ll_async_page *llap_cast_private(struct page *page);
-+void ll_readahead_init(struct inode *inode, struct ll_readahead_state *ras);
-+void ll_ra_accounting(struct ll_async_page *llap,struct address_space *mapping);
-+void ll_truncate(struct inode *inode);
-+int ll_file_punch(struct inode *, loff_t, int);
-+ssize_t ll_file_lockless_io(struct file *, const struct iovec *,
-+ unsigned long, loff_t *, int, ssize_t);
-+void ll_clear_file_contended(struct inode*);
-+int ll_sync_page_range(struct inode *, struct address_space *, loff_t, size_t);
-+
-+/* llite/file.c */
-+extern struct file_operations ll_file_operations;
-+extern struct file_operations ll_file_operations_flock;
-+extern struct file_operations ll_file_operations_noflock;
-+extern struct inode_operations ll_file_inode_operations;
-+extern int ll_inode_revalidate_it(struct dentry *, struct lookup_intent *);
-+extern int ll_have_md_lock(struct inode *inode, __u64 bits);
-+int ll_region_mapped(unsigned long addr, size_t count);
-+int ll_extent_lock(struct ll_file_data *, struct inode *,
-+ struct lov_stripe_md *, int mode, ldlm_policy_data_t *,
-+ struct lustre_handle *, int ast_flags);
-+int ll_extent_unlock(struct ll_file_data *, struct inode *,
-+ struct lov_stripe_md *, int mode, struct lustre_handle *);
-+int ll_file_open(struct inode *inode, struct file *file);
-+int ll_file_release(struct inode *inode, struct file *file);
-+int ll_lsm_getattr(struct obd_export *, struct lov_stripe_md *, struct obdo *);
-+int ll_glimpse_ioctl(struct ll_sb_info *sbi,
-+ struct lov_stripe_md *lsm, lstat_t *st);
-+int ll_glimpse_size(struct inode *inode, int ast_flags);
-+int ll_local_open(struct file *file,
-+ struct lookup_intent *it, struct ll_file_data *fd,
-+ struct obd_client_handle *och);
-+int ll_release_openhandle(struct dentry *, struct lookup_intent *);
-+int ll_mdc_close(struct obd_export *mdc_exp, struct inode *inode,
-+ struct file *file);
-+int ll_mdc_real_close(struct inode *inode, int flags);
-+extern void ll_rw_stats_tally(struct ll_sb_info *sbi, pid_t pid, struct file
-+ *file, size_t count, int rw);
-+int ll_getattr_it(struct vfsmount *mnt, struct dentry *de,
-+ struct lookup_intent *it, struct kstat *stat);
-+int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat);
-+struct ll_file_data *ll_file_data_get(void);
-+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
-+int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd);
-+#else
-+int ll_inode_permission(struct inode *inode, int mask);
-+#endif
-+int ll_lov_setstripe_ea_info(struct inode *inode, struct file *file,
-+ int flags, struct lov_user_md *lum,
-+ int lum_size);
-+int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename,
-+ struct lov_mds_md **lmm, int *lmm_size,
-+ struct ptlrpc_request **request);
-+int ll_dir_setstripe(struct inode *inode, struct lov_user_md *lump,
-+ int set_default);
-+int ll_dir_getstripe(struct inode *inode, struct lov_mds_md **lmm,
-+ int *lmm_size, struct ptlrpc_request **request);
-+int ll_fsync(struct file *file, struct dentry *dentry, int data);
-+int ll_fiemap(struct inode *inode, struct ll_user_fiemap *fiemap,
-+ int num_bytes);
-+
-+/* llite/dcache.c */
-+/* llite/namei.c */
-+/**
-+ * protect race ll_find_aliases vs ll_revalidate_it vs ll_unhash_aliases
-+ */
-+extern spinlock_t ll_lookup_lock;
-+extern struct dentry_operations ll_d_ops;
-+void ll_intent_drop_lock(struct lookup_intent *);
-+void ll_intent_release(struct lookup_intent *);
-+extern void ll_set_dd(struct dentry *de);
-+int ll_drop_dentry(struct dentry *dentry);
-+void ll_unhash_aliases(struct inode *);
-+void ll_frob_intent(struct lookup_intent **itp, struct lookup_intent *deft);
-+void ll_lookup_finish_locks(struct lookup_intent *it, struct dentry *dentry);
-+int ll_dcompare(struct dentry *parent, struct qstr *d_name, struct qstr *name);
-+int revalidate_it_finish(struct ptlrpc_request *request, int offset,
-+ struct lookup_intent *it, struct dentry *de);
-+
-+/* llite/llite_lib.c */
-+extern struct super_operations lustre_super_operations;
-+
-+char *ll_read_opt(const char *opt, char *data);
-+void ll_lli_init(struct ll_inode_info *lli);
-+int ll_fill_super(struct super_block *sb);
-+void ll_put_super(struct super_block *sb);
-+void ll_kill_super(struct super_block *sb);
-+struct inode *ll_inode_from_lock(struct ldlm_lock *lock);
-+void ll_clear_inode(struct inode *inode);
-+int ll_setattr_raw(struct inode *inode, struct iattr *attr);
-+int ll_setattr(struct dentry *de, struct iattr *attr);
-+#ifndef HAVE_STATFS_DENTRY_PARAM
-+int ll_statfs(struct super_block *sb, struct kstatfs *sfs);
-+#else
-+int ll_statfs(struct dentry *de, struct kstatfs *sfs);
-+#endif
-+int ll_statfs_internal(struct super_block *sb, struct obd_statfs *osfs,
-+ __u64 max_age, __u32 flags);
-+void ll_update_inode(struct inode *inode, struct lustre_md *md);
-+void ll_read_inode2(struct inode *inode, void *opaque);
-+int ll_iocontrol(struct inode *inode, struct file *file,
-+ unsigned int cmd, unsigned long arg);
-+#ifdef HAVE_UMOUNTBEGIN_VFSMOUNT
-+void ll_umount_begin(struct vfsmount *vfsmnt, int flags);
-+#else
-+void ll_umount_begin(struct super_block *sb);
-+#endif
-+int ll_remount_fs(struct super_block *sb, int *flags, char *data);
-+int ll_show_options(struct seq_file *seq, struct vfsmount *vfs);
-+int ll_prep_inode(struct obd_export *exp, struct inode **inode,
-+ struct ptlrpc_request *req, int offset, struct super_block *);
-+void lustre_dump_dentry(struct dentry *, int recur);
-+void lustre_dump_inode(struct inode *);
-+struct ll_async_page *llite_pglist_next_llap(struct ll_sb_info *sbi,
-+ struct list_head *list);
-+int ll_obd_statfs(struct inode *inode, void *arg);
-+int ll_get_max_mdsize(struct ll_sb_info *sbi, int *max_mdsize);
-+int ll_process_config(struct lustre_cfg *lcfg);
-+
-+/* llite/llite_nfs.c */
-+extern struct export_operations lustre_export_operations;
-+__u32 get_uuid2int(const char *name, int len);
-+struct dentry *ll_fh_to_dentry(struct super_block *sb, __u32 *data, int len,
-+ int fhtype, int parent);
-+int ll_dentry_to_fh(struct dentry *, __u32 *datap, int *lenp, int need_parent);
-+
-+/* llite/special.c */
-+extern struct inode_operations ll_special_inode_operations;
-+extern struct file_operations ll_special_chr_inode_fops;
-+extern struct file_operations ll_special_chr_file_fops;
-+extern struct file_operations ll_special_blk_inode_fops;
-+extern struct file_operations ll_special_fifo_inode_fops;
-+extern struct file_operations ll_special_fifo_file_fops;
-+extern struct file_operations ll_special_sock_inode_fops;
-+
-+/* llite/symlink.c */
-+extern struct inode_operations ll_fast_symlink_inode_operations;
-+
-+/* llite/llite_close.c */
-+struct ll_close_queue {
-+ spinlock_t lcq_lock;
-+ struct list_head lcq_list;
-+ wait_queue_head_t lcq_waitq;
-+ struct completion lcq_comp;
-+};
-+
-+#ifdef HAVE_CLOSE_THREAD
-+void llap_write_pending(struct inode *inode, struct ll_async_page *llap);
-+void llap_write_complete(struct inode *inode, struct ll_async_page *llap);
-+void ll_open_complete(struct inode *inode);
-+int ll_is_inode_dirty(struct inode *inode);
-+void ll_try_done_writing(struct inode *inode);
-+void ll_queue_done_writing(struct inode *inode);
-+#else
-+static inline void llap_write_pending(struct inode *inode,
-+ struct ll_async_page *llap) { return; };
-+static inline void llap_write_complete(struct inode *inode,
-+ struct ll_async_page *llap) { return; };
-+static inline void ll_open_complete(struct inode *inode) { return; };
-+static inline int ll_is_inode_dirty(struct inode *inode) { return 0; };
-+static inline void ll_try_done_writing(struct inode *inode) { return; };
-+static inline void ll_queue_done_writing(struct inode *inode) { return; };
-+//static inline void ll_close_thread_shutdown(struct ll_close_queue *lcq) { return; };
-+//static inline int ll_close_thread_start(struct ll_close_queue **lcq_ret) { return 0; };
-+#endif
-+void ll_close_thread_shutdown(struct ll_close_queue *lcq);
-+int ll_close_thread_start(struct ll_close_queue **lcq_ret);
-+
-+/* llite/llite_mmap.c */
-+typedef struct rb_root rb_root_t;
-+typedef struct rb_node rb_node_t;
-+
-+struct ll_lock_tree_node;
-+struct ll_lock_tree {
-+ rb_root_t lt_root;
-+ struct list_head lt_locked_list;
-+ struct ll_file_data *lt_fd;
-+};
-+
-+int ll_teardown_mmaps(struct address_space *mapping, __u64 first, __u64 last);
-+int ll_file_mmap(struct file * file, struct vm_area_struct * vma);
-+struct ll_lock_tree_node * ll_node_from_inode(struct inode *inode, __u64 start,
-+ __u64 end, ldlm_mode_t mode);
-+int ll_tree_lock(struct ll_lock_tree *tree,
-+ struct ll_lock_tree_node *first_node,
-+ const char *buf, size_t count, int ast_flags);
-+int ll_tree_lock_iov(struct ll_lock_tree *tree,
-+ struct ll_lock_tree_node *first_node,
-+ const struct iovec *iov, unsigned long nr_segs,
-+ int ast_flags);
-+int ll_tree_unlock(struct ll_lock_tree *tree);
-+
-+#define ll_s2sbi(sb) (s2lsi(sb)->lsi_llsbi)
-+
-+static inline __u64 ll_ts2u64(struct timespec *time)
-+{
-+ __u64 t = time->tv_sec;
-+ return t;
-+}
-+
-+/* don't need an addref as the sb_info should be holding one */
-+static inline struct obd_export *ll_s2obdexp(struct super_block *sb)
-+{
-+ return ll_s2sbi(sb)->ll_osc_exp;
-+}
-+
-+/* don't need an addref as the sb_info should be holding one */
-+static inline struct obd_export *ll_s2mdcexp(struct super_block *sb)
-+{
-+ return ll_s2sbi(sb)->ll_mdc_exp;
-+}
-+
-+static inline struct client_obd *sbi2mdc(struct ll_sb_info *sbi)
-+{
-+ struct obd_device *obd = sbi->ll_mdc_exp->exp_obd;
-+ if (obd == NULL)
-+ LBUG();
-+ return &obd->u.cli;
-+}
-+
-+// FIXME: replace the name of this with LL_SB to conform to kernel stuff
-+static inline struct ll_sb_info *ll_i2sbi(struct inode *inode)
-+{
-+ return ll_s2sbi(inode->i_sb);
-+}
-+
-+static inline struct obd_export *ll_i2obdexp(struct inode *inode)
-+{
-+ return ll_s2obdexp(inode->i_sb);
-+}
-+
-+static inline struct obd_export *ll_i2mdcexp(struct inode *inode)
-+{
-+ return ll_s2mdcexp(inode->i_sb);
-+}
-+
-+static inline void ll_inode2fid(struct ll_fid *fid, struct inode *inode)
-+{
-+ mdc_pack_fid(fid, inode->i_ino, inode->i_generation,
-+ inode->i_mode & S_IFMT);
-+}
-+
-+static inline int ll_mds_max_easize(struct super_block *sb)
-+{
-+ return sbi2mdc(ll_s2sbi(sb))->cl_max_mds_easize;
-+}
-+
-+static inline __u64 ll_file_maxbytes(struct inode *inode)
-+{
-+ return ll_i2info(inode)->lli_maxbytes;
-+}
-+
-+/* llite/xattr.c */
-+int ll_setxattr(struct dentry *dentry, const char *name,
-+ const void *value, size_t size, int flags);
-+ssize_t ll_getxattr(struct dentry *dentry, const char *name,
-+ void *buffer, size_t size);
-+ssize_t ll_listxattr(struct dentry *dentry, char *buffer, size_t size);
-+int ll_removexattr(struct dentry *dentry, const char *name);
-+
-+/* statahead.c */
-+
-+#define LL_SA_RPC_MIN 2
-+#define LL_SA_RPC_DEF 32
-+#define LL_SA_RPC_MAX 8192
-+
-+/* per inode struct, for dir only */
-+struct ll_statahead_info {
-+ struct inode *sai_inode;
-+ unsigned int sai_generation; /* generation for statahead */
-+ atomic_t sai_refcount; /* when access this struct, hold
-+ * refcount */
-+ unsigned int sai_sent; /* stat requests sent count */
-+ unsigned int sai_replied; /* stat requests which received
-+ * reply */
-+ unsigned int sai_max; /* max ahead of lookup */
-+ unsigned int sai_index; /* index of statahead entry */
-+ unsigned int sai_index_next; /* index for the next statahead
-+ * entry to be stated */
-+ unsigned int sai_hit; /* hit count */
-+ unsigned int sai_miss; /* miss count:
-+ * for "ls -al" case, it includes
-+ * hidden dentry miss;
-+ * for "ls -l" case, it does not
-+ * include hidden dentry miss.
-+ * "sai_miss_hidden" is used for
-+ * the later case.
-+ */
-+ unsigned int sai_consecutive_miss; /* consecutive miss */
-+ unsigned int sai_miss_hidden;/* "ls -al", but first dentry
-+ * is not a hidden one */
-+ unsigned int sai_skip_hidden;/* skipped hidden dentry count */
-+ unsigned int sai_ls_all:1; /* "ls -al", do stat-ahead for
-+ * hidden entries */
-+ cfs_waitq_t sai_waitq; /* stat-ahead wait queue */
-+ struct ptlrpc_thread sai_thread; /* stat-ahead thread */
-+ struct list_head sai_entries_sent; /* entries sent out */
-+ struct list_head sai_entries_received; /* entries returned */
-+ struct list_head sai_entries_stated; /* entries stated */
-+};
-+
-+int do_statahead_enter(struct inode *dir, struct dentry **dentry, int lookup);
-+void ll_statahead_exit(struct dentry *dentry, int result);
-+void ll_stop_statahead(struct inode *inode, void *key);
-+
-+static inline
-+void ll_statahead_mark(struct dentry *dentry)
-+{
-+ struct ll_inode_info *lli = ll_i2info(dentry->d_parent->d_inode);
-+ struct ll_dentry_data *ldd = ll_d2d(dentry);
-+
-+ /* not the same process, don't mark */
-+ if (lli->lli_opendir_pid != cfs_curproc_pid())
-+ return;
-+
-+ spin_lock(&lli->lli_lock);
-+ if (likely(lli->lli_sai != NULL && ldd != NULL))
-+ ldd->lld_sa_generation = lli->lli_sai->sai_generation;
-+ spin_unlock(&lli->lli_lock);
-+}
-+
-+static inline
-+int ll_statahead_enter(struct inode *dir, struct dentry **dentryp, int lookup)
-+{
-+ struct ll_sb_info *sbi = ll_i2sbi(dir);
-+ struct ll_inode_info *lli = ll_i2info(dir);
-+ struct ll_dentry_data *ldd = ll_d2d(*dentryp);
-+
-+ if (sbi->ll_sa_max == 0)
-+ return -ENOTSUPP;
-+
-+ /* not the same process, don't statahead */
-+ if (lli->lli_opendir_pid != cfs_curproc_pid())
-+ return -EBADF;
-+
-+ /*
-+ * When "ls" a dentry, the system trigger more than once "revalidate" or
-+ * "lookup", for "getattr", for "getxattr", and maybe for others.
-+ * Under patchless client mode, the operation intent is not accurate,
-+ * it maybe misguide the statahead thread. For example:
-+ * The "revalidate" call for "getattr" and "getxattr" of a dentry maybe
-+ * have the same operation intent -- "IT_GETATTR".
-+ * In fact, one dentry should has only one chance to interact with the
-+ * statahead thread, otherwise the statahead windows will be confused.
-+ * The solution is as following:
-+ * Assign "lld_sa_generation" with "sai_generation" when a dentry
-+ * "IT_GETATTR" for the first time, and the subsequent "IT_GETATTR"
-+ * will bypass interacting with statahead thread for checking:
-+ * "lld_sa_generation == lli_sai->sai_generation"
-+ */
-+ if (ldd && lli->lli_sai &&
-+ ldd->lld_sa_generation == lli->lli_sai->sai_generation)
-+ return -EAGAIN;
-+
-+ return do_statahead_enter(dir, dentryp, lookup);
-+}
-+
-+static void inline ll_dops_init(struct dentry *de, int block)
-+{
-+ struct ll_dentry_data *lld = ll_d2d(de);
-+
-+ if (lld == NULL && block != 0) {
-+ ll_set_dd(de);
-+ lld = ll_d2d(de);
-+ }
-+
-+ if (lld != NULL)
-+ lld->lld_sa_generation = 0;
-+
-+ de->d_op = &ll_d_ops;
-+}
-+
-+/* llite ioctl register support rountine */
-+#ifdef __KERNEL__
-+enum llioc_iter {
-+ LLIOC_CONT = 0,
-+ LLIOC_STOP
-+};
-+
-+#define LLIOC_MAX_CMD 256
-+
-+/*
-+ * Rules to write a callback function:
-+ *
-+ * Parameters:
-+ * @magic: Dynamic ioctl call routine will feed this vaule with the pointer
-+ * returned to ll_iocontrol_register. Callback functions should use this
-+ * data to check the potential collasion of ioctl cmd. If collasion is
-+ * found, callback function should return LLIOC_CONT.
-+ * @rcp: The result of ioctl command.
-+ *
-+ * Return values:
-+ * If @magic matches the pointer returned by ll_iocontrol_data, the
-+ * callback should return LLIOC_STOP; return LLIOC_STOP otherwise.
-+ */
-+typedef enum llioc_iter (*llioc_callback_t)(struct inode *inode,
-+ struct file *file, unsigned int cmd, unsigned long arg,
-+ void *magic, int *rcp);
-+
-+enum llioc_iter ll_iocontrol_call(struct inode *inode, struct file *file,
-+ unsigned int cmd, unsigned long arg, int *rcp);
-+
-+/* export functions */
-+/* Register ioctl block dynamatically for a regular file.
-+ *
-+ * @cmd: the array of ioctl command set
-+ * @count: number of commands in the @cmd
-+ * @cb: callback function, it will be called if an ioctl command is found to
-+ * belong to the command list @cmd.
-+ *
-+ * Return vaule:
-+ * A magic pointer will be returned if success;
-+ * otherwise, NULL will be returned.
-+ * */
-+void *ll_iocontrol_register(llioc_callback_t cb, int count, unsigned int *cmd);
-+void ll_iocontrol_unregister(void *magic);
-+
-+#endif
-+
-+#endif /* LLITE_INTERNAL_H */
diff -urNad lustre~/lustre/llite/llite_lib.c lustre/lustre/llite/llite_lib.c
--- lustre~/lustre/llite/llite_lib.c 2009-08-19 09:51:09.000000000 +0200
-+++ lustre/lustre/llite/llite_lib.c 2009-08-19 14:10:45.000000000 +0200
++++ lustre/lustre/llite/llite_lib.c 2009-08-20 10:25:20.000000000 +0200
@@ -1346,7 +1346,7 @@
rc = vmtruncate(inode, new_size);
clear_bit(LLI_F_SRVLOCK, &lli->lli_flags);
@@ -10268,2257 +1699,18 @@ diff -urNad lustre~/lustre/llite/llite_lib.c lustre/lustre/llite/llite_lib.c
ll_inode_size_unlock(inode, 0);
}
}
-@@ -1406,7 +1406,11 @@
+@@ -1406,7 +1406,7 @@
/* POSIX: check before ATTR_*TIME_SET set (from inode_change_ok) */
if (ia_valid & (ATTR_MTIME_SET | ATTR_ATIME_SET)) {
-+#ifdef HAS_STRUCT_CRED
-+ if (current->real_cred->fsuid != inode->i_uid &&
-+#else
- if (current->fsuid != inode->i_uid &&
-+#endif
+- if (current->fsuid != inode->i_uid &&
++ if (CREDENTIALS(current,fsuid) != inode->i_uid &&
!cfs_capable(CFS_CAP_FOWNER))
RETURN(-EPERM);
}
-diff -urNad lustre~/lustre/llite/llite_lib.c.orig lustre/lustre/llite/llite_lib.c.orig
---- lustre~/lustre/llite/llite_lib.c.orig 1970-01-01 01:00:00.000000000 +0100
-+++ lustre/lustre/llite/llite_lib.c.orig 2009-08-19 14:10:45.000000000 +0200
-@@ -0,0 +1,2232 @@
-+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-+ * vim:expandtab:shiftwidth=8:tabstop=8:
-+ *
-+ * GPL HEADER START
-+ *
-+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
-+ *
-+ * This program is free software; you can redistribute it and/or modify
-+ * it under the terms of the GNU General Public License version 2 only,
-+ * as published by the Free Software Foundation.
-+ *
-+ * This program is distributed in the hope that it will be useful, but
-+ * WITHOUT ANY WARRANTY; without even the implied warranty of
-+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-+ * General Public License version 2 for more details (a copy is included
-+ * in the LICENSE file that accompanied this code).
-+ *
-+ * You should have received a copy of the GNU General Public License
-+ * version 2 along with this program; If not, see
-+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
-+ *
-+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
-+ * CA 95054 USA or visit www.sun.com if you need additional information or
-+ * have any questions.
-+ *
-+ * GPL HEADER END
-+ */
-+/*
-+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved
-+ * Use is subject to license terms.
-+ */
-+/*
-+ * This file is part of Lustre, http://www.lustre.org/
-+ * Lustre is a trademark of Sun Microsystems, Inc.
-+ *
-+ * lustre/llite/llite_lib.c
-+ *
-+ * Lustre Light Super operations
-+ */
-+
-+#define DEBUG_SUBSYSTEM S_LLITE
-+
-+#include <linux/module.h>
-+#include <linux/types.h>
-+#include <linux/random.h>
-+#include <linux/version.h>
-+#include <linux/mm.h>
-+
-+#include <lustre_lite.h>
-+#include <lustre_ha.h>
-+#include <lustre_dlm.h>
-+#include <lprocfs_status.h>
-+#include <lustre_disk.h>
-+#include <lustre_param.h>
-+#include <lustre_cache.h>
-+#include "llite_internal.h"
-+
-+cfs_mem_cache_t *ll_file_data_slab;
-+
-+LIST_HEAD(ll_super_blocks);
-+spinlock_t ll_sb_lock = SPIN_LOCK_UNLOCKED;
-+
-+extern struct address_space_operations ll_aops;
-+extern struct address_space_operations ll_dir_aops;
-+
-+#ifndef log2
-+#define log2(n) ffz(~(n))
-+#endif
-+
-+
-+static struct ll_sb_info *ll_init_sbi(void)
-+{
-+ struct ll_sb_info *sbi = NULL;
-+ unsigned long pages;
-+ struct sysinfo si;
-+ class_uuid_t uuid;
-+ int i;
-+ ENTRY;
-+
-+ OBD_ALLOC(sbi, sizeof(*sbi));
-+ if (!sbi)
-+ RETURN(NULL);
-+
-+ spin_lock_init(&sbi->ll_lock);
-+ spin_lock_init(&sbi->ll_lco.lco_lock);
-+ spin_lock_init(&sbi->ll_pp_extent_lock);
-+ spin_lock_init(&sbi->ll_process_lock);
-+ sbi->ll_rw_stats_on = 0;
-+ INIT_LIST_HEAD(&sbi->ll_pglist);
-+
-+ si_meminfo(&si);
-+ pages = si.totalram - si.totalhigh;
-+ if (pages >> (20 - CFS_PAGE_SHIFT) < 512) {
-+#ifdef HAVE_BGL_SUPPORT
-+ sbi->ll_async_page_max = pages / 4;
-+#else
-+ sbi->ll_async_page_max = pages / 2;
-+#endif
-+ } else {
-+ sbi->ll_async_page_max = (pages / 4) * 3;
-+ }
-+ sbi->ll_ra_info.ra_max_pages = min(pages / 32,
-+ SBI_DEFAULT_READAHEAD_MAX);
-+ sbi->ll_ra_info.ra_max_read_ahead_whole_pages =
-+ SBI_DEFAULT_READAHEAD_WHOLE_MAX;
-+ sbi->ll_contention_time = SBI_DEFAULT_CONTENTION_SECONDS;
-+ sbi->ll_lockless_truncate_enable = SBI_DEFAULT_LOCKLESS_TRUNCATE_ENABLE;
-+ INIT_LIST_HEAD(&sbi->ll_conn_chain);
-+ INIT_LIST_HEAD(&sbi->ll_orphan_dentry_list);
-+
-+ ll_generate_random_uuid(uuid);
-+ class_uuid_unparse(uuid, &sbi->ll_sb_uuid);
-+ CDEBUG(D_CONFIG, "generated uuid: %s\n", sbi->ll_sb_uuid.uuid);
-+
-+ spin_lock(&ll_sb_lock);
-+ list_add_tail(&sbi->ll_list, &ll_super_blocks);
-+ spin_unlock(&ll_sb_lock);
-+
-+#ifdef ENABLE_CHECKSUM
-+ sbi->ll_flags |= LL_SBI_DATA_CHECKSUM;
-+#endif
-+#ifdef ENABLE_LLITE_CHECKSUM
-+ sbi->ll_flags |= LL_SBI_LLITE_CHECKSUM;
-+#endif
-+
-+#ifdef HAVE_LRU_RESIZE_SUPPORT
-+ sbi->ll_flags |= LL_SBI_LRU_RESIZE;
-+#endif
-+
-+#ifdef HAVE_EXPORT___IGET
-+ INIT_LIST_HEAD(&sbi->ll_deathrow);
-+ spin_lock_init(&sbi->ll_deathrow_lock);
-+#endif
-+ for (i = 0; i <= LL_PROCESS_HIST_MAX; i++) {
-+ spin_lock_init(&sbi->ll_rw_extents_info.pp_extents[i].pp_r_hist.oh_lock);
-+ spin_lock_init(&sbi->ll_rw_extents_info.pp_extents[i].pp_w_hist.oh_lock);
-+ }
-+
-+ /* metadata statahead is enabled by default */
-+ sbi->ll_sa_max = LL_SA_RPC_DEF;
-+
-+ RETURN(sbi);
-+}
-+
-+void ll_free_sbi(struct super_block *sb)
-+{
-+ struct ll_sb_info *sbi = ll_s2sbi(sb);
-+ ENTRY;
-+
-+ if (sbi != NULL) {
-+ spin_lock(&ll_sb_lock);
-+ list_del(&sbi->ll_list);
-+ spin_unlock(&ll_sb_lock);
-+ OBD_FREE(sbi, sizeof(*sbi));
-+ }
-+ EXIT;
-+}
-+
-+static struct dentry_operations ll_d_root_ops = {
-+#ifdef DCACHE_LUSTRE_INVALID
-+ .d_compare = ll_dcompare,
-+#endif
-+};
-+
-+static int client_common_fill_super(struct super_block *sb,
-+ char *mdc, char *osc)
-+{
-+ struct inode *root = 0;
-+ struct ll_sb_info *sbi = ll_s2sbi(sb);
-+ struct obd_device *obd;
-+ struct ll_fid rootfid;
-+ struct obd_statfs osfs;
-+ struct ptlrpc_request *request = NULL;
-+ struct lustre_handle osc_conn = {0, };
-+ struct lustre_handle mdc_conn = {0, };
-+ struct lustre_md md;
-+ struct obd_connect_data *data = NULL;
-+ int err, checksum;
-+ ENTRY;
-+
-+ obd = class_name2obd(mdc);
-+ if (!obd) {
-+ CERROR("MDC %s: not setup or attached\n", mdc);
-+ RETURN(-EINVAL);
-+ }
-+
-+ OBD_ALLOC(data, sizeof(*data));
-+ if (data == NULL)
-+ RETURN(-ENOMEM);
-+
-+ if (proc_lustre_fs_root) {
-+ err = lprocfs_register_mountpoint(proc_lustre_fs_root, sb,
-+ osc, mdc);
-+ if (err < 0)
-+ CERROR("could not register mount in /proc/fs/lustre\n");
-+ }
-+
-+ /* indicate the features supported by this client */
-+ data->ocd_connect_flags = OBD_CONNECT_VERSION | OBD_CONNECT_IBITS |
-+ OBD_CONNECT_JOIN | OBD_CONNECT_ATTRFID | OBD_CONNECT_NODEVOH |
-+ OBD_CONNECT_CANCELSET | OBD_CONNECT_AT;
-+#ifdef HAVE_LRU_RESIZE_SUPPORT
-+ if (sbi->ll_flags & LL_SBI_LRU_RESIZE)
-+ data->ocd_connect_flags |= OBD_CONNECT_LRU_RESIZE;
-+#endif
-+#ifdef CONFIG_FS_POSIX_ACL
-+ data->ocd_connect_flags |= OBD_CONNECT_ACL;
-+#endif
-+ data->ocd_ibits_known = MDS_INODELOCK_FULL;
-+ data->ocd_version = LUSTRE_VERSION_CODE;
-+
-+ if (sb->s_flags & MS_RDONLY)
-+ data->ocd_connect_flags |= OBD_CONNECT_RDONLY;
-+ if (sbi->ll_flags & LL_SBI_USER_XATTR)
-+ data->ocd_connect_flags |= OBD_CONNECT_XATTR;
-+
-+#ifdef HAVE_MS_FLOCK_LOCK
-+ /* force vfs to use lustre handler for flock() calls - bug 10743 */
-+ sb->s_flags |= MS_FLOCK_LOCK;
-+#endif
-+
-+ if (sbi->ll_flags & LL_SBI_FLOCK)
-+ sbi->ll_fop = &ll_file_operations_flock;
-+ else if (sbi->ll_flags & LL_SBI_LOCALFLOCK)
-+ sbi->ll_fop = &ll_file_operations;
-+ else
-+ sbi->ll_fop = &ll_file_operations_noflock;
-+
-+
-+ err = obd_connect(&mdc_conn, obd, &sbi->ll_sb_uuid, data, &sbi->ll_mdc_exp);
-+ if (err == -EBUSY) {
-+ LCONSOLE_ERROR_MSG(0x14f, "An MDT (mdc %s) is performing "
-+ "recovery, of which this client is not a "
-+ "part. Please wait for recovery to complete,"
-+ " abort, or time out.\n", mdc);
-+ GOTO(out, err);
-+ } else if (err) {
-+ CERROR("cannot connect to %s: rc = %d\n", mdc, err);
-+ GOTO(out, err);
-+ }
-+
-+ err = obd_statfs(obd, &osfs, cfs_time_current_64() - HZ, 0);
-+ if (err)
-+ GOTO(out_mdc, err);
-+
-+ /* MDC connect is surely finished by now because we actually sent
-+ * a statfs RPC, otherwise obd_connect() is asynchronous. */
-+ *data = class_exp2cliimp(sbi->ll_mdc_exp)->imp_connect_data;
-+
-+ LASSERT(osfs.os_bsize);
-+ sb->s_blocksize = osfs.os_bsize;
-+ sb->s_blocksize_bits = log2(osfs.os_bsize);
-+ sb->s_magic = LL_SUPER_MAGIC;
-+
-+ /* for bug 11559. in $LINUX/fs/read_write.c, function do_sendfile():
-+ * retval = in_file->f_op->sendfile(...);
-+ * if (*ppos > max)
-+ * retval = -EOVERFLOW;
-+ *
-+ * it will check if *ppos is greater than max. However, max equals to
-+ * s_maxbytes, which is a negative integer in a x86_64 box since loff_t
-+ * has been defined as a signed long long ineger in linux kernel. */
-+#if BITS_PER_LONG == 64
-+ sb->s_maxbytes = PAGE_CACHE_MAXBYTES >> 1;
-+#else
-+ sb->s_maxbytes = PAGE_CACHE_MAXBYTES;
-+#endif
-+ sbi->ll_namelen = osfs.os_namelen;
-+ sbi->ll_max_rw_chunk = LL_DEFAULT_MAX_RW_CHUNK;
-+
-+ if ((sbi->ll_flags & LL_SBI_USER_XATTR) &&
-+ !(data->ocd_connect_flags & OBD_CONNECT_XATTR)) {
-+ LCONSOLE_INFO("Disabling user_xattr feature because "
-+ "it is not supported on the server\n");
-+ sbi->ll_flags &= ~LL_SBI_USER_XATTR;
-+ }
-+
-+ if (data->ocd_connect_flags & OBD_CONNECT_ACL) {
-+#ifdef MS_POSIXACL
-+ sb->s_flags |= MS_POSIXACL;
-+#endif
-+ sbi->ll_flags |= LL_SBI_ACL;
-+ } else
-+ sbi->ll_flags &= ~LL_SBI_ACL;
-+
-+ if (data->ocd_connect_flags & OBD_CONNECT_JOIN)
-+ sbi->ll_flags |= LL_SBI_JOIN;
-+
-+ obd = class_name2obd(osc);
-+ if (!obd) {
-+ CERROR("OSC %s: not setup or attached\n", osc);
-+ GOTO(out_mdc, err = -ENODEV);
-+ }
-+
-+ data->ocd_connect_flags = OBD_CONNECT_VERSION | OBD_CONNECT_GRANT |
-+ OBD_CONNECT_REQPORTAL | OBD_CONNECT_BRW_SIZE |
-+ OBD_CONNECT_SRVLOCK | OBD_CONNECT_CANCELSET | OBD_CONNECT_AT |
-+ OBD_CONNECT_TRUNCLOCK;
-+
-+ if (!OBD_FAIL_CHECK(OBD_FAIL_OSC_CONNECT_CKSUM)) {
-+ /* OBD_CONNECT_CKSUM should always be set, even if checksums are
-+ * disabled by default, because it can still be enabled on the
-+ * fly via /proc. As a consequence, we still need to come to an
-+ * agreement on the supported algorithms at connect time */
-+ data->ocd_connect_flags |= OBD_CONNECT_CKSUM;
-+
-+ if (OBD_FAIL_CHECK(OBD_FAIL_OSC_CKSUM_ADLER_ONLY))
-+ data->ocd_cksum_types = OBD_CKSUM_ADLER;
-+ else
-+ /* send the list of supported checksum types */
-+ data->ocd_cksum_types = OBD_CKSUM_ALL;
-+ }
-+
-+#ifdef HAVE_LRU_RESIZE_SUPPORT
-+ if (sbi->ll_flags & LL_SBI_LRU_RESIZE)
-+ data->ocd_connect_flags |= OBD_CONNECT_LRU_RESIZE;
-+#endif
-+
-+ CDEBUG(D_RPCTRACE, "ocd_connect_flags: "LPX64" ocd_version: %d "
-+ "ocd_grant: %d\n", data->ocd_connect_flags,
-+ data->ocd_version, data->ocd_grant);
-+
-+ obd->obd_upcall.onu_owner = &sbi->ll_lco;
-+ obd->obd_upcall.onu_upcall = ll_ocd_update;
-+ data->ocd_brw_size = PTLRPC_MAX_BRW_PAGES << CFS_PAGE_SHIFT;
-+
-+ obd_register_lock_cancel_cb(obd, ll_extent_lock_cancel_cb);
-+ obd_register_page_removal_cb(obd, ll_page_removal_cb, ll_pin_extent_cb);
-+
-+
-+ err = obd_connect(&osc_conn, obd, &sbi->ll_sb_uuid, data, &sbi->ll_osc_exp);
-+ if (err == -EBUSY) {
-+ LCONSOLE_ERROR_MSG(0x150, "An OST (osc %s) is performing "
-+ "recovery, of which this client is not a "
-+ "part. Please wait for recovery to "
-+ "complete, abort, or time out.\n", osc);
-+ GOTO(out, err); // need clear cb?
-+ } else if (err) {
-+ CERROR("cannot connect to %s: rc = %d\n", osc, err);
-+ GOTO(out_cb, err);
-+ }
-+ spin_lock(&sbi->ll_lco.lco_lock);
-+ sbi->ll_lco.lco_flags = data->ocd_connect_flags;
-+ sbi->ll_lco.lco_mdc_exp = sbi->ll_mdc_exp;
-+ sbi->ll_lco.lco_osc_exp = sbi->ll_osc_exp;
-+ spin_unlock(&sbi->ll_lco.lco_lock);
-+
-+ err = mdc_init_ea_size(sbi->ll_mdc_exp, sbi->ll_osc_exp);
-+ if (err) {
-+ CERROR("cannot set max EA and cookie sizes: rc = %d\n", err);
-+ GOTO(out_osc, err);
-+ }
-+
-+ err = obd_prep_async_page(sbi->ll_osc_exp, NULL, NULL, NULL,
-+ 0, NULL, NULL, NULL, 0, NULL);
-+ if (err < 0) {
-+ LCONSOLE_ERROR_MSG(0x151, "There are no OST's in this "
-+ "filesystem. There must be at least one "
-+ "active OST for a client to start.\n");
-+ GOTO(out_osc, err);
-+ }
-+
-+ if (!ll_async_page_slab) {
-+ ll_async_page_slab_size =
-+ size_round(sizeof(struct ll_async_page)) + err;
-+ ll_async_page_slab = cfs_mem_cache_create("ll_async_page",
-+ ll_async_page_slab_size,
-+ 0, 0);
-+ if (!ll_async_page_slab)
-+ GOTO(out_osc, err = -ENOMEM);
-+ }
-+
-+ err = mdc_getstatus(sbi->ll_mdc_exp, &rootfid);
-+ if (err) {
-+ CERROR("cannot mds_connect: rc = %d\n", err);
-+ GOTO(out_osc, err);
-+ }
-+ CDEBUG(D_SUPER, "rootfid "LPU64"\n", rootfid.id);
-+ sbi->ll_rootino = rootfid.id;
-+
-+ sb->s_op = &lustre_super_operations;
-+#if THREAD_SIZE >= 8192
-+ /* Disable the NFS export because of stack overflow
-+ * when THREAD_SIZE < 8192. Please refer to 17630. */
-+ sb->s_export_op = &lustre_export_operations;
-+#endif
-+
-+ /* make root inode
-+ * XXX: move this to after cbd setup? */
-+ err = mdc_getattr(sbi->ll_mdc_exp, &rootfid,
-+ OBD_MD_FLGETATTR | OBD_MD_FLBLOCKS |
-+ (sbi->ll_flags & LL_SBI_ACL ? OBD_MD_FLACL : 0),
-+ 0, &request);
-+ if (err) {
-+ CERROR("mdc_getattr failed for root: rc = %d\n", err);
-+ GOTO(out_osc, err);
-+ }
-+
-+ err = mdc_req2lustre_md(request, REPLY_REC_OFF, sbi->ll_osc_exp, &md);
-+ if (err) {
-+ CERROR("failed to understand root inode md: rc = %d\n",err);
-+ ptlrpc_req_finished (request);
-+ GOTO(out_osc, err);
-+ }
-+
-+ LASSERT(sbi->ll_rootino != 0);
-+ root = ll_iget(sb, sbi->ll_rootino, &md);
-+
-+ ptlrpc_req_finished(request);
-+
-+ if (root == NULL || is_bad_inode(root)) {
-+ mdc_free_lustre_md(sbi->ll_osc_exp, &md);
-+ CERROR("lustre_lite: bad iget4 for root\n");
-+ GOTO(out_root, err = -EBADF);
-+ }
-+
-+ err = ll_close_thread_start(&sbi->ll_lcq);
-+ if (err) {
-+ CERROR("cannot start close thread: rc %d\n", err);
-+ GOTO(out_root, err);
-+ }
-+
-+ checksum = sbi->ll_flags & LL_SBI_DATA_CHECKSUM;
-+ err = obd_set_info_async(sbi->ll_osc_exp, sizeof(KEY_CHECKSUM),
-+ KEY_CHECKSUM, sizeof(checksum),
-+ &checksum, NULL);
-+
-+ /* making vm readahead 0 for 2.4.x. In the case of 2.6.x,
-+ backing dev info assigned to inode mapping is used for
-+ determining maximal readahead. */
-+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)) && \
-+ !defined(KERNEL_HAS_AS_MAX_READAHEAD)
-+ /* bug 2805 - set VM readahead to zero */
-+ vm_max_readahead = vm_min_readahead = 0;
-+#endif
-+
-+ sb->s_root = d_alloc_root(root);
-+ if (data != NULL)
-+ OBD_FREE(data, sizeof(*data));
-+ sb->s_root->d_op = &ll_d_root_ops;
-+
-+ sbi->ll_sdev_orig = sb->s_dev;
-+ /* We set sb->s_dev equal on all lustre clients in order to support
-+ * NFS export clustering. NFSD requires that the FSID be the same
-+ * on all clients. */
-+ /* s_dev is also used in lt_compare() to compare two fs, but that is
-+ * only a node-local comparison. */
-+ sb->s_dev = get_uuid2int(sbi2mdc(sbi)->cl_target_uuid.uuid,
-+ strlen(sbi2mdc(sbi)->cl_target_uuid.uuid));
-+
-+ RETURN(err);
-+
-+out_root:
-+ if (root)
-+ iput(root);
-+out_osc:
-+ obd_disconnect(sbi->ll_osc_exp);
-+ sbi->ll_osc_exp = NULL;
-+out_cb:
-+ obd = class_name2obd(osc);
-+ obd_unregister_lock_cancel_cb(obd, ll_extent_lock_cancel_cb);
-+ obd_unregister_page_removal_cb(obd, ll_page_removal_cb);
-+out_mdc:
-+ obd_disconnect(sbi->ll_mdc_exp);
-+ sbi->ll_mdc_exp = NULL;
-+out:
-+ if (data != NULL)
-+ OBD_FREE(data, sizeof(*data));
-+ lprocfs_unregister_mountpoint(sbi);
-+ RETURN(err);
-+}
-+
-+int ll_get_max_mdsize(struct ll_sb_info *sbi, int *lmmsize)
-+{
-+ int size, rc;
-+
-+ *lmmsize = obd_size_diskmd(sbi->ll_osc_exp, NULL);
-+ size = sizeof(int);
-+ rc = obd_get_info(sbi->ll_mdc_exp, sizeof(KEY_MAX_EASIZE),
-+ KEY_MAX_EASIZE, &size, lmmsize, NULL);
-+ if (rc)
-+ CERROR("Get max mdsize error rc %d \n", rc);
-+
-+ RETURN(rc);
-+}
-+
-+void ll_dump_inode(struct inode *inode)
-+{
-+ struct list_head *tmp;
-+ int dentry_count = 0;
-+
-+ LASSERT(inode != NULL);
-+
-+ list_for_each(tmp, &inode->i_dentry)
-+ dentry_count++;
-+
-+ CERROR("inode %p dump: dev=%s ino=%lu mode=%o count=%u, %d dentries\n",
-+ inode, ll_i2mdcexp(inode)->exp_obd->obd_name, inode->i_ino,
-+ inode->i_mode, atomic_read(&inode->i_count), dentry_count);
-+}
-+
-+void lustre_dump_dentry(struct dentry *dentry, int recur)
-+{
-+ struct list_head *tmp;
-+ int subdirs = 0;
-+
-+ LASSERT(dentry != NULL);
-+
-+ list_for_each(tmp, &dentry->d_subdirs)
-+ subdirs++;
-+
-+ CERROR("dentry %p dump: name=%.*s parent=%.*s (%p), inode=%p, count=%u,"
-+ " flags=0x%x, fsdata=%p, %d subdirs\n", dentry,
-+ dentry->d_name.len, dentry->d_name.name,
-+ dentry->d_parent->d_name.len, dentry->d_parent->d_name.name,
-+ dentry->d_parent, dentry->d_inode, atomic_read(&dentry->d_count),
-+ dentry->d_flags, dentry->d_fsdata, subdirs);
-+ if (dentry->d_inode != NULL)
-+ ll_dump_inode(dentry->d_inode);
-+
-+ if (recur == 0)
-+ return;
-+
-+ list_for_each(tmp, &dentry->d_subdirs) {
-+ struct dentry *d = list_entry(tmp, struct dentry, d_child);
-+ lustre_dump_dentry(d, recur - 1);
-+ }
-+}
-+
-+#ifdef HAVE_EXPORT___IGET
-+static void prune_dir_dentries(struct inode *inode)
-+{
-+ struct dentry *dentry, *prev = NULL;
-+
-+ /* due to lustre specific logic, a directory
-+ * can have few dentries - a bug from VFS POV */
-+restart:
-+ spin_lock(&dcache_lock);
-+ if (!list_empty(&inode->i_dentry)) {
-+ dentry = list_entry(inode->i_dentry.prev,
-+ struct dentry, d_alias);
-+ /* in order to prevent infinite loops we
-+ * break if previous dentry is busy */
-+ if (dentry != prev) {
-+ prev = dentry;
-+ dget_locked(dentry);
-+ spin_unlock(&dcache_lock);
-+
-+ /* try to kill all child dentries */
-+ shrink_dcache_parent(dentry);
-+ dput(dentry);
-+
-+ /* now try to get rid of current dentry */
-+ d_prune_aliases(inode);
-+ goto restart;
-+ }
-+ }
-+ spin_unlock(&dcache_lock);
-+}
-+
-+static void prune_deathrow_one(struct ll_inode_info *lli)
-+{
-+ struct inode *inode = ll_info2i(lli);
-+
-+ /* first, try to drop any dentries - they hold a ref on the inode */
-+ if (S_ISDIR(inode->i_mode))
-+ prune_dir_dentries(inode);
-+ else
-+ d_prune_aliases(inode);
-+
-+
-+ /* if somebody still uses it, leave it */
-+ LASSERT(atomic_read(&inode->i_count) > 0);
-+ if (atomic_read(&inode->i_count) > 1)
-+ goto out;
-+
-+ CDEBUG(D_INODE, "inode %lu/%u(%d) looks a good candidate for prune\n",
-+ inode->i_ino,inode->i_generation, atomic_read(&inode->i_count));
-+
-+ /* seems nobody uses it anymore */
-+ inode->i_nlink = 0;
-+
-+out:
-+ iput(inode);
-+ return;
-+}
-+
-+static void prune_deathrow(struct ll_sb_info *sbi, int try)
-+{
-+ struct ll_inode_info *lli;
-+ int empty;
-+
-+ do {
-+ if (need_resched() && try)
-+ break;
-+
-+ if (try) {
-+ if (!spin_trylock(&sbi->ll_deathrow_lock))
-+ break;
-+ } else {
-+ spin_lock(&sbi->ll_deathrow_lock);
-+ }
-+
-+ empty = 1;
-+ lli = NULL;
-+ if (!list_empty(&sbi->ll_deathrow)) {
-+ lli = list_entry(sbi->ll_deathrow.next,
-+ struct ll_inode_info,
-+ lli_dead_list);
-+ list_del_init(&lli->lli_dead_list);
-+ if (!list_empty(&sbi->ll_deathrow))
-+ empty = 0;
-+ }
-+ spin_unlock(&sbi->ll_deathrow_lock);
-+
-+ if (lli)
-+ prune_deathrow_one(lli);
-+
-+ } while (empty == 0);
-+}
-+#else /* !HAVE_EXPORT___IGET */
-+#define prune_deathrow(sbi, try) do {} while (0)
-+#endif /* HAVE_EXPORT___IGET */
-+
-+void client_common_put_super(struct super_block *sb)
-+{
-+ struct ll_sb_info *sbi = ll_s2sbi(sb);
-+ ENTRY;
-+
-+ ll_close_thread_shutdown(sbi->ll_lcq);
-+
-+ lprocfs_unregister_mountpoint(sbi);
-+
-+ /* destroy inodes in deathrow */
-+ prune_deathrow(sbi, 0);
-+
-+ list_del(&sbi->ll_conn_chain);
-+
-+ /* callbacks is cleared after disconnect each target */
-+ obd_disconnect(sbi->ll_osc_exp);
-+ sbi->ll_osc_exp = NULL;
-+
-+ obd_disconnect(sbi->ll_mdc_exp);
-+ sbi->ll_mdc_exp = NULL;
-+
-+ EXIT;
-+}
-+
-+void ll_kill_super(struct super_block *sb)
-+{
-+ struct ll_sb_info *sbi;
-+
-+ ENTRY;
-+
-+ /* not init sb ?*/
-+ if (!(sb->s_flags & MS_ACTIVE))
-+ return;
-+
-+ sbi = ll_s2sbi(sb);
-+ /* we need restore s_dev from changed for clustred NFS before put_super
-+ * because new kernels have cached s_dev and change sb->s_dev in
-+ * put_super not affected real removing devices */
-+ if (sbi)
-+ sb->s_dev = sbi->ll_sdev_orig;
-+ EXIT;
-+}
-+
-+char *ll_read_opt(const char *opt, char *data)
-+{
-+ char *value;
-+ char *retval;
-+ ENTRY;
-+
-+ CDEBUG(D_SUPER, "option: %s, data %s\n", opt, data);
-+ if (strncmp(opt, data, strlen(opt)))
-+ RETURN(NULL);
-+ if ((value = strchr(data, '=')) == NULL)
-+ RETURN(NULL);
-+
-+ value++;
-+ OBD_ALLOC(retval, strlen(value) + 1);
-+ if (!retval) {
-+ CERROR("out of memory!\n");
-+ RETURN(NULL);
-+ }
-+
-+ memcpy(retval, value, strlen(value)+1);
-+ CDEBUG(D_SUPER, "Assigned option: %s, value %s\n", opt, retval);
-+ RETURN(retval);
-+}
-+
-+static inline int ll_set_opt(const char *opt, char *data, int fl)
-+{
-+ if (strncmp(opt, data, strlen(opt)) != 0)
-+ return(0);
-+ else
-+ return(fl);
-+}
-+
-+/* non-client-specific mount options are parsed in lmd_parse */
-+static int ll_options(char *options, int *flags)
-+{
-+ int tmp;
-+ char *s1 = options, *s2;
-+ ENTRY;
-+
-+ if (!options)
-+ RETURN(0);
-+
-+ CDEBUG(D_CONFIG, "Parsing opts %s\n", options);
-+
-+ while (*s1) {
-+ CDEBUG(D_SUPER, "next opt=%s\n", s1);
-+ tmp = ll_set_opt("nolock", s1, LL_SBI_NOLCK);
-+ if (tmp) {
-+ *flags |= tmp;
-+ goto next;
-+ }
-+ tmp = ll_set_opt("flock", s1, LL_SBI_FLOCK);
-+ if (tmp) {
-+ *flags |= tmp;
-+ goto next;
-+ }
-+ tmp = ll_set_opt("localflock", s1, LL_SBI_LOCALFLOCK);
-+ if (tmp) {
-+ *flags |= tmp;
-+ goto next;
-+ }
-+ tmp = ll_set_opt("noflock", s1, LL_SBI_FLOCK|LL_SBI_LOCALFLOCK);
-+ if (tmp) {
-+ *flags &= ~tmp;
-+ goto next;
-+ }
-+ tmp = ll_set_opt("user_xattr", s1, LL_SBI_USER_XATTR);
-+ if (tmp) {
-+ *flags |= tmp;
-+ goto next;
-+ }
-+ tmp = ll_set_opt("nouser_xattr", s1, LL_SBI_USER_XATTR);
-+ if (tmp) {
-+ *flags &= ~tmp;
-+ goto next;
-+ }
-+ tmp = ll_set_opt("acl", s1, LL_SBI_ACL);
-+ if (tmp) {
-+ /* Ignore deprecated mount option. The client will
-+ * always try to mount with ACL support, whether this
-+ * is used depends on whether server supports it. */
-+ LCONSOLE_ERROR_MSG(0x152, "Ignoring deprecated "
-+ "mount option 'acl'.\n");
-+ goto next;
-+ }
-+ tmp = ll_set_opt("noacl", s1, LL_SBI_ACL);
-+ if (tmp) {
-+ LCONSOLE_ERROR_MSG(0x152, "Ignoring deprecated "
-+ "mount option 'noacl'.\n");
-+ goto next;
-+ }
-+
-+ tmp = ll_set_opt("checksum", s1, LL_SBI_DATA_CHECKSUM);
-+ if (tmp) {
-+ *flags |= tmp;
-+ goto next;
-+ }
-+ tmp = ll_set_opt("nochecksum", s1, LL_SBI_DATA_CHECKSUM);
-+ if (tmp) {
-+ *flags &= ~tmp;
-+ goto next;
-+ }
-+
-+ tmp = ll_set_opt("lruresize", s1, LL_SBI_LRU_RESIZE);
-+ if (tmp) {
-+ *flags |= tmp;
-+ goto next;
-+ }
-+ tmp = ll_set_opt("nolruresize", s1, LL_SBI_LRU_RESIZE);
-+ if (tmp) {
-+ *flags &= ~tmp;
-+ goto next;
-+ }
-+ LCONSOLE_ERROR_MSG(0x152, "Unknown option '%s', won't mount.\n",
-+ s1);
-+ RETURN(-EINVAL);
-+
-+next:
-+ /* Find next opt */
-+ s2 = strchr(s1, ',');
-+ if (s2 == NULL)
-+ break;
-+ s1 = s2 + 1;
-+ }
-+ RETURN(0);
-+}
-+
-+void ll_lli_init(struct ll_inode_info *lli)
-+{
-+ lli->lli_inode_magic = LLI_INODE_MAGIC;
-+ sema_init(&lli->lli_size_sem, 1);
-+ sema_init(&lli->lli_write_sem, 1);
-+ lli->lli_flags = 0;
-+ lli->lli_maxbytes = PAGE_CACHE_MAXBYTES;
-+ spin_lock_init(&lli->lli_lock);
-+ sema_init(&lli->lli_och_sem, 1);
-+ lli->lli_mds_read_och = lli->lli_mds_write_och = NULL;
-+ lli->lli_mds_exec_och = NULL;
-+ lli->lli_open_fd_read_count = lli->lli_open_fd_write_count = 0;
-+ lli->lli_open_fd_exec_count = 0;
-+ INIT_LIST_HEAD(&lli->lli_dead_list);
-+#ifdef HAVE_CLOSE_THREAD
-+ INIT_LIST_HEAD(&lli->lli_pending_write_llaps);
-+#endif
-+}
-+
-+/* COMPAT_146 */
-+#define MDCDEV "mdc_dev"
-+static int old_lustre_process_log(struct super_block *sb, char *newprofile,
-+ struct config_llog_instance *cfg)
-+{
-+ struct lustre_sb_info *lsi = s2lsi(sb);
-+ struct obd_device *obd;
-+ struct lustre_handle mdc_conn = {0, };
-+ struct obd_export *exp;
-+ char *ptr, *mdt, *profile;
-+ char niduuid[10] = "mdtnid0";
-+ class_uuid_t uuid;
-+ struct obd_uuid mdc_uuid;
-+ struct llog_ctxt *ctxt;
-+ struct obd_connect_data ocd = { 0 };
-+ lnet_nid_t nid;
-+ int i, rc = 0, recov_bk = 1, failnodes = 0;
-+ ENTRY;
-+
-+ ll_generate_random_uuid(uuid);
-+ class_uuid_unparse(uuid, &mdc_uuid);
-+ CDEBUG(D_HA, "generated uuid: %s\n", mdc_uuid.uuid);
-+
-+ /* Figure out the old mdt and profile name from new-style profile
-+ ("lustre" from "mds/lustre-client") */
-+ mdt = newprofile;
-+ profile = strchr(mdt, '/');
-+ if (profile == NULL) {
-+ CDEBUG(D_CONFIG, "Can't find MDT name in %s\n", newprofile);
-+ GOTO(out, rc = -EINVAL);
-+ }
-+ *profile = '\0';
-+ profile++;
-+ ptr = strrchr(profile, '-');
-+ if (ptr == NULL) {
-+ CDEBUG(D_CONFIG, "Can't find client name in %s\n", newprofile);
-+ GOTO(out, rc = -EINVAL);
-+ }
-+ *ptr = '\0';
-+
-+ LCONSOLE_WARN("This looks like an old mount command; I will try to "
-+ "contact MDT '%s' for profile '%s'\n", mdt, profile);
-+
-+ /* Use nids from mount line: uml1,1 at elan:uml2,2 at elan:/lustre */
-+ i = 0;
-+ ptr = lsi->lsi_lmd->lmd_dev;
-+ while (class_parse_nid(ptr, &nid, &ptr) == 0) {
-+ rc = do_lcfg(MDCDEV, nid, LCFG_ADD_UUID, niduuid, 0,0,0);
-+ i++;
-+ /* Stop at the first failover nid */
-+ if (*ptr == ':')
-+ break;
-+ }
-+ if (i == 0) {
-+ CERROR("No valid MDT nids found.\n");
-+ GOTO(out, rc = -EINVAL);
-+ }
-+ failnodes++;
-+
-+ rc = do_lcfg(MDCDEV, 0, LCFG_ATTACH, LUSTRE_MDC_NAME,mdc_uuid.uuid,0,0);
-+ if (rc < 0)
-+ GOTO(out_del_uuid, rc);
-+
-+ rc = do_lcfg(MDCDEV, 0, LCFG_SETUP, mdt, niduuid, 0, 0);
-+ if (rc < 0) {
-+ LCONSOLE_ERROR_MSG(0x153, "I couldn't establish a connection "
-+ "with the MDT. Check that the MDT host NID "
-+ "is correct and the networks are up.\n");
-+ GOTO(out_detach, rc);
-+ }
-+
-+ obd = class_name2obd(MDCDEV);
-+ if (obd == NULL)
-+ GOTO(out_cleanup, rc = -EINVAL);
-+
-+ /* Add any failover nids */
-+ while (*ptr == ':') {
-+ /* New failover node */
-+ sprintf(niduuid, "mdtnid%d", failnodes);
-+ i = 0;
-+ while (class_parse_nid(ptr, &nid, &ptr) == 0) {
-+ i++;
-+ rc = do_lcfg(MDCDEV, nid, LCFG_ADD_UUID, niduuid,0,0,0);
-+ if (rc)
-+ CERROR("Add uuid for %s failed %d\n",
-+ libcfs_nid2str(nid), rc);
-+ if (*ptr == ':')
-+ break;
-+ }
-+ if (i > 0) {
-+ rc = do_lcfg(MDCDEV, 0, LCFG_ADD_CONN, niduuid, 0, 0,0);
-+ if (rc)
-+ CERROR("Add conn for %s failed %d\n",
-+ libcfs_nid2str(nid), rc);
-+ failnodes++;
-+ } else {
-+ /* at ":/fsname" */
-+ break;
-+ }
-+ }
-+
-+ /* Try all connections, but only once. */
-+ rc = obd_set_info_async(obd->obd_self_export,
-+ sizeof(KEY_INIT_RECOV_BACKUP), KEY_INIT_RECOV_BACKUP,
-+ sizeof(recov_bk), &recov_bk, NULL);
-+ if (rc)
-+ GOTO(out_cleanup, rc);
-+
-+ /* If we don't have this then an ACL MDS will refuse the connection */
-+ ocd.ocd_connect_flags = OBD_CONNECT_ACL;
-+
-+ rc = obd_connect(&mdc_conn, obd, &mdc_uuid, &ocd, &exp);
-+ if (rc) {
-+ CERROR("cannot connect to %s: rc = %d\n", mdt, rc);
-+ GOTO(out_cleanup, rc);
-+ }
-+
-+ ctxt = llog_get_context(exp->exp_obd, LLOG_CONFIG_REPL_CTXT);
-+
-+ cfg->cfg_flags |= CFG_F_COMPAT146;
-+
-+#if 1
-+ rc = class_config_parse_llog(ctxt, profile, cfg);
-+#else
-+ /*
-+ * For debugging, it's useful to just dump the log
-+ */
-+ rc = class_config_dump_llog(ctxt, profile, cfg);
-+#endif
-+ llog_ctxt_put(ctxt);
-+ switch (rc) {
-+ case 0: {
-+ /* Set the caller's profile name to the old-style */
-+ memcpy(newprofile, profile, strlen(profile) + 1);
-+ break;
-+ }
-+ case -EINVAL:
-+ LCONSOLE_ERROR_MSG(0x154, "%s: The configuration '%s' could not"
-+ " be read from the MDT '%s'. Make sure this"
-+ " client and the MDT are running compatible "
-+ "versions of Lustre.\n",
-+ obd->obd_name, profile, mdt);
-+ /* fall through */
-+ default:
-+ LCONSOLE_ERROR_MSG(0x155, "%s: The configuration '%s' could not"
-+ " be read from the MDT '%s'. This may be "
-+ "the result of communication errors between "
-+ "the client and the MDT, or if the MDT is "
-+ "not running.\n", obd->obd_name, profile,
-+ mdt);
-+ break;
-+ }
-+
-+ /* We don't so much care about errors in cleaning up the config llog
-+ * connection, as we have already read the config by this point. */
-+ obd_disconnect(exp);
-+
-+out_cleanup:
-+ do_lcfg(MDCDEV, 0, LCFG_CLEANUP, 0, 0, 0, 0);
-+
-+out_detach:
-+ do_lcfg(MDCDEV, 0, LCFG_DETACH, 0, 0, 0, 0);
-+
-+out_del_uuid:
-+ /* class_add_uuid adds a nid even if the same uuid exists; we might
-+ delete any copy here. So they all better match. */
-+ for (i = 0; i < failnodes; i++) {
-+ sprintf(niduuid, "mdtnid%d", i);
-+ do_lcfg(MDCDEV, 0, LCFG_DEL_UUID, niduuid, 0, 0, 0);
-+ }
-+ /* class_import_put will get rid of the additional connections */
-+out:
-+ RETURN(rc);
-+}
-+/* end COMPAT_146 */
-+
-+int ll_fill_super(struct super_block *sb)
-+{
-+ struct lustre_profile *lprof;
-+ struct lustre_sb_info *lsi = s2lsi(sb);
-+ struct ll_sb_info *sbi;
-+ char *osc = NULL, *mdc = NULL;
-+ char *profilenm = get_profile_name(sb);
-+ struct config_llog_instance cfg = {0, };
-+ char ll_instance[sizeof(sb) * 2 + 1];
-+ int err;
-+ ENTRY;
-+
-+ CDEBUG(D_VFSTRACE, "VFS Op: sb %p\n", sb);
-+
-+ cfs_module_get();
-+
-+ /* client additional sb info */
-+ lsi->lsi_llsbi = sbi = ll_init_sbi();
-+ if (!sbi) {
-+ cfs_module_put();
-+ RETURN(-ENOMEM);
-+ }
-+
-+ err = ll_options(lsi->lsi_lmd->lmd_opts, &sbi->ll_flags);
-+ if (err)
-+ GOTO(out_free, err);
-+
-+ /* Generate a string unique to this super, in case some joker tries
-+ to mount the same fs at two mount points.
-+ Use the address of the super itself.*/
-+ sprintf(ll_instance, "%p", sb);
-+ cfg.cfg_instance = ll_instance;
-+ cfg.cfg_uuid = lsi->lsi_llsbi->ll_sb_uuid;
-+ cfg.cfg_sb = sb;
-+
-+ /* set up client obds */
-+ if (strchr(profilenm, '/') != NULL) /* COMPAT_146 */
-+ err = -EINVAL; /* skip error messages, use old config code */
-+ else
-+ err = lustre_process_log(sb, profilenm, &cfg);
-+ /* COMPAT_146 */
-+ if (err < 0) {
-+ char *oldname;
-+ int rc, oldnamelen;
-+ oldnamelen = strlen(profilenm) + 1;
-+ /* Temp storage for 1.4.6 profile name */
-+ OBD_ALLOC(oldname, oldnamelen);
-+ if (oldname) {
-+ memcpy(oldname, profilenm, oldnamelen);
-+ rc = old_lustre_process_log(sb, oldname, &cfg);
-+ if (rc >= 0) {
-+ /* That worked - update the profile name
-+ permanently */
-+ err = rc;
-+ OBD_FREE(lsi->lsi_lmd->lmd_profile,
-+ strlen(lsi->lsi_lmd->lmd_profile) + 1);
-+ OBD_ALLOC(lsi->lsi_lmd->lmd_profile,
-+ strlen(oldname) + 1);
-+ if (!lsi->lsi_lmd->lmd_profile) {
-+ OBD_FREE(oldname, oldnamelen);
-+ GOTO(out_free, err = -ENOMEM);
-+ }
-+ memcpy(lsi->lsi_lmd->lmd_profile, oldname,
-+ strlen(oldname) + 1);
-+ profilenm = get_profile_name(sb);
-+ /* Don't ever try to recover the MGS */
-+ rc = ptlrpc_set_import_active(
-+ lsi->lsi_mgc->u.cli.cl_import, 0);
-+ }
-+ OBD_FREE(oldname, oldnamelen);
-+ }
-+ }
-+ /* end COMPAT_146 */
-+ if (err < 0) {
-+ CERROR("Unable to process log: %d\n", err);
-+ GOTO(out_free, err);
-+ }
-+
-+ lprof = class_get_profile(profilenm);
-+ if (lprof == NULL) {
-+ LCONSOLE_ERROR_MSG(0x156, "The client profile '%s' could not be"
-+ " read from the MGS. Does that filesystem "
-+ "exist?\n", profilenm);
-+ GOTO(out_free, err = -EINVAL);
-+ }
-+ CDEBUG(D_CONFIG, "Found profile %s: mdc=%s osc=%s\n", profilenm,
-+ lprof->lp_mdc, lprof->lp_osc);
-+
-+ OBD_ALLOC(osc, strlen(lprof->lp_osc) +
-+ strlen(ll_instance) + 2);
-+ if (!osc)
-+ GOTO(out_free, err = -ENOMEM);
-+ sprintf(osc, "%s-%s", lprof->lp_osc, ll_instance);
-+
-+ OBD_ALLOC(mdc, strlen(lprof->lp_mdc) +
-+ strlen(ll_instance) + 2);
-+ if (!mdc)
-+ GOTO(out_free, err = -ENOMEM);
-+ sprintf(mdc, "%s-%s", lprof->lp_mdc, ll_instance);
-+
-+ /* connections, registrations, sb setup */
-+ err = client_common_fill_super(sb, mdc, osc);
-+
-+out_free:
-+ if (mdc)
-+ OBD_FREE(mdc, strlen(mdc) + 1);
-+ if (osc)
-+ OBD_FREE(osc, strlen(osc) + 1);
-+ if (err)
-+ ll_put_super(sb);
-+ else
-+ LCONSOLE_WARN("Client %s has started\n", profilenm);
-+
-+ RETURN(err);
-+} /* ll_fill_super */
-+
-+
-+void ll_put_super(struct super_block *sb)
-+{
-+ struct config_llog_instance cfg;
-+ char ll_instance[sizeof(sb) * 2 + 1];
-+ struct obd_device *obd;
-+ struct lustre_sb_info *lsi = s2lsi(sb);
-+ struct ll_sb_info *sbi = ll_s2sbi(sb);
-+ char *profilenm = get_profile_name(sb);
-+ int force = 1, next;
-+ ENTRY;
-+
-+ CDEBUG(D_VFSTRACE, "VFS Op: sb %p - %s\n", sb, profilenm);
-+
-+ sprintf(ll_instance, "%p", sb);
-+ cfg.cfg_instance = ll_instance;
-+ lustre_end_log(sb, NULL, &cfg);
-+
-+ if (sbi->ll_mdc_exp) {
-+ obd = class_exp2obd(sbi->ll_mdc_exp);
-+ if (obd)
-+ force = obd->obd_force;
-+ }
-+
-+ /* We need to set force before the lov_disconnect in
-+ lustre_common_put_super, since l_d cleans up osc's as well. */
-+ if (force) {
-+ next = 0;
-+ while ((obd = class_devices_in_group(&sbi->ll_sb_uuid,
-+ &next)) != NULL) {
-+ obd->obd_force = force;
-+ }
-+ }
-+
-+ if (sbi->ll_lcq) {
-+ /* Only if client_common_fill_super succeeded */
-+ client_common_put_super(sb);
-+ }
-+
-+ next = 0;
-+ while ((obd = class_devices_in_group(&sbi->ll_sb_uuid, &next)) !=NULL) {
-+ class_manual_cleanup(obd);
-+ }
-+
-+ if (profilenm)
-+ class_del_profile(profilenm);
-+
-+ ll_free_sbi(sb);
-+ lsi->lsi_llsbi = NULL;
-+
-+ lustre_common_put_super(sb);
-+
-+ LCONSOLE_WARN("client %s umount complete\n", ll_instance);
-+
-+ cfs_module_put();
-+
-+ EXIT;
-+} /* client_put_super */
-+
-+#ifdef HAVE_REGISTER_CACHE
-+#include <linux/cache_def.h>
-+#ifdef HAVE_CACHE_RETURN_INT
-+static int
-+#else
-+static void
-+#endif
-+ll_shrink_cache(int priority, unsigned int gfp_mask)
-+{
-+ struct ll_sb_info *sbi;
-+ int count = 0;
-+
-+ list_for_each_entry(sbi, &ll_super_blocks, ll_list)
-+ count += llap_shrink_cache(sbi, priority);
-+
-+#ifdef HAVE_CACHE_RETURN_INT
-+ return count;
-+#endif
-+}
-+
-+struct cache_definition ll_cache_definition = {
-+ .name = "llap_cache",
-+ .shrink = ll_shrink_cache
-+};
-+#endif /* HAVE_REGISTER_CACHE */
-+
-+struct inode *ll_inode_from_lock(struct ldlm_lock *lock)
-+{
-+ struct inode *inode = NULL;
-+ /* NOTE: we depend on atomic igrab() -bzzz */
-+ lock_res_and_lock(lock);
-+ if (lock->l_ast_data) {
-+ struct ll_inode_info *lli = ll_i2info(lock->l_ast_data);
-+ if (lli->lli_inode_magic == LLI_INODE_MAGIC) {
-+ inode = igrab(lock->l_ast_data);
-+ } else {
-+ inode = lock->l_ast_data;
-+ ldlm_lock_debug(NULL, inode->i_state & I_FREEING ?
-+ D_INFO : D_WARNING,
-+ lock, __FILE__, __func__, __LINE__,
-+ "l_ast_data %p is bogus: magic %08x",
-+ lock->l_ast_data, lli->lli_inode_magic);
-+ inode = NULL;
-+ }
-+ }
-+ unlock_res_and_lock(lock);
-+ return inode;
-+}
-+
-+static int null_if_equal(struct ldlm_lock *lock, void *data)
-+{
-+ if (data == lock->l_ast_data) {
-+ lock->l_ast_data = NULL;
-+
-+ if (lock->l_req_mode != lock->l_granted_mode)
-+ LDLM_ERROR(lock,"clearing inode with ungranted lock");
-+ }
-+
-+ return LDLM_ITER_CONTINUE;
-+}
-+
-+void ll_clear_inode(struct inode *inode)
-+{
-+ struct ll_fid fid;
-+ struct ll_inode_info *lli = ll_i2info(inode);
-+ struct ll_sb_info *sbi = ll_i2sbi(inode);
-+ ENTRY;
-+
-+ CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
-+ inode->i_generation, inode);
-+
-+ if (S_ISDIR(inode->i_mode)) {
-+ /* these should have been cleared in ll_file_release */
-+ LASSERT(lli->lli_sai == NULL);
-+ LASSERT(lli->lli_opendir_key == NULL);
-+ LASSERT(lli->lli_opendir_pid == 0);
-+ }
-+
-+ ll_inode2fid(&fid, inode);
-+ clear_bit(LLI_F_HAVE_MDS_SIZE_LOCK, &lli->lli_flags);
-+ mdc_change_cbdata(sbi->ll_mdc_exp, &fid, null_if_equal, inode);
-+
-+ LASSERT(!lli->lli_open_fd_write_count);
-+ LASSERT(!lli->lli_open_fd_read_count);
-+ LASSERT(!lli->lli_open_fd_exec_count);
-+
-+ if (lli->lli_mds_write_och)
-+ ll_mdc_real_close(inode, FMODE_WRITE);
-+ if (lli->lli_mds_exec_och) {
-+ if (!FMODE_EXEC)
-+ CERROR("No FMODE exec, bug exec och is present for "
-+ "inode %ld\n", inode->i_ino);
-+ ll_mdc_real_close(inode, FMODE_EXEC);
-+ }
-+ if (lli->lli_mds_read_och)
-+ ll_mdc_real_close(inode, FMODE_READ);
-+
-+
-+ if (lli->lli_smd) {
-+ obd_change_cbdata(sbi->ll_osc_exp, lli->lli_smd,
-+ null_if_equal, inode);
-+
-+ obd_free_memmd(sbi->ll_osc_exp, &lli->lli_smd);
-+ lli->lli_smd = NULL;
-+ }
-+
-+ if (lli->lli_symlink_name) {
-+ OBD_FREE(lli->lli_symlink_name,
-+ strlen(lli->lli_symlink_name) + 1);
-+ lli->lli_symlink_name = NULL;
-+ }
-+
-+#ifdef CONFIG_FS_POSIX_ACL
-+ if (lli->lli_posix_acl) {
-+ LASSERT(atomic_read(&lli->lli_posix_acl->a_refcount) == 1);
-+ posix_acl_release(lli->lli_posix_acl);
-+ lli->lli_posix_acl = NULL;
-+ }
-+#endif
-+
-+ lli->lli_inode_magic = LLI_INODE_DEAD;
-+
-+#ifdef HAVE_EXPORT___IGET
-+ spin_lock(&sbi->ll_deathrow_lock);
-+ list_del_init(&lli->lli_dead_list);
-+ spin_unlock(&sbi->ll_deathrow_lock);
-+#endif
-+
-+ EXIT;
-+}
-+static int ll_setattr_do_truncate(struct inode *inode, loff_t new_size)
-+{
-+ struct ll_sb_info *sbi = ll_i2sbi(inode);
-+ struct ll_inode_info *lli = ll_i2info(inode);
-+ struct lov_stripe_md *lsm = lli->lli_smd;
-+ int rc;
-+ ldlm_policy_data_t policy = { .l_extent = {new_size,
-+ OBD_OBJECT_EOF } };
-+ struct lustre_handle lockh = { 0 };
-+ int local_lock = 0; /* 0 - no local lock;
-+ * 1 - lock taken by lock_extent;
-+ * 2 - by obd_match*/
-+ int ast_flags;
-+ int err;
-+ ENTRY;
-+
-+ UNLOCK_INODE_MUTEX(inode);
-+ UP_WRITE_I_ALLOC_SEM(inode);
-+
-+ if (sbi->ll_lockless_truncate_enable &&
-+ (sbi->ll_lco.lco_flags & OBD_CONNECT_TRUNCLOCK)) {
-+ ast_flags = LDLM_FL_BLOCK_GRANTED;
-+ rc = obd_match(sbi->ll_osc_exp, lsm, LDLM_EXTENT,
-+ &policy, LCK_PW, &ast_flags, inode, &lockh);
-+ if (rc > 0) {
-+ local_lock = 2;
-+ rc = 0;
-+ } else if (rc == 0) {
-+ rc = ll_file_punch(inode, new_size, 1);
-+ }
-+ } else {
-+ /* XXX when we fix the AST intents to pass the discard-range
-+ * XXX extent, make ast_flags always LDLM_AST_DISCARD_DATA
-+ * XXX here. */
-+ ast_flags = (new_size == 0) ? LDLM_AST_DISCARD_DATA : 0;
-+ rc = ll_extent_lock(NULL, inode, lsm, LCK_PW, &policy,
-+ &lockh, ast_flags);
-+ if (likely(rc == 0))
-+ local_lock = 1;
-+ }
-+
-+ LOCK_INODE_MUTEX(inode);
-+ DOWN_WRITE_I_ALLOC_SEM(inode);
-+ if (likely(rc == 0)) {
-+ /* Only ll_inode_size_lock is taken at this level.
-+ * lov_stripe_lock() is grabbed by ll_truncate() only over
-+ * call to obd_adjust_kms(). If vmtruncate returns 0, then
-+ * ll_truncate dropped ll_inode_size_lock() */
-+ ll_inode_size_lock(inode, 0);
-+ if (!local_lock)
-+ set_bit(LLI_F_SRVLOCK, &lli->lli_flags);
-+ rc = vmtruncate(inode, new_size);
-+ clear_bit(LLI_F_SRVLOCK, &lli->lli_flags);
-+ if (rc != 0) {
-+ LASSERT(atomic_read(&lli->lli_size_sem.count) <= 0);
-+ ll_inode_size_unlock(inode, 0);
-+ }
-+ }
-+ if (local_lock) {
-+ if (local_lock == 2)
-+ err = obd_cancel(sbi->ll_osc_exp, lsm, LCK_PW, &lockh);
-+ else
-+ err = ll_extent_unlock(NULL, inode, lsm, LCK_PW, &lockh);
-+ if (unlikely(err != 0)){
-+ CERROR("extent unlock failed: err=%d,"
-+ " unlock method =%d\n", err, local_lock);
-+ if (rc == 0)
-+ rc = err;
-+ }
-+ }
-+ RETURN(rc);
-+}
-+
-+/* If this inode has objects allocated to it (lsm != NULL), then the OST
-+ * object(s) determine the file size and mtime. Otherwise, the MDS will
-+ * keep these values until such a time that objects are allocated for it.
-+ * We do the MDS operations first, as it is checking permissions for us.
-+ * We don't to the MDS RPC if there is nothing that we want to store there,
-+ * otherwise there is no harm in updating mtime/atime on the MDS if we are
-+ * going to do an RPC anyways.
-+ *
-+ * If we are doing a truncate, we will send the mtime and ctime updates
-+ * to the OST with the punch RPC, otherwise we do an explicit setattr RPC.
-+ * I don't believe it is possible to get e.g. ATTR_MTIME_SET and ATTR_SIZE
-+ * at the same time.
-+ */
-+int ll_setattr_raw(struct inode *inode, struct iattr *attr)
-+{
-+ struct ll_inode_info *lli = ll_i2info(inode);
-+ struct lov_stripe_md *lsm = lli->lli_smd;
-+ struct ll_sb_info *sbi = ll_i2sbi(inode);
-+ struct ptlrpc_request *request = NULL;
-+ struct mdc_op_data op_data;
-+ struct lustre_md md;
-+ int ia_valid = attr->ia_valid;
-+ int rc = 0;
-+ ENTRY;
-+
-+ CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu valid %x\n", inode->i_ino,
-+ attr->ia_valid);
-+ ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_SETATTR, 1);
-+
-+ if (ia_valid & ATTR_SIZE) {
-+ if (attr->ia_size > ll_file_maxbytes(inode)) {
-+ CDEBUG(D_INODE, "file too large %llu > "LPU64"\n",
-+ attr->ia_size, ll_file_maxbytes(inode));
-+ RETURN(-EFBIG);
-+ }
-+
-+ attr->ia_valid |= ATTR_MTIME | ATTR_CTIME;
-+ }
-+
-+ /* POSIX: check before ATTR_*TIME_SET set (from inode_change_ok) */
-+ if (ia_valid & (ATTR_MTIME_SET | ATTR_ATIME_SET)) {
-+ if (current->fsuid != inode->i_uid &&
-+ !cfs_capable(CFS_CAP_FOWNER))
-+ RETURN(-EPERM);
-+ }
-+
-+ /* We mark all of the fields "set" so MDS/OST does not re-set them */
-+ if (attr->ia_valid & ATTR_CTIME) {
-+ attr->ia_ctime = CURRENT_TIME;
-+ attr->ia_valid |= ATTR_CTIME_SET;
-+ }
-+ if (!(ia_valid & ATTR_ATIME_SET) && (attr->ia_valid & ATTR_ATIME)) {
-+ attr->ia_atime = CURRENT_TIME;
-+ attr->ia_valid |= ATTR_ATIME_SET;
-+ }
-+ if (!(ia_valid & ATTR_MTIME_SET) && (attr->ia_valid & ATTR_MTIME)) {
-+ attr->ia_mtime = CURRENT_TIME;
-+ attr->ia_valid |= ATTR_MTIME_SET;
-+ }
-+ if ((attr->ia_valid & ATTR_CTIME) && !(attr->ia_valid & ATTR_MTIME)) {
-+ /* To avoid stale mtime on mds, obtain it from ost and send
-+ to mds. */
-+ rc = ll_glimpse_size(inode, 0);
-+ if (rc)
-+ RETURN(rc);
-+
-+ attr->ia_valid |= ATTR_MTIME_SET | ATTR_MTIME;
-+ attr->ia_mtime = inode->i_mtime;
-+ }
-+
-+ if (attr->ia_valid & (ATTR_MTIME | ATTR_CTIME))
-+ CDEBUG(D_INODE, "setting mtime %lu, ctime %lu, now = %lu\n",
-+ LTIME_S(attr->ia_mtime), LTIME_S(attr->ia_ctime),
-+ CURRENT_SECONDS);
-+
-+ /* NB: ATTR_SIZE will only be set after this point if the size
-+ * resides on the MDS, ie, this file has no objects. */
-+ if (lsm)
-+ attr->ia_valid &= ~ATTR_SIZE;
-+
-+ /* We always do an MDS RPC, even if we're only changing the size;
-+ * only the MDS knows whether truncate() should fail with -ETXTBUSY */
-+ ll_prepare_mdc_op_data(&op_data, inode, NULL, NULL, 0, 0, NULL);
-+
-+ rc = mdc_setattr(sbi->ll_mdc_exp, &op_data,
-+ attr, NULL, 0, NULL, 0, &request);
-+
-+ if (rc) {
-+ ptlrpc_req_finished(request);
-+ if (rc == -ENOENT) {
-+ inode->i_nlink = 0;
-+ /* Unlinked special device node? Or just a race?
-+ * Pretend we done everything. */
-+ if (!S_ISREG(inode->i_mode) &&
-+ !S_ISDIR(inode->i_mode))
-+ rc = inode_setattr(inode, attr);
-+ } else if (rc != -EPERM && rc != -EACCES && rc != -ETXTBSY)
-+ CERROR("mdc_setattr fails: rc = %d\n", rc);
-+ RETURN(rc);
-+ }
-+
-+ rc = mdc_req2lustre_md(request, REPLY_REC_OFF, sbi->ll_osc_exp, &md);
-+ if (rc) {
-+ ptlrpc_req_finished(request);
-+ RETURN(rc);
-+ }
-+
-+ /* We call inode_setattr to adjust timestamps.
-+ * If there is at least some data in file, we cleared ATTR_SIZE above to
-+ * avoid invoking vmtruncate, otherwise it is important to call
-+ * vmtruncate in inode_setattr to update inode->i_size (bug 6196) */
-+ rc = inode_setattr(inode, attr);
-+
-+ ll_update_inode(inode, &md);
-+ ptlrpc_req_finished(request);
-+
-+ if (!lsm || !S_ISREG(inode->i_mode)) {
-+ CDEBUG(D_INODE, "no lsm: not setting attrs on OST\n");
-+ RETURN(rc);
-+ }
-+
-+ /* We really need to get our PW lock before we change inode->i_size.
-+ * If we don't we can race with other i_size updaters on our node, like
-+ * ll_file_read. We can also race with i_size propogation to other
-+ * nodes through dirtying and writeback of final cached pages. This
-+ * last one is especially bad for racing o_append users on other
-+ * nodes. */
-+ if (ia_valid & ATTR_SIZE) {
-+ rc = ll_setattr_do_truncate(inode, attr->ia_size);
-+ } else if (ia_valid & (ATTR_MTIME | ATTR_MTIME_SET)) {
-+ obd_flag flags;
-+ struct obd_info oinfo = { { { 0 } } };
-+ struct obdo *oa;
-+ OBDO_ALLOC(oa);
-+
-+ CDEBUG(D_INODE, "set mtime on OST inode %lu to %lu\n",
-+ inode->i_ino, LTIME_S(attr->ia_mtime));
-+
-+ if (oa) {
-+ oa->o_id = lsm->lsm_object_id;
-+ oa->o_valid = OBD_MD_FLID;
-+
-+ flags = OBD_MD_FLTYPE | OBD_MD_FLATIME |
-+ OBD_MD_FLMTIME | OBD_MD_FLCTIME |
-+ OBD_MD_FLFID | OBD_MD_FLGENER;
-+
-+ obdo_from_inode(oa, inode, flags);
-+
-+ oinfo.oi_oa = oa;
-+ oinfo.oi_md = lsm;
-+
-+ rc = obd_setattr_rqset(sbi->ll_osc_exp, &oinfo, NULL);
-+ if (rc)
-+ CERROR("obd_setattr_async fails: rc=%d\n", rc);
-+ OBDO_FREE(oa);
-+ } else {
-+ rc = -ENOMEM;
-+ }
-+ }
-+ RETURN(rc);
-+}
-+
-+int ll_setattr(struct dentry *de, struct iattr *attr)
-+{
-+ int mode;
-+
-+ if ((attr->ia_valid & (ATTR_CTIME|ATTR_SIZE|ATTR_MODE)) ==
-+ (ATTR_CTIME|ATTR_SIZE|ATTR_MODE))
-+ attr->ia_valid |= MDS_OPEN_OWNEROVERRIDE;
-+ if ((attr->ia_valid & (ATTR_MODE|ATTR_FORCE|ATTR_SIZE)) ==
-+ (ATTR_SIZE|ATTR_MODE)) {
-+ mode = de->d_inode->i_mode;
-+ if (((mode & S_ISUID) && (!(attr->ia_mode & S_ISUID))) ||
-+ ((mode & S_ISGID) && (mode & S_IXGRP) &&
-+ (!(attr->ia_mode & S_ISGID))))
-+ attr->ia_valid |= ATTR_FORCE;
-+ }
-+
-+ return ll_setattr_raw(de->d_inode, attr);
-+}
-+
-+int ll_statfs_internal(struct super_block *sb, struct obd_statfs *osfs,
-+ __u64 max_age, __u32 flags)
-+{
-+ struct ll_sb_info *sbi = ll_s2sbi(sb);
-+ struct obd_statfs obd_osfs;
-+ int rc;
-+ ENTRY;
-+
-+ rc = obd_statfs(class_exp2obd(sbi->ll_mdc_exp), osfs, max_age, flags);
-+ if (rc) {
-+ CERROR("mdc_statfs fails: rc = %d\n", rc);
-+ RETURN(rc);
-+ }
-+
-+ osfs->os_type = sb->s_magic;
-+
-+ CDEBUG(D_SUPER, "MDC blocks "LPU64"/"LPU64" objects "LPU64"/"LPU64"\n",
-+ osfs->os_bavail, osfs->os_blocks, osfs->os_ffree,osfs->os_files);
-+
-+ rc = obd_statfs_rqset(class_exp2obd(sbi->ll_osc_exp),
-+ &obd_osfs, max_age, flags);
-+ if (rc) {
-+ CERROR("obd_statfs fails: rc = %d\n", rc);
-+ RETURN(rc);
-+ }
-+
-+ CDEBUG(D_SUPER, "OSC blocks "LPU64"/"LPU64" objects "LPU64"/"LPU64"\n",
-+ obd_osfs.os_bavail, obd_osfs.os_blocks, obd_osfs.os_ffree,
-+ obd_osfs.os_files);
-+
-+ osfs->os_bsize = obd_osfs.os_bsize;
-+ osfs->os_blocks = obd_osfs.os_blocks;
-+ osfs->os_bfree = obd_osfs.os_bfree;
-+ osfs->os_bavail = obd_osfs.os_bavail;
-+
-+ /* If we don't have as many objects free on the OST as inodes
-+ * on the MDS, we reduce the total number of inodes to
-+ * compensate, so that the "inodes in use" number is correct.
-+ */
-+ if (obd_osfs.os_ffree < osfs->os_ffree) {
-+ osfs->os_files = (osfs->os_files - osfs->os_ffree) +
-+ obd_osfs.os_ffree;
-+ osfs->os_ffree = obd_osfs.os_ffree;
-+ }
-+
-+ RETURN(rc);
-+}
-+#ifndef HAVE_STATFS_DENTRY_PARAM
-+int ll_statfs(struct super_block *sb, struct kstatfs *sfs)
-+{
-+#else
-+int ll_statfs(struct dentry *de, struct kstatfs *sfs)
-+{
-+ struct super_block *sb = de->d_sb;
-+#endif
-+ struct obd_statfs osfs;
-+ int rc;
-+
-+ CDEBUG(D_VFSTRACE, "VFS Op: at "LPU64" jiffies\n", get_jiffies_64());
-+ ll_stats_ops_tally(ll_s2sbi(sb), LPROC_LL_STAFS, 1);
-+
-+ /* For now we will always get up-to-date statfs values, but in the
-+ * future we may allow some amount of caching on the client (e.g.
-+ * from QOS or lprocfs updates). */
-+ rc = ll_statfs_internal(sb, &osfs, cfs_time_current_64() - 1, 0);
-+ if (rc)
-+ return rc;
-+
-+ statfs_unpack(sfs, &osfs);
-+
-+ /* We need to downshift for all 32-bit kernels, because we can't
-+ * tell if the kernel is being called via sys_statfs64() or not.
-+ * Stop before overflowing f_bsize - in which case it is better
-+ * to just risk EOVERFLOW if caller is using old sys_statfs(). */
-+ if (sizeof(long) < 8) {
-+ while (osfs.os_blocks > ~0UL && sfs->f_bsize < 0x40000000) {
-+ sfs->f_bsize <<= 1;
-+
-+ osfs.os_blocks >>= 1;
-+ osfs.os_bfree >>= 1;
-+ osfs.os_bavail >>= 1;
-+ }
-+ }
-+
-+ sfs->f_blocks = osfs.os_blocks;
-+ sfs->f_bfree = osfs.os_bfree;
-+ sfs->f_bavail = osfs.os_bavail;
-+
-+ return 0;
-+}
-+
-+void ll_inode_size_lock(struct inode *inode, int lock_lsm)
-+{
-+ struct ll_inode_info *lli;
-+ struct lov_stripe_md *lsm;
-+
-+ lli = ll_i2info(inode);
-+ LASSERT(lli->lli_size_sem_owner != current);
-+ down(&lli->lli_size_sem);
-+ LASSERT(lli->lli_size_sem_owner == NULL);
-+ lli->lli_size_sem_owner = current;
-+ lsm = lli->lli_smd;
-+ LASSERTF(lsm != NULL || lock_lsm == 0, "lsm %p, lock_lsm %d\n",
-+ lsm, lock_lsm);
-+ if (lock_lsm)
-+ lov_stripe_lock(lsm);
-+}
-+
-+void ll_inode_size_unlock(struct inode *inode, int unlock_lsm)
-+{
-+ struct ll_inode_info *lli;
-+ struct lov_stripe_md *lsm;
-+
-+ lli = ll_i2info(inode);
-+ lsm = lli->lli_smd;
-+ LASSERTF(lsm != NULL || unlock_lsm == 0, "lsm %p, lock_lsm %d\n",
-+ lsm, unlock_lsm);
-+ if (unlock_lsm)
-+ lov_stripe_unlock(lsm);
-+ LASSERT(lli->lli_size_sem_owner == current);
-+ lli->lli_size_sem_owner = NULL;
-+ up(&lli->lli_size_sem);
-+}
-+
-+static void ll_replace_lsm(struct inode *inode, struct lov_stripe_md *lsm)
-+{
-+ struct ll_inode_info *lli = ll_i2info(inode);
-+
-+ dump_lsm(D_INODE, lsm);
-+ dump_lsm(D_INODE, lli->lli_smd);
-+ LASSERTF(lsm->lsm_magic == LOV_MAGIC_JOIN,
-+ "lsm must be joined lsm %p\n", lsm);
-+ obd_free_memmd(ll_i2obdexp(inode), &lli->lli_smd);
-+ CDEBUG(D_INODE, "replace lsm %p to lli_smd %p for inode %lu%u(%p)\n",
-+ lsm, lli->lli_smd, inode->i_ino, inode->i_generation, inode);
-+ lli->lli_smd = lsm;
-+ lli->lli_maxbytes = lsm->lsm_maxbytes;
-+ if (lli->lli_maxbytes > PAGE_CACHE_MAXBYTES)
-+ lli->lli_maxbytes = PAGE_CACHE_MAXBYTES;
-+}
-+
-+void ll_update_inode(struct inode *inode, struct lustre_md *md)
-+{
-+ struct ll_inode_info *lli = ll_i2info(inode);
-+ struct mds_body *body = md->body;
-+ struct lov_stripe_md *lsm = md->lsm;
-+
-+ LASSERT ((lsm != NULL) == ((body->valid & OBD_MD_FLEASIZE) != 0));
-+ if (lsm != NULL) {
-+ if (lli->lli_smd == NULL) {
-+ if (lsm->lsm_magic != LOV_MAGIC &&
-+ lsm->lsm_magic != LOV_MAGIC_JOIN) {
-+ dump_lsm(D_ERROR, lsm);
-+ LBUG();
-+ }
-+ CDEBUG(D_INODE, "adding lsm %p to inode %lu/%u(%p)\n",
-+ lsm, inode->i_ino, inode->i_generation, inode);
-+ /* ll_inode_size_lock() requires it is only called
-+ * with lli_smd != NULL or lock_lsm == 0 or we can
-+ * race between lock/unlock. bug 9547 */
-+ lli->lli_smd = lsm;
-+ lli->lli_maxbytes = lsm->lsm_maxbytes;
-+ if (lli->lli_maxbytes > PAGE_CACHE_MAXBYTES)
-+ lli->lli_maxbytes = PAGE_CACHE_MAXBYTES;
-+ } else {
-+ if (lli->lli_smd->lsm_magic == lsm->lsm_magic &&
-+ lli->lli_smd->lsm_stripe_count ==
-+ lsm->lsm_stripe_count) {
-+ if (lov_stripe_md_cmp(lli->lli_smd, lsm)) {
-+ CERROR("lsm mismatch for inode %ld\n",
-+ inode->i_ino);
-+ CERROR("lli_smd:\n");
-+ dump_lsm(D_ERROR, lli->lli_smd);
-+ CERROR("lsm:\n");
-+ dump_lsm(D_ERROR, lsm);
-+ LBUG();
-+ }
-+ } else
-+ ll_replace_lsm(inode, lsm);
-+ }
-+ if (lli->lli_smd != lsm)
-+ obd_free_memmd(ll_i2obdexp(inode), &lsm);
-+ }
-+
-+#ifdef CONFIG_FS_POSIX_ACL
-+ LASSERT(!md->posix_acl || (body->valid & OBD_MD_FLACL));
-+ if (body->valid & OBD_MD_FLACL) {
-+ spin_lock(&lli->lli_lock);
-+ if (lli->lli_posix_acl)
-+ posix_acl_release(lli->lli_posix_acl);
-+ lli->lli_posix_acl = md->posix_acl;
-+ spin_unlock(&lli->lli_lock);
-+ }
-+#endif
-+
-+ if (body->valid & OBD_MD_FLID)
-+ inode->i_ino = body->ino;
-+ if (body->valid & OBD_MD_FLATIME &&
-+ body->atime > LTIME_S(inode->i_atime))
-+ LTIME_S(inode->i_atime) = body->atime;
-+
-+ /* mtime is always updated with ctime, but can be set in past.
-+ As write and utime(2) may happen within 1 second, and utime's
-+ mtime has a priority over write's one, so take mtime from mds
-+ for the same ctimes. */
-+ if (body->valid & OBD_MD_FLCTIME &&
-+ body->ctime >= LTIME_S(inode->i_ctime)) {
-+ LTIME_S(inode->i_ctime) = body->ctime;
-+ if (body->valid & OBD_MD_FLMTIME) {
-+ CDEBUG(D_INODE, "setting ino %lu mtime "
-+ "from %lu to "LPU64"\n", inode->i_ino,
-+ LTIME_S(inode->i_mtime), body->mtime);
-+ LTIME_S(inode->i_mtime) = body->mtime;
-+ }
-+ }
-+ if (body->valid & OBD_MD_FLMODE)
-+ inode->i_mode = (inode->i_mode & S_IFMT)|(body->mode & ~S_IFMT);
-+ if (body->valid & OBD_MD_FLTYPE)
-+ inode->i_mode = (inode->i_mode & ~S_IFMT)|(body->mode & S_IFMT);
-+ if (S_ISREG(inode->i_mode)) {
-+ inode->i_blkbits = min(PTLRPC_MAX_BRW_BITS+1, LL_MAX_BLKSIZE_BITS);
-+ } else {
-+ inode->i_blkbits = inode->i_sb->s_blocksize_bits;
-+ }
-+#ifdef HAVE_INODE_BLKSIZE
-+ inode->i_blksize = 1<<inode->i_blkbits;
-+#endif
-+ if (body->valid & OBD_MD_FLUID)
-+ inode->i_uid = body->uid;
-+ if (body->valid & OBD_MD_FLGID)
-+ inode->i_gid = body->gid;
-+ if (body->valid & OBD_MD_FLFLAGS)
-+ inode->i_flags = ll_ext_to_inode_flags(body->flags);
-+
-+ if (body->valid & OBD_MD_FLNLINK)
-+ inode->i_nlink = body->nlink;
-+ if (body->valid & OBD_MD_FLGENER)
-+ inode->i_generation = body->generation;
-+ if (body->valid & OBD_MD_FLRDEV)
-+ inode->i_rdev = old_decode_dev(body->rdev);
-+ if (body->valid & OBD_MD_FLSIZE) {
-+#if 0 /* Can't block ll_test_inode->ll_update_inode, b=14326*/
-+ ll_inode_size_lock(inode, 0);
-+ i_size_write(inode, body->size);
-+ ll_inode_size_unlock(inode, 0);
-+#else
-+ inode->i_size = body->size;
-+#endif
-+ }
-+ if (body->valid & OBD_MD_FLBLOCKS)
-+ inode->i_blocks = body->blocks;
-+
-+ if (body->valid & OBD_MD_FLSIZE)
-+ set_bit(LLI_F_HAVE_MDS_SIZE_LOCK, &lli->lli_flags);
-+}
-+
-+static struct backing_dev_info ll_backing_dev_info = {
-+ .ra_pages = 0, /* No readahead */
-+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,12))
-+ .capabilities = 0, /* Does contribute to dirty memory */
-+#else
-+ .memory_backed = 0, /* Does contribute to dirty memory */
-+#endif
-+};
-+
-+void ll_read_inode2(struct inode *inode, void *opaque)
-+{
-+ struct lustre_md *md = opaque;
-+ struct ll_inode_info *lli = ll_i2info(inode);
-+ ENTRY;
-+
-+ CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
-+ inode->i_generation, inode);
-+
-+ ll_lli_init(lli);
-+
-+ LASSERT(!lli->lli_smd);
-+
-+ /* Core attributes from the MDS first. This is a new inode, and
-+ * the VFS doesn't zero times in the core inode so we have to do
-+ * it ourselves. They will be overwritten by either MDS or OST
-+ * attributes - we just need to make sure they aren't newer. */
-+ LTIME_S(inode->i_mtime) = 0;
-+ LTIME_S(inode->i_atime) = 0;
-+ LTIME_S(inode->i_ctime) = 0;
-+ inode->i_rdev = 0;
-+ ll_update_inode(inode, md);
-+
-+ /* OIDEBUG(inode); */
-+
-+ if (S_ISREG(inode->i_mode)) {
-+ struct ll_sb_info *sbi = ll_i2sbi(inode);
-+ inode->i_op = &ll_file_inode_operations;
-+ inode->i_fop = sbi->ll_fop;
-+ inode->i_mapping->a_ops = &ll_aops;
-+ EXIT;
-+ } else if (S_ISDIR(inode->i_mode)) {
-+ inode->i_op = &ll_dir_inode_operations;
-+ inode->i_fop = &ll_dir_operations;
-+ inode->i_mapping->a_ops = &ll_dir_aops;
-+ EXIT;
-+ } else if (S_ISLNK(inode->i_mode)) {
-+ inode->i_op = &ll_fast_symlink_inode_operations;
-+ EXIT;
-+ } else {
-+ inode->i_op = &ll_special_inode_operations;
-+ init_special_inode(inode, inode->i_mode,
-+ kdev_t_to_nr(inode->i_rdev));
-+ /* initializing backing dev info. */
-+ inode->i_mapping->backing_dev_info = &ll_backing_dev_info;
-+ EXIT;
-+ }
-+}
-+
-+int ll_iocontrol(struct inode *inode, struct file *file,
-+ unsigned int cmd, unsigned long arg)
-+{
-+ struct ll_sb_info *sbi = ll_i2sbi(inode);
-+ struct ptlrpc_request *req = NULL;
-+ int rc, flags = 0;
-+ ENTRY;
-+
-+ switch(cmd) {
-+ case EXT3_IOC_GETFLAGS: {
-+ struct ll_fid fid;
-+ struct mds_body *body;
-+
-+ ll_inode2fid(&fid, inode);
-+ rc = mdc_getattr(sbi->ll_mdc_exp, &fid, OBD_MD_FLFLAGS,0,&req);
-+ if (rc) {
-+ CERROR("failure %d inode %lu\n", rc, inode->i_ino);
-+ RETURN(-abs(rc));
-+ }
-+
-+ body = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF,
-+ sizeof(*body));
-+
-+ /* We want to return EXT3_*_FL flags to the caller via this
-+ * ioctl. An older MDS may be sending S_* flags, fix it up. */
-+ flags = ll_inode_to_ext_flags(body->flags,
-+ body->flags &MDS_BFLAG_EXT_FLAGS);
-+ ptlrpc_req_finished (req);
-+
-+ RETURN(put_user(flags, (int *)arg));
-+ }
-+ case EXT3_IOC_SETFLAGS: {
-+ struct mdc_op_data op_data;
-+ struct ll_iattr_struct attr;
-+ struct obd_info oinfo = { { { 0 } } };
-+ struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
-+
-+ if (get_user(flags, (int *)arg))
-+ RETURN(-EFAULT);
-+
-+ oinfo.oi_md = lsm;
-+ OBDO_ALLOC(oinfo.oi_oa);
-+ if (!oinfo.oi_oa)
-+ RETURN(-ENOMEM);
-+
-+ ll_prepare_mdc_op_data(&op_data, inode, NULL, NULL, 0, 0, NULL);
-+
-+ memset(&attr, 0, sizeof(attr));
-+ attr.ia_attr_flags = flags;
-+ ((struct iattr *)&attr)->ia_valid |= ATTR_ATTR_FLAG;
-+
-+ rc = mdc_setattr(sbi->ll_mdc_exp, &op_data,
-+ (struct iattr *)&attr, NULL, 0, NULL, 0, &req);
-+ ptlrpc_req_finished(req);
-+ if (rc || lsm == NULL) {
-+ OBDO_FREE(oinfo.oi_oa);
-+ RETURN(rc);
-+ }
-+
-+ oinfo.oi_oa->o_id = lsm->lsm_object_id;
-+ oinfo.oi_oa->o_flags = flags;
-+ oinfo.oi_oa->o_valid = OBD_MD_FLID | OBD_MD_FLFLAGS;
-+
-+ obdo_from_inode(oinfo.oi_oa, inode,
-+ OBD_MD_FLFID | OBD_MD_FLGENER);
-+ rc = obd_setattr_rqset(sbi->ll_osc_exp, &oinfo, NULL);
-+ OBDO_FREE(oinfo.oi_oa);
-+ if (rc) {
-+ if (rc != -EPERM && rc != -EACCES)
-+ CERROR("mdc_setattr_async fails: rc = %d\n", rc);
-+ RETURN(rc);
-+ }
-+
-+ inode->i_flags = ll_ext_to_inode_flags(flags |
-+ MDS_BFLAG_EXT_FLAGS);
-+ RETURN(0);
-+ }
-+ default:
-+ RETURN(-ENOSYS);
-+ }
-+
-+ RETURN(0);
-+}
-+
-+/* umount -f client means force down, don't save state */
-+#ifdef HAVE_UMOUNTBEGIN_VFSMOUNT
-+void ll_umount_begin(struct vfsmount *vfsmnt, int flags)
-+{
-+ struct super_block *sb = vfsmnt->mnt_sb;
-+#else
-+void ll_umount_begin(struct super_block *sb)
-+{
-+#endif
-+ struct lustre_sb_info *lsi = s2lsi(sb);
-+ struct ll_sb_info *sbi = ll_s2sbi(sb);
-+ struct obd_device *obd;
-+ struct obd_ioctl_data ioc_data = { 0 };
-+ ENTRY;
-+
-+#ifdef HAVE_UMOUNTBEGIN_VFSMOUNT
-+ if (!(flags & MNT_FORCE)) {
-+ EXIT;
-+ return;
-+ }
-+#endif
-+
-+ /* Tell the MGC we got umount -f */
-+ lsi->lsi_flags |= LSI_UMOUNT_FORCE;
-+
-+ CDEBUG(D_VFSTRACE, "VFS Op: superblock %p count %d active %d\n", sb,
-+ sb->s_count, atomic_read(&sb->s_active));
-+
-+ obd = class_exp2obd(sbi->ll_mdc_exp);
-+ if (obd == NULL) {
-+ CERROR("Invalid MDC connection handle "LPX64"\n",
-+ sbi->ll_mdc_exp->exp_handle.h_cookie);
-+ EXIT;
-+ return;
-+ }
-+ obd->obd_force = 1;
-+ obd_iocontrol(IOC_OSC_SET_ACTIVE, sbi->ll_mdc_exp, sizeof ioc_data,
-+ &ioc_data, NULL);
-+
-+ obd = class_exp2obd(sbi->ll_osc_exp);
-+ if (obd == NULL) {
-+ CERROR("Invalid LOV connection handle "LPX64"\n",
-+ sbi->ll_osc_exp->exp_handle.h_cookie);
-+ EXIT;
-+ return;
-+ }
-+
-+ obd->obd_force = 1;
-+ obd_iocontrol(IOC_OSC_SET_ACTIVE, sbi->ll_osc_exp, sizeof ioc_data,
-+ &ioc_data, NULL);
-+
-+ /* Really, we'd like to wait until there are no requests outstanding,
-+ * and then continue. For now, we just invalidate the requests,
-+ * schedule() and sleep one second if needed, and hope.
-+ */
-+ schedule();
-+#ifdef HAVE_UMOUNTBEGIN_VFSMOUNT
-+ if (atomic_read(&vfsmnt->mnt_count) > 2) {
-+ cfs_schedule_timeout(CFS_TASK_INTERRUPTIBLE,
-+ cfs_time_seconds(1));
-+ if (atomic_read(&vfsmnt->mnt_count) > 2)
-+ LCONSOLE_WARN("Mount still busy with %d refs! You "
-+ "may try to umount it a bit later\n",
-+ atomic_read(&vfsmnt->mnt_count));
-+ }
-+#endif
-+
-+ EXIT;
-+}
-+
-+int ll_remount_fs(struct super_block *sb, int *flags, char *data)
-+{
-+ struct ll_sb_info *sbi = ll_s2sbi(sb);
-+ int err;
-+ __u32 read_only;
-+
-+ if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY)) {
-+ read_only = *flags & MS_RDONLY;
-+ err = obd_set_info_async(sbi->ll_mdc_exp, sizeof(KEY_READONLY),
-+ KEY_READONLY, sizeof(read_only),
-+ &read_only, NULL);
-+
-+ /* MDS might have expected a different ro key value, b=17493 */
-+ if (err == -EINVAL) {
-+ CDEBUG(D_CONFIG, "Retrying remount with 1.6.6 ro key\n");
-+ err = obd_set_info_async(sbi->ll_mdc_exp,
-+ sizeof(KEY_READONLY_166COMPAT),
-+ KEY_READONLY_166COMPAT,
-+ sizeof(read_only),
-+ &read_only, NULL);
-+ }
-+
-+ if (err) {
-+ CERROR("Failed to change the read-only flag during "
-+ "remount: %d\n", err);
-+ return err;
-+ }
-+
-+ if (read_only)
-+ sb->s_flags |= MS_RDONLY;
-+ else
-+ sb->s_flags &= ~MS_RDONLY;
-+ }
-+ return 0;
-+}
-+
-+int ll_prep_inode(struct obd_export *exp, struct inode **inode,
-+ struct ptlrpc_request *req, int offset,struct super_block *sb)
-+{
-+ struct lustre_md md;
-+ struct ll_sb_info *sbi = NULL;
-+ int rc = 0;
-+ ENTRY;
-+
-+ LASSERT(*inode || sb);
-+ sbi = sb ? ll_s2sbi(sb) : ll_i2sbi(*inode);
-+ prune_deathrow(sbi, 1);
-+
-+ rc = mdc_req2lustre_md(req, offset, exp, &md);
-+ if (rc)
-+ RETURN(rc);
-+
-+ if (*inode) {
-+ ll_update_inode(*inode, &md);
-+ } else {
-+ LASSERT(sb);
-+ *inode = ll_iget(sb, md.body->ino, &md);
-+ if (*inode == NULL || is_bad_inode(*inode)) {
-+ mdc_free_lustre_md(exp, &md);
-+ rc = -ENOMEM;
-+ CERROR("new_inode -fatal: rc %d\n", rc);
-+ GOTO(out, rc);
-+ }
-+ }
-+
-+ rc = obd_checkmd(exp, ll_i2mdcexp(*inode),
-+ ll_i2info(*inode)->lli_smd);
-+out:
-+ RETURN(rc);
-+}
-+
-+char *llap_origins[] = {
-+ [LLAP_ORIGIN_UNKNOWN] = "--",
-+ [LLAP_ORIGIN_READPAGE] = "rp",
-+ [LLAP_ORIGIN_READAHEAD] = "ra",
-+ [LLAP_ORIGIN_COMMIT_WRITE] = "cw",
-+ [LLAP_ORIGIN_WRITEPAGE] = "wp",
-+ [LLAP_ORIGIN_LOCKLESS_IO] = "ls"
-+};
-+
-+struct ll_async_page *llite_pglist_next_llap(struct ll_sb_info *sbi,
-+ struct list_head *list)
-+{
-+ struct ll_async_page *llap;
-+ struct list_head *pos;
-+
-+ list_for_each(pos, list) {
-+ if (pos == &sbi->ll_pglist)
-+ return NULL;
-+ llap = list_entry(pos, struct ll_async_page, llap_pglist_item);
-+ if (llap->llap_page == NULL)
-+ continue;
-+ return llap;
-+ }
-+ LBUG();
-+ return NULL;
-+}
-+
-+int ll_obd_statfs(struct inode *inode, void *arg)
-+{
-+ struct ll_sb_info *sbi = NULL;
-+ struct obd_device *client_obd = NULL, *lov_obd = NULL;
-+ struct lov_obd *lov = NULL;
-+ struct obd_statfs stat_buf = {0};
-+ char *buf = NULL;
-+ struct obd_ioctl_data *data = NULL;
-+ __u32 type, index;
-+ int len = 0, rc;
-+
-+ if (!inode || !(sbi = ll_i2sbi(inode)))
-+ GOTO(out_statfs, rc = -EINVAL);
-+
-+ rc = obd_ioctl_getdata(&buf, &len, arg);
-+ if (rc)
-+ GOTO(out_statfs, rc);
-+
-+ data = (void*)buf;
-+ if (!data->ioc_inlbuf1 || !data->ioc_inlbuf2 ||
-+ !data->ioc_pbuf1 || !data->ioc_pbuf2)
-+ GOTO(out_statfs, rc = -EINVAL);
-+
-+ memcpy(&type, data->ioc_inlbuf1, sizeof(__u32));
-+ memcpy(&index, data->ioc_inlbuf2, sizeof(__u32));
-+
-+ if (type == LL_STATFS_MDC) {
-+ if (index > 0)
-+ GOTO(out_statfs, rc = -ENODEV);
-+ client_obd = class_exp2obd(sbi->ll_mdc_exp);
-+ } else if (type == LL_STATFS_LOV) {
-+ lov_obd = class_exp2obd(sbi->ll_osc_exp);
-+ lov = &lov_obd->u.lov;
-+
-+ if (index >= lov->desc.ld_tgt_count)
-+ GOTO(out_statfs, rc = -ENODEV);
-+
-+ if (!lov->lov_tgts[index])
-+ /* Try again with the next index */
-+ GOTO(out_statfs, rc = -EAGAIN);
-+
-+ client_obd = class_exp2obd(lov->lov_tgts[index]->ltd_exp);
-+ if (!lov->lov_tgts[index]->ltd_active)
-+ GOTO(out_uuid, rc = -ENODATA);
-+ }
-+
-+ if (!client_obd)
-+ GOTO(out_statfs, rc = -EINVAL);
-+
-+ rc = obd_statfs(client_obd, &stat_buf, cfs_time_current_64() - HZ, 1);
-+ if (rc)
-+ GOTO(out_statfs, rc);
-+
-+ if (copy_to_user(data->ioc_pbuf1, &stat_buf, data->ioc_plen1))
-+ GOTO(out_statfs, rc = -EFAULT);
-+
-+out_uuid:
-+ if (copy_to_user(data->ioc_pbuf2, obd2cli_tgt(client_obd),
-+ data->ioc_plen2))
-+ rc = -EFAULT;
-+
-+out_statfs:
-+ if (buf)
-+ obd_ioctl_freedata(buf, len);
-+ return rc;
-+}
-+
-+int ll_process_config(struct lustre_cfg *lcfg)
-+{
-+ char *ptr;
-+ void *sb;
-+ struct lprocfs_static_vars lvars;
-+ unsigned long x;
-+ int rc = 0;
-+
-+ lprocfs_llite_init_vars(&lvars);
-+
-+ /* The instance name contains the sb: lustre-client-aacfe000 */
-+ ptr = strrchr(lustre_cfg_string(lcfg, 0), '-');
-+ if (!ptr || !*(++ptr))
-+ return -EINVAL;
-+ if (sscanf(ptr, "%lx", &x) != 1)
-+ return -EINVAL;
-+ sb = (void *)x;
-+ /* This better be a real Lustre superblock! */
-+ LASSERT(s2lsi((struct super_block *)sb)->lsi_lmd->lmd_magic == LMD_MAGIC);
-+
-+ /* Note we have not called client_common_fill_super yet, so
-+ proc fns must be able to handle that! */
-+ rc = class_process_proc_param(PARAM_LLITE, lvars.obd_vars,
-+ lcfg, sb);
-+ return(rc);
-+}
-+
-+int ll_show_options(struct seq_file *seq, struct vfsmount *vfs)
-+{
-+ struct ll_sb_info *sbi;
-+
-+ LASSERT((seq != NULL) && (vfs != NULL));
-+ sbi = ll_s2sbi(vfs->mnt_sb);
-+
-+ if (sbi->ll_flags & LL_SBI_NOLCK)
-+ seq_puts(seq, ",nolock");
-+
-+ if (sbi->ll_flags & LL_SBI_FLOCK)
-+ seq_puts(seq, ",flock");
-+
-+ if (sbi->ll_flags & LL_SBI_LOCALFLOCK)
-+ seq_puts(seq, ",localflock");
-+
-+ if (sbi->ll_flags & LL_SBI_USER_XATTR)
-+ seq_puts(seq, ",user_xattr");
-+
-+ if (sbi->ll_flags & LL_SBI_ACL)
-+ seq_puts(seq, ",acl");
-+
-+ RETURN(0);
-+}
diff -urNad lustre~/lustre/llite/llite_mmap.c lustre/lustre/llite/llite_mmap.c
--- lustre~/lustre/llite/llite_mmap.c 2009-08-19 09:51:09.000000000 +0200
-+++ lustre/lustre/llite/llite_mmap.c 2009-08-19 14:10:45.000000000 +0200
++++ lustre/lustre/llite/llite_mmap.c 2009-08-20 10:25:20.000000000 +0200
@@ -81,8 +81,7 @@
int lt_get_mmap_locks(struct ll_lock_tree *tree,
unsigned long addr, size_t count);
@@ -12818,7 +2010,7 @@ diff -urNad lustre~/lustre/llite/llite_mmap.c lustre/lustre/llite/llite_mmap.c
#endif
diff -urNad lustre~/lustre/llite/llite_nfs.c lustre/lustre/llite/llite_nfs.c
--- lustre~/lustre/llite/llite_nfs.c 2009-08-19 09:51:09.000000000 +0200
-+++ lustre/lustre/llite/llite_nfs.c 2009-08-19 14:10:45.000000000 +0200
++++ lustre/lustre/llite/llite_nfs.c 2009-08-20 10:25:20.000000000 +0200
@@ -68,36 +68,30 @@
}
@@ -12904,7 +2096,7 @@ diff -urNad lustre~/lustre/llite/llite_nfs.c lustre/lustre/llite/llite_nfs.c
RETURN(ERR_PTR(-ESTALE));
}
-+#ifdef HAS_NO_D_ALLOC_ANON
++#ifndef HAVE_D_ALLOC_ANON
+ result = d_obtain_alias(inode);
+ if (!IS_ERR(result))
+ ll_dops_init(result, 1);
@@ -12958,7 +2150,7 @@ diff -urNad lustre~/lustre/llite/llite_nfs.c lustre/lustre/llite/llite_nfs.c
+ struct lustre_nfs_fid *nfs_fid = (void *)fh;
+ ENTRY;
+
-+ CDEBUG(D_INFO, "encoding for (%lu) maxlen=%d minlen=%u\n",
++ CDEBUG(D_INFO, "encoding for (%u) maxlen=%d minlen=%lu\n",
+ inode->i_ino, *plen,
+ sizeof(struct lustre_nfs_fid));
+
@@ -13115,7 +2307,7 @@ diff -urNad lustre~/lustre/llite/llite_nfs.c lustre/lustre/llite/llite_nfs.c
#endif
diff -urNad lustre~/lustre/llite/lloop.c lustre/lustre/llite/lloop.c
--- lustre~/lustre/llite/lloop.c 2009-08-19 09:51:09.000000000 +0200
-+++ lustre/lustre/llite/lloop.c 2009-08-19 14:10:45.000000000 +0200
++++ lustre/lustre/llite/lloop.c 2009-08-20 10:25:20.000000000 +0200
@@ -152,7 +152,7 @@
struct semaphore lo_bh_mutex;
atomic_t lo_pending;
@@ -13129,7 +2321,7 @@ diff -urNad lustre~/lustre/llite/lloop.c lustre/lustre/llite/lloop.c
int ret, cmd, i;
struct bio_vec *bvec;
-+#ifdef STRUCT_BIO_WO_HW_SEGMENTS
++#ifndef HAVE_BI_HW_SEGMENTS
+ /* since 2.6.28 no bi_hw_segments */
+ BUG_ON(bio->bi_phys_segments > LLOOP_MAX_SEGMENTS);
+#else
@@ -13174,27 +2366,17 @@ diff -urNad lustre~/lustre/llite/lloop.c lustre/lustre/llite/lloop.c
}
/*
-@@ -361,7 +366,8 @@
- up(&lo->lo_sem);
-
- for (;;) {
-- down_interruptible(&lo->lo_bh_mutex);
-+ int i = down_interruptible(&lo->lo_bh_mutex);
-+ if (i == 4711) break; /* that won't ever happen - just a tribute to -Werror */
- /*
- * could be upped because of tear-down, not because of
- * pending work
-@@ -504,9 +510,17 @@
+@@ -504,9 +509,17 @@
return 0;
}
-+#ifdef BLOCKDEV_OPS_WITH_MODE
++#ifdef HAVE_OPEN_BDEV_MODE
+static int lo_open(struct block_device *bdev, fmode_t unused_mode)
+#else
static int lo_open(struct inode *inode, struct file *file)
+#endif
{
-+#ifdef BLOCKDEV_OPS_WITH_MODE
++#ifdef HAVE_OPEN_BDEV_MODE
+ struct lloop_device *lo = bdev->bd_disk->private_data;
+#else
struct lloop_device *lo = inode->i_bdev->bd_disk->private_data;
@@ -13202,17 +2384,17 @@ diff -urNad lustre~/lustre/llite/lloop.c lustre/lustre/llite/lloop.c
down(&lo->lo_ctl_mutex);
lo->lo_refcnt++;
-@@ -515,9 +529,17 @@
+@@ -515,9 +528,17 @@
return 0;
}
-+#ifdef BLOCKDEV_OPS_WITH_MODE
++#ifdef HAVE_OPEN_BDEV_MODE
+static int lo_release(struct gendisk *bd_disk, fmode_t unused_mode)
+#else
static int lo_release(struct inode *inode, struct file *file)
+#endif
{
-+#ifdef BLOCKDEV_OPS_WITH_MODE
++#ifdef HAVE_OPEN_BDEV_MODE
+ struct lloop_device *lo = bd_disk->private_data;
+#else
struct lloop_device *lo = inode->i_bdev->bd_disk->private_data;
@@ -13220,11 +2402,11 @@ diff -urNad lustre~/lustre/llite/lloop.c lustre/lustre/llite/lloop.c
down(&lo->lo_ctl_mutex);
--lo->lo_refcnt;
-@@ -527,11 +549,20 @@
+@@ -527,11 +548,20 @@
}
/* lloop device node's ioctl function. */
-+#ifdef BLOCKDEV_OPS_WITH_MODE
++#ifdef HAVE_OPEN_BDEV_MODE
+static int lo_ioctl(struct block_device *bdev, fmode_t mode,
+ unsigned int cmd, unsigned long arg)
+#else
@@ -13232,7 +2414,7 @@ diff -urNad lustre~/lustre/llite/lloop.c lustre/lustre/llite/lloop.c
unsigned int cmd, unsigned long arg)
+#endif
{
-+#ifdef BLOCKDEV_OPS_WITH_MODE
++#ifdef HAVE_OPEN_BDEV_MODE
+ struct lloop_device *lo = bdev->bd_disk->private_data;
+#else
struct lloop_device *lo = inode->i_bdev->bd_disk->private_data;
@@ -13241,11 +2423,11 @@ diff -urNad lustre~/lustre/llite/lloop.c lustre/lustre/llite/lloop.c
int err = 0;
down(&lloop_mutex);
-@@ -539,7 +570,11 @@
+@@ -539,7 +569,11 @@
case LL_IOC_LLOOP_DETACH: {
err = loop_clr_fd(lo, bdev, 2);
if (err == 0)
-+#ifdef BLOCKDEV_OPS_WITH_MODE
++#ifdef HAVE_BLKDEV_PUT_2ARGS
+ blkdev_put(bdev, mode); /* grabbed in LLOOP_ATTACH */
+#else
blkdev_put(bdev); /* grabbed in LLOOP_ATTACH */
@@ -13253,11 +2435,11 @@ diff -urNad lustre~/lustre/llite/lloop.c lustre/lustre/llite/lloop.c
break;
}
-@@ -626,7 +661,12 @@
+@@ -626,7 +660,12 @@
err = loop_set_fd(lo, NULL, bdev, file);
if (err) {
fput(file);
-+#ifdef BLOCKDEV_OPS_WITH_MODE
++#ifdef HAVE_BLKDEV_PUT_2ARGS
+ /* i guess... --azi */
+ blkdev_put(bdev, FMODE_READ|FMODE_WRITE);
+#else
@@ -13266,11 +2448,11 @@ diff -urNad lustre~/lustre/llite/lloop.c lustre/lustre/llite/lloop.c
}
break;
-@@ -650,7 +690,12 @@
+@@ -650,7 +689,12 @@
bdev = lo->lo_device;
err = loop_clr_fd(lo, bdev, 1);
if (err == 0)
-+#ifdef BLOCKDEV_OPS_WITH_MODE
++#ifdef HAVE_BLKDEV_PUT_2ARGS
+ /* i guess... --azi */
+ blkdev_put(bdev, FMODE_READ|FMODE_WRITE); /* grabbed in LLOOP_ATTACH */
+#else
@@ -13279,7 +2461,7 @@ diff -urNad lustre~/lustre/llite/lloop.c lustre/lustre/llite/lloop.c
break;
}
-@@ -736,7 +781,7 @@
+@@ -736,7 +780,7 @@
out_mem4:
while (i--)
@@ -13288,7 +2470,7 @@ diff -urNad lustre~/lustre/llite/lloop.c lustre/lustre/llite/lloop.c
i = max_loop;
out_mem3:
while (i--)
-@@ -758,7 +803,7 @@
+@@ -758,7 +802,7 @@
ll_iocontrol_unregister(ll_iocontrol_magic);
for (i = 0; i < max_loop; i++) {
del_gendisk(disks[i]);
@@ -13297,820 +2479,33 @@ diff -urNad lustre~/lustre/llite/lloop.c lustre/lustre/llite/lloop.c
put_disk(disks[i]);
}
if (ll_unregister_blkdev(lloop_major, "lloop"))
-diff -urNad lustre~/lustre/llite/lloop.c.orig lustre/lustre/llite/lloop.c.orig
---- lustre~/lustre/llite/lloop.c.orig 1970-01-01 01:00:00.000000000 +0100
-+++ lustre/lustre/llite/lloop.c.orig 2009-08-19 14:10:45.000000000 +0200
-@@ -0,0 +1,777 @@
-+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-+ * vim:expandtab:shiftwidth=8:tabstop=8:
-+ *
-+ * GPL HEADER START
-+ *
-+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
-+ *
-+ * This program is free software; you can redistribute it and/or modify
-+ * it under the terms of the GNU General Public License version 2 only,
-+ * as published by the Free Software Foundation.
-+ *
-+ * This program is distributed in the hope that it will be useful, but
-+ * WITHOUT ANY WARRANTY; without even the implied warranty of
-+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-+ * General Public License version 2 for more details (a copy is included
-+ * in the LICENSE file that accompanied this code).
-+ *
-+ * You should have received a copy of the GNU General Public License
-+ * version 2 along with this program; If not, see
-+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
-+ *
-+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
-+ * CA 95054 USA or visit www.sun.com if you need additional information or
-+ * have any questions.
-+ *
-+ * GPL HEADER END
-+ */
-+/*
-+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved
-+ * Use is subject to license terms.
-+ */
-+/*
-+ * This file is part of Lustre, http://www.lustre.org/
-+ * Lustre is a trademark of Sun Microsystems, Inc.
-+ */
-+
-+/*
-+ * linux/drivers/block/loop.c
-+ *
-+ * Written by Theodore Ts'o, 3/29/93
-+ *
-+ * Copyright 1993 by Theodore Ts'o. Redistribution of this file is
-+ * permitted under the GNU General Public License.
-+ *
-+ * DES encryption plus some minor changes by Werner Almesberger, 30-MAY-1993
-+ * more DES encryption plus IDEA encryption by Nicholas J. Leon, June 20, 1996
-+ *
-+ * Modularized and updated for 1.1.16 kernel - Mitch Dsouza 28th May 1994
-+ * Adapted for 1.3.59 kernel - Andries Brouwer, 1 Feb 1996
-+ *
-+ * Fixed do_loop_request() re-entrancy - Vincent.Renardias at waw.com Mar 20, 1997
-+ *
-+ * Added devfs support - Richard Gooch <rgooch at atnf.csiro.au> 16-Jan-1998
-+ *
-+ * Handle sparse backing files correctly - Kenn Humborg, Jun 28, 1998
-+ *
-+ * Loadable modules and other fixes by AK, 1998
-+ *
-+ * Make real block number available to downstream transfer functions, enables
-+ * CBC (and relatives) mode encryption requiring unique IVs per data block.
-+ * Reed H. Petty, rhp at draper.net
-+ *
-+ * Maximum number of loop devices now dynamic via max_loop module parameter.
-+ * Russell Kroll <rkroll at exploits.org> 19990701
-+ *
-+ * Maximum number of loop devices when compiled-in now selectable by passing
-+ * max_loop=<1-255> to the kernel on boot.
-+ * Erik I. Bols?, <eriki at himolde.no>, Oct 31, 1999
-+ *
-+ * Completely rewrite request handling to be make_request_fn style and
-+ * non blocking, pushing work to a helper thread. Lots of fixes from
-+ * Al Viro too.
-+ * Jens Axboe <axboe at suse.de>, Nov 2000
-+ *
-+ * Support up to 256 loop devices
-+ * Heinz Mauelshagen <mge at sistina.com>, Feb 2002
-+ *
-+ * Support for falling back on the write file operation when the address space
-+ * operations prepare_write and/or commit_write are not available on the
-+ * backing filesystem.
-+ * Anton Altaparmakov, 16 Feb 2005
-+ *
-+ * Still To Fix:
-+ * - Advisory locking is ignored here.
-+ * - Should use an own CAP_* category instead of CAP_SYS_ADMIN
-+ *
-+ */
-+
-+#ifndef AUTOCONF_INCLUDED
-+#include <linux/config.h>
-+#endif
-+#include <linux/module.h>
-+
-+#include <linux/sched.h>
-+#include <linux/fs.h>
-+#include <linux/file.h>
-+#include <linux/stat.h>
-+#include <linux/errno.h>
-+#include <linux/major.h>
-+#include <linux/wait.h>
-+#include <linux/blkdev.h>
-+#include <linux/blkpg.h>
-+#include <linux/init.h>
-+#include <linux/smp_lock.h>
-+#include <linux/swap.h>
-+#include <linux/slab.h>
-+#include <linux/suspend.h>
-+#include <linux/writeback.h>
-+#include <linux/buffer_head.h> /* for invalidate_bdev() */
-+#include <linux/completion.h>
-+#include <linux/highmem.h>
-+#include <linux/gfp.h>
-+#include <linux/swap.h>
-+#include <linux/pagevec.h>
-+
-+#include <asm/uaccess.h>
-+
-+#include <lustre_lib.h>
-+#include <lustre_lite.h>
-+#include "llite_internal.h"
-+
-+#define LLOOP_MAX_SEGMENTS PTLRPC_MAX_BRW_PAGES
-+
-+/* Possible states of device */
-+enum {
-+ LLOOP_UNBOUND,
-+ LLOOP_BOUND,
-+ LLOOP_RUNDOWN,
-+};
-+
-+struct lloop_device {
-+ int lo_number;
-+ int lo_refcnt;
-+ loff_t lo_offset;
-+ loff_t lo_sizelimit;
-+ int lo_flags;
-+ int (*ioctl)(struct lloop_device *, int cmd,
-+ unsigned long arg);
-+
-+ struct file * lo_backing_file;
-+ struct block_device *lo_device;
-+ unsigned lo_blocksize;
-+
-+ int old_gfp_mask;
-+
-+ spinlock_t lo_lock;
-+ struct bio *lo_bio;
-+ struct bio *lo_biotail;
-+ int lo_state;
-+ struct semaphore lo_sem;
-+ struct semaphore lo_ctl_mutex;
-+ struct semaphore lo_bh_mutex;
-+ atomic_t lo_pending;
-+
-+ request_queue_t *lo_queue;
-+
-+ /* data to handle bio for lustre. */
-+ struct lo_request_data {
-+ struct brw_page lrd_pages[LLOOP_MAX_SEGMENTS];
-+ struct obdo lrd_oa;
-+ } lo_requests[1];
-+
-+};
-+
-+/*
-+ * Loop flags
-+ */
-+enum {
-+ LO_FLAGS_READ_ONLY = 1,
-+};
-+
-+static int lloop_major;
-+static int max_loop = 8;
-+static struct lloop_device *loop_dev;
-+static struct gendisk **disks;
-+static struct semaphore lloop_mutex;
-+static void *ll_iocontrol_magic = NULL;
-+
-+static loff_t get_loop_size(struct lloop_device *lo, struct file *file)
-+{
-+ loff_t size, offset, loopsize;
-+
-+ /* Compute loopsize in bytes */
-+ size = i_size_read(file->f_mapping->host);
-+ offset = lo->lo_offset;
-+ loopsize = size - offset;
-+ if (lo->lo_sizelimit > 0 && lo->lo_sizelimit < loopsize)
-+ loopsize = lo->lo_sizelimit;
-+
-+ /*
-+ * Unfortunately, if we want to do I/O on the device,
-+ * the number of 512-byte sectors has to fit into a sector_t.
-+ */
-+ return loopsize >> 9;
-+}
-+
-+static int do_bio_filebacked(struct lloop_device *lo, struct bio *bio)
-+{
-+ struct inode *inode = lo->lo_backing_file->f_dentry->d_inode;
-+ struct ll_inode_info *lli = ll_i2info(inode);
-+ struct lov_stripe_md *lsm = lli->lli_smd;
-+ struct obd_info oinfo = {{{0}}};
-+ struct brw_page *pg = lo->lo_requests[0].lrd_pages;
-+ struct obdo *oa = &lo->lo_requests[0].lrd_oa;
-+ pgoff_t offset;
-+ int ret, cmd, i;
-+ struct bio_vec *bvec;
-+
-+ BUG_ON(bio->bi_hw_segments > LLOOP_MAX_SEGMENTS);
-+
-+ offset = (pgoff_t)(bio->bi_sector << 9) + lo->lo_offset;
-+ bio_for_each_segment(bvec, bio, i) {
-+ BUG_ON(bvec->bv_offset != 0);
-+ BUG_ON(bvec->bv_len != CFS_PAGE_SIZE);
-+
-+ pg->pg = bvec->bv_page;
-+ pg->off = offset;
-+ pg->count = bvec->bv_len;
-+ pg->flag = OBD_BRW_SRVLOCK;
-+
-+ pg++;
-+ offset += bvec->bv_len;
-+ }
-+
-+ oa->o_mode = inode->i_mode;
-+ oa->o_id = lsm->lsm_object_id;
-+ oa->o_valid = OBD_MD_FLID | OBD_MD_FLMODE | OBD_MD_FLTYPE;
-+ obdo_from_inode(oa, inode, OBD_MD_FLFID | OBD_MD_FLGENER);
-+
-+ cmd = OBD_BRW_READ;
-+ if (bio_rw(bio) == WRITE)
-+ cmd = OBD_BRW_WRITE;
-+
-+ if (cmd == OBD_BRW_WRITE)
-+ ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_BRW_WRITE, bio->bi_size);
-+ else
-+ ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_BRW_READ, bio->bi_size);
-+ oinfo.oi_oa = oa;
-+ oinfo.oi_md = lsm;
-+ ret = obd_brw(cmd, ll_i2obdexp(inode), &oinfo,
-+ (obd_count)(i - bio->bi_idx),
-+ lo->lo_requests[0].lrd_pages, NULL);
-+ if (ret == 0)
-+ obdo_to_inode(inode, oa, OBD_MD_FLBLOCKS);
-+ return ret;
-+}
-+
-+
-+/*
-+ * Add bio to back of pending list
-+ */
-+static void loop_add_bio(struct lloop_device *lo, struct bio *bio)
-+{
-+ unsigned long flags;
-+
-+ spin_lock_irqsave(&lo->lo_lock, flags);
-+ if (lo->lo_biotail) {
-+ lo->lo_biotail->bi_next = bio;
-+ lo->lo_biotail = bio;
-+ } else
-+ lo->lo_bio = lo->lo_biotail = bio;
-+ spin_unlock_irqrestore(&lo->lo_lock, flags);
-+
-+ up(&lo->lo_bh_mutex);
-+}
-+
-+/*
-+ * Grab first pending buffer
-+ */
-+static struct bio *loop_get_bio(struct lloop_device *lo)
-+{
-+ struct bio *bio;
-+
-+ spin_lock_irq(&lo->lo_lock);
-+ if ((bio = lo->lo_bio)) {
-+ if (bio == lo->lo_biotail)
-+ lo->lo_biotail = NULL;
-+ lo->lo_bio = bio->bi_next;
-+ bio->bi_next = NULL;
-+ }
-+ spin_unlock_irq(&lo->lo_lock);
-+
-+ return bio;
-+}
-+
-+static int loop_make_request(request_queue_t *q, struct bio *old_bio)
-+{
-+ struct lloop_device *lo = q->queuedata;
-+ int rw = bio_rw(old_bio);
-+
-+ if (!lo)
-+ goto out;
-+
-+ spin_lock_irq(&lo->lo_lock);
-+ if (lo->lo_state != LLOOP_BOUND)
-+ goto inactive;
-+ atomic_inc(&lo->lo_pending);
-+ spin_unlock_irq(&lo->lo_lock);
-+
-+ if (rw == WRITE) {
-+ if (lo->lo_flags & LO_FLAGS_READ_ONLY)
-+ goto err;
-+ } else if (rw == READA) {
-+ rw = READ;
-+ } else if (rw != READ) {
-+ CERROR("lloop: unknown command (%x)\n", rw);
-+ goto err;
-+ }
-+ loop_add_bio(lo, old_bio);
-+ return 0;
-+err:
-+ if (atomic_dec_and_test(&lo->lo_pending))
-+ up(&lo->lo_bh_mutex);
-+out:
-+ bio_io_error(old_bio, old_bio->bi_size);
-+ return 0;
-+inactive:
-+ spin_unlock_irq(&lo->lo_lock);
-+ goto out;
-+}
-+
-+/*
-+ * kick off io on the underlying address space
-+ */
-+static void loop_unplug(request_queue_t *q)
-+{
-+ struct lloop_device *lo = q->queuedata;
-+
-+ clear_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags);
-+ blk_run_address_space(lo->lo_backing_file->f_mapping);
-+}
-+
-+static inline void loop_handle_bio(struct lloop_device *lo, struct bio *bio)
-+{
-+ int ret;
-+ ret = do_bio_filebacked(lo, bio);
-+ bio_endio(bio, bio->bi_size, ret);
-+}
-+
-+/*
-+ * worker thread that handles reads/writes to file backed loop devices,
-+ * to avoid blocking in our make_request_fn. it also does loop decrypting
-+ * on reads for block backed loop, as that is too heavy to do from
-+ * b_end_io context where irqs may be disabled.
-+ */
-+static int loop_thread(void *data)
-+{
-+ struct lloop_device *lo = data;
-+ struct bio *bio;
-+
-+ daemonize("lloop%d", lo->lo_number);
-+
-+ set_user_nice(current, -20);
-+
-+ lo->lo_state = LLOOP_BOUND;
-+ atomic_inc(&lo->lo_pending);
-+
-+ /*
-+ * up sem, we are running
-+ */
-+ up(&lo->lo_sem);
-+
-+ for (;;) {
-+ down_interruptible(&lo->lo_bh_mutex);
-+ /*
-+ * could be upped because of tear-down, not because of
-+ * pending work
-+ */
-+ if (!atomic_read(&lo->lo_pending))
-+ break;
-+
-+ bio = loop_get_bio(lo);
-+ if (!bio) {
-+ CWARN("lloop(minor: %d): missing bio\n", lo->lo_number);
-+ continue;
-+ }
-+ loop_handle_bio(lo, bio);
-+
-+ /*
-+ * upped both for pending work and tear-down, lo_pending
-+ * will hit zero then
-+ */
-+ if (atomic_dec_and_test(&lo->lo_pending))
-+ break;
-+ }
-+
-+ up(&lo->lo_sem);
-+ return 0;
-+}
-+
-+static int loop_set_fd(struct lloop_device *lo, struct file *unused,
-+ struct block_device *bdev, struct file *file)
-+{
-+ struct inode *inode;
-+ struct address_space *mapping;
-+ int lo_flags = 0;
-+ int error;
-+ loff_t size;
-+
-+ if (!try_module_get(THIS_MODULE))
-+ return -ENODEV;
-+
-+ error = -EBUSY;
-+ if (lo->lo_state != LLOOP_UNBOUND)
-+ goto out;
-+
-+ mapping = file->f_mapping;
-+ inode = mapping->host;
-+
-+ error = -EINVAL;
-+ if (!S_ISREG(inode->i_mode) || inode->i_sb->s_magic != LL_SUPER_MAGIC)
-+ goto out;
-+
-+ if (!(file->f_mode & FMODE_WRITE))
-+ lo_flags |= LO_FLAGS_READ_ONLY;
-+
-+ size = get_loop_size(lo, file);
-+
-+ if ((loff_t)(sector_t)size != size) {
-+ error = -EFBIG;
-+ goto out;
-+ }
-+
-+ /* remove all pages in cache so as dirty pages not to be existent. */
-+ truncate_inode_pages(mapping, 0);
-+
-+ set_device_ro(bdev, (lo_flags & LO_FLAGS_READ_ONLY) != 0);
-+
-+ lo->lo_blocksize = CFS_PAGE_SIZE;
-+ lo->lo_device = bdev;
-+ lo->lo_flags = lo_flags;
-+ lo->lo_backing_file = file;
-+ lo->ioctl = NULL;
-+ lo->lo_sizelimit = 0;
-+ lo->old_gfp_mask = mapping_gfp_mask(mapping);
-+ mapping_set_gfp_mask(mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS));
-+
-+ lo->lo_bio = lo->lo_biotail = NULL;
-+
-+ /*
-+ * set queue make_request_fn, and add limits based on lower level
-+ * device
-+ */
-+ blk_queue_make_request(lo->lo_queue, loop_make_request);
-+ lo->lo_queue->queuedata = lo;
-+ lo->lo_queue->unplug_fn = loop_unplug;
-+
-+ /* queue parameters */
-+ blk_queue_hardsect_size(lo->lo_queue, CFS_PAGE_SIZE);
-+ blk_queue_max_sectors(lo->lo_queue, LLOOP_MAX_SEGMENTS);
-+ blk_queue_max_phys_segments(lo->lo_queue, LLOOP_MAX_SEGMENTS);
-+
-+ set_capacity(disks[lo->lo_number], size);
-+ bd_set_size(bdev, size << 9);
-+
-+ set_blocksize(bdev, lo->lo_blocksize);
-+
-+ kernel_thread(loop_thread, lo, CLONE_KERNEL);
-+ down(&lo->lo_sem);
-+ return 0;
-+
-+ out:
-+ /* This is safe: open() is still holding a reference. */
-+ module_put(THIS_MODULE);
-+ return error;
-+}
-+
-+static int loop_clr_fd(struct lloop_device *lo, struct block_device *bdev,
-+ int count)
-+{
-+ struct file *filp = lo->lo_backing_file;
-+ int gfp = lo->old_gfp_mask;
-+
-+ if (lo->lo_state != LLOOP_BOUND)
-+ return -ENXIO;
-+
-+ if (lo->lo_refcnt > count) /* we needed one fd for the ioctl */
-+ return -EBUSY;
-+
-+ if (filp == NULL)
-+ return -EINVAL;
-+
-+ spin_lock_irq(&lo->lo_lock);
-+ lo->lo_state = LLOOP_RUNDOWN;
-+ if (atomic_dec_and_test(&lo->lo_pending))
-+ up(&lo->lo_bh_mutex);
-+ spin_unlock_irq(&lo->lo_lock);
-+
-+ down(&lo->lo_sem);
-+ lo->lo_backing_file = NULL;
-+ lo->ioctl = NULL;
-+ lo->lo_device = NULL;
-+ lo->lo_offset = 0;
-+ lo->lo_sizelimit = 0;
-+ lo->lo_flags = 0;
-+ ll_invalidate_bdev(bdev, 0);
-+ set_capacity(disks[lo->lo_number], 0);
-+ bd_set_size(bdev, 0);
-+ mapping_set_gfp_mask(filp->f_mapping, gfp);
-+ lo->lo_state = LLOOP_UNBOUND;
-+ fput(filp);
-+ /* This is safe: open() is still holding a reference. */
-+ module_put(THIS_MODULE);
-+ return 0;
-+}
-+
-+static int lo_open(struct inode *inode, struct file *file)
-+{
-+ struct lloop_device *lo = inode->i_bdev->bd_disk->private_data;
-+
-+ down(&lo->lo_ctl_mutex);
-+ lo->lo_refcnt++;
-+ up(&lo->lo_ctl_mutex);
-+
-+ return 0;
-+}
-+
-+static int lo_release(struct inode *inode, struct file *file)
-+{
-+ struct lloop_device *lo = inode->i_bdev->bd_disk->private_data;
-+
-+ down(&lo->lo_ctl_mutex);
-+ --lo->lo_refcnt;
-+ up(&lo->lo_ctl_mutex);
-+
-+ return 0;
-+}
-+
-+/* lloop device node's ioctl function. */
-+static int lo_ioctl(struct inode *inode, struct file *unused,
-+ unsigned int cmd, unsigned long arg)
-+{
-+ struct lloop_device *lo = inode->i_bdev->bd_disk->private_data;
-+ struct block_device *bdev = inode->i_bdev;
-+ int err = 0;
-+
-+ down(&lloop_mutex);
-+ switch (cmd) {
-+ case LL_IOC_LLOOP_DETACH: {
-+ err = loop_clr_fd(lo, bdev, 2);
-+ if (err == 0)
-+ blkdev_put(bdev); /* grabbed in LLOOP_ATTACH */
-+ break;
-+ }
-+
-+ case LL_IOC_LLOOP_INFO: {
-+ __u64 ino = 0;
-+
-+ if (lo->lo_state == LLOOP_BOUND)
-+ ino = lo->lo_backing_file->f_dentry->d_inode->i_ino;
-+
-+ if (put_user(ino, (__u64 *)arg))
-+ err = -EFAULT;
-+ break;
-+ }
-+
-+ default:
-+ err = -EINVAL;
-+ break;
-+ }
-+ up(&lloop_mutex);
-+
-+ return err;
-+}
-+
-+static struct block_device_operations lo_fops = {
-+ .owner = THIS_MODULE,
-+ .open = lo_open,
-+ .release = lo_release,
-+ .ioctl = lo_ioctl,
-+};
-+
-+/* dynamic iocontrol callback.
-+ * This callback is registered in lloop_init and will be called by
-+ * ll_iocontrol_call.
-+ * This is a llite regular file ioctl function. It takes the responsibility
-+ * of attaching a file, and detaching a file by a lloop's device numner.
-+ */
-+static enum llioc_iter lloop_ioctl(struct inode *unused, struct file *file,
-+ unsigned int cmd, unsigned long arg,
-+ void *magic, int *rcp)
-+{
-+ struct lloop_device *lo = NULL;
-+ struct block_device *bdev = NULL;
-+ int err = 0;
-+ dev_t dev;
-+
-+ if (magic != ll_iocontrol_magic)
-+ return LLIOC_CONT;
-+
-+ if (disks == NULL)
-+ GOTO(out1, err = -ENODEV);
-+
-+ down(&lloop_mutex);
-+ switch (cmd) {
-+ case LL_IOC_LLOOP_ATTACH: {
-+ struct lloop_device *lo_free = NULL;
-+ int i;
-+
-+ for (i = 0; i < max_loop; i++, lo = NULL) {
-+ lo = &loop_dev[i];
-+ if (lo->lo_state == LLOOP_UNBOUND) {
-+ if (!lo_free)
-+ lo_free = lo;
-+ continue;
-+ }
-+ if (lo->lo_backing_file->f_dentry->d_inode ==
-+ file->f_dentry->d_inode)
-+ break;
-+ }
-+ if (lo || !lo_free)
-+ GOTO(out, err = -EBUSY);
-+
-+ lo = lo_free;
-+ dev = MKDEV(lloop_major, lo->lo_number);
-+
-+ /* quit if the used pointer is writable */
-+ if (put_user((long)old_encode_dev(dev), (long*)arg))
-+ GOTO(out, err = -EFAULT);
-+
-+ bdev = open_by_devnum(dev, file->f_mode);
-+ if (IS_ERR(bdev))
-+ GOTO(out, err = PTR_ERR(bdev));
-+
-+ get_file(file);
-+ err = loop_set_fd(lo, NULL, bdev, file);
-+ if (err) {
-+ fput(file);
-+ blkdev_put(bdev);
-+ }
-+
-+ break;
-+ }
-+
-+ case LL_IOC_LLOOP_DETACH_BYDEV: {
-+ int minor;
-+
-+ dev = old_decode_dev(arg);
-+ if (MAJOR(dev) != lloop_major)
-+ GOTO(out, err = -EINVAL);
-+
-+ minor = MINOR(dev);
-+ if (minor > max_loop - 1)
-+ GOTO(out, err = -EINVAL);
-+
-+ lo = &loop_dev[minor];
-+ if (lo->lo_state != LLOOP_BOUND)
-+ GOTO(out, err = -EINVAL);
-+
-+ bdev = lo->lo_device;
-+ err = loop_clr_fd(lo, bdev, 1);
-+ if (err == 0)
-+ blkdev_put(bdev); /* grabbed in LLOOP_ATTACH */
-+
-+ break;
-+ }
-+
-+ default:
-+ err = -EINVAL;
-+ break;
-+ }
-+
-+out:
-+ up(&lloop_mutex);
-+out1:
-+ if (rcp)
-+ *rcp = err;
-+ return LLIOC_STOP;
-+}
-+
-+static int __init lloop_init(void)
-+{
-+ int i;
-+ unsigned int cmdlist[] = {
-+ LL_IOC_LLOOP_ATTACH,
-+ LL_IOC_LLOOP_DETACH_BYDEV,
-+ };
-+
-+ if (max_loop < 1 || max_loop > 256) {
-+ CWARN("lloop: invalid max_loop (must be between"
-+ " 1 and 256), using default (8)\n");
-+ max_loop = 8;
-+ }
-+
-+ lloop_major = register_blkdev(0, "lloop");
-+ if (lloop_major < 0)
-+ return -EIO;
-+
-+ ll_iocontrol_magic = ll_iocontrol_register(lloop_ioctl, 2, cmdlist);
-+ if (ll_iocontrol_magic == NULL)
-+ goto out_mem1;
-+
-+ loop_dev = kmalloc(max_loop * sizeof(struct lloop_device), GFP_KERNEL);
-+ if (!loop_dev)
-+ goto out_mem1;
-+ memset(loop_dev, 0, max_loop * sizeof(struct lloop_device));
-+
-+ disks = kmalloc(max_loop * sizeof(struct gendisk *), GFP_KERNEL);
-+ if (!disks)
-+ goto out_mem2;
-+
-+ for (i = 0; i < max_loop; i++) {
-+ disks[i] = alloc_disk(1);
-+ if (!disks[i])
-+ goto out_mem3;
-+ }
-+
-+ init_MUTEX(&lloop_mutex);
-+
-+ for (i = 0; i < max_loop; i++) {
-+ struct lloop_device *lo = &loop_dev[i];
-+ struct gendisk *disk = disks[i];
-+
-+ memset(lo, 0, sizeof(*lo));
-+ lo->lo_queue = blk_alloc_queue(GFP_KERNEL);
-+ if (!lo->lo_queue)
-+ goto out_mem4;
-+
-+ init_MUTEX(&lo->lo_ctl_mutex);
-+ init_MUTEX_LOCKED(&lo->lo_sem);
-+ init_MUTEX_LOCKED(&lo->lo_bh_mutex);
-+ lo->lo_number = i;
-+ spin_lock_init(&lo->lo_lock);
-+ disk->major = lloop_major;
-+ disk->first_minor = i;
-+ disk->fops = &lo_fops;
-+ sprintf(disk->disk_name, "lloop%d", i);
-+ disk->private_data = lo;
-+ disk->queue = lo->lo_queue;
-+ }
-+
-+ /* We cannot fail after we call this, so another loop!*/
-+ for (i = 0; i < max_loop; i++)
-+ add_disk(disks[i]);
-+ return 0;
-+
-+out_mem4:
-+ while (i--)
-+ blk_put_queue(loop_dev[i].lo_queue);
-+ i = max_loop;
-+out_mem3:
-+ while (i--)
-+ put_disk(disks[i]);
-+ kfree(disks);
-+out_mem2:
-+ kfree(loop_dev);
-+out_mem1:
-+ unregister_blkdev(lloop_major, "lloop");
-+ ll_iocontrol_unregister(ll_iocontrol_magic);
-+ CERROR("lloop: ran out of memory\n");
-+ return -ENOMEM;
-+}
-+
-+static void lloop_exit(void)
-+{
-+ int i;
-+
-+ ll_iocontrol_unregister(ll_iocontrol_magic);
-+ for (i = 0; i < max_loop; i++) {
-+ del_gendisk(disks[i]);
-+ blk_put_queue(loop_dev[i].lo_queue);
-+ put_disk(disks[i]);
-+ }
-+ if (ll_unregister_blkdev(lloop_major, "lloop"))
-+ CWARN("lloop: cannot unregister blkdev\n");
-+
-+ kfree(disks);
-+ kfree(loop_dev);
-+}
-+
-+module_init(lloop_init);
-+module_exit(lloop_exit);
-+
-+CFS_MODULE_PARM(max_loop, "i", int, 0444, "maximum of lloop_device");
-+MODULE_AUTHOR("Sun Microsystems, Inc. <http://www.lustre.org/>");
-+MODULE_DESCRIPTION("Lustre virtual block device");
-+MODULE_LICENSE("GPL");
diff -urNad lustre~/lustre/llite/lproc_llite.c lustre/lustre/llite/lproc_llite.c
--- lustre~/lustre/llite/lproc_llite.c 2009-08-19 09:51:09.000000000 +0200
-+++ lustre/lustre/llite/lproc_llite.c 2009-08-19 14:10:45.000000000 +0200
-@@ -649,7 +649,11 @@
++++ lustre/lustre/llite/lproc_llite.c 2009-08-20 10:25:20.000000000 +0200
+@@ -649,7 +649,7 @@
sbi->ll_stats_track_id == current->p_pptr->pid)
lprocfs_counter_add(sbi->ll_stats, op, count);
else if (sbi->ll_stats_track_type == STATS_TRACK_GID &&
-+#ifdef HAS_STRUCT_CRED
-+ sbi->ll_stats_track_id == current->real_cred->gid)
-+#else
- sbi->ll_stats_track_id == current->gid)
-+#endif
+- sbi->ll_stats_track_id == current->gid)
++ sbi->ll_stats_track_id == CREDENTIALS(current,gid))
lprocfs_counter_add(sbi->ll_stats, op, count);
}
EXPORT_SYMBOL(ll_stats_ops_tally);
diff -urNad lustre~/lustre/llite/namei.c lustre/lustre/llite/namei.c
--- lustre~/lustre/llite/namei.c 2009-08-19 09:51:09.000000000 +0200
-+++ lustre/lustre/llite/namei.c 2009-08-19 14:10:45.000000000 +0200
-@@ -875,7 +875,11 @@
++++ lustre/lustre/llite/namei.c 2009-08-20 10:25:20.000000000 +0200
+@@ -875,7 +875,7 @@
GOTO(err_exit, err);
err = mdc_create(sbi->ll_mdc_exp, &op_data, tgt, tgt_len,
-+#ifdef HAS_STRUCT_CRED
-+ mode, current->real_cred->fsuid, current->real_cred->fsgid,
-+#else
- mode, current->fsuid, current->fsgid,
-+#endif
+- mode, current->fsuid, current->fsgid,
++ mode, CREDENTIALS(current,fsuid), CREDENTIALS(current,fsgid),
cfs_curproc_cap_pack(), rdev, &request);
if (err)
GOTO(err_exit, err);
diff -urNad lustre~/lustre/llite/rw.c lustre/lustre/llite/rw.c
--- lustre~/lustre/llite/rw.c 2009-08-19 09:51:09.000000000 +0200
-+++ lustre/lustre/llite/rw.c 2009-08-19 14:10:45.000000000 +0200
++++ lustre/lustre/llite/rw.c 2009-08-20 10:25:20.000000000 +0200
@@ -61,6 +61,8 @@
#define DEBUG_SUBSYSTEM S_LLITE
@@ -14133,7 +2528,7 @@ diff -urNad lustre~/lustre/llite/rw.c lustre/lustre/llite/rw.c
ll_inode_size_unlock(inode, 0);
} /* ll_truncate */
-+#ifdef NO_PREPARE_WRITE
++#ifdef HAVE_WRITE_BEGIN_IN_STRUCT_ADDRESS_SPACE_OPERATIONS
+int ll_write_begin(struct file *unused_file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **unused_fsdata)
+{
+ struct inode *inode = mapping->host;
@@ -14236,7 +2631,7 @@ diff -urNad lustre~/lustre/llite/rw.c lustre/lustre/llite/rw.c
RETURN(rc);
}
-+#ifdef NO_PREPARE_WRITE
++#ifdef HAVE_WRITE_BEGIN_IN_STRUCT_ADDRESS_SPACE_OPERATIONS
+int ll_write_end(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned copied, struct page *page, void *unued_fsdata)
+{
+ struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
@@ -14255,7 +2650,7 @@ diff -urNad lustre~/lustre/llite/rw.c lustre/lustre/llite/rw.c
+ LASSERT(PageLocked(page));
+
+ CDEBUG(D_INODE, "inode %p is writing page %p from %ld to %ld at %lu\n",
-+ inode, page, (long int)pos, (long int)(pos + copied), page->index);
++ inode, page, pos, (pos + copied), page->index);
+
+/**** ???? ****/
+if (PageChecked(page)) {
@@ -14339,2233 +2734,14 @@ diff -urNad lustre~/lustre/llite/rw.c lustre/lustre/llite/rw.c
if (rc)
GOTO(out, rc);
}
-diff -urNad lustre~/lustre/llite/rw.c.orig lustre/lustre/llite/rw.c.orig
---- lustre~/lustre/llite/rw.c.orig 1970-01-01 01:00:00.000000000 +0100
-+++ lustre/lustre/llite/rw.c.orig 2009-08-19 14:10:45.000000000 +0200
-@@ -0,0 +1,2215 @@
-+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-+ * vim:expandtab:shiftwidth=8:tabstop=8:
-+ *
-+ * GPL HEADER START
-+ *
-+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
-+ *
-+ * This program is free software; you can redistribute it and/or modify
-+ * it under the terms of the GNU General Public License version 2 only,
-+ * as published by the Free Software Foundation.
-+ *
-+ * This program is distributed in the hope that it will be useful, but
-+ * WITHOUT ANY WARRANTY; without even the implied warranty of
-+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-+ * General Public License version 2 for more details (a copy is included
-+ * in the LICENSE file that accompanied this code).
-+ *
-+ * You should have received a copy of the GNU General Public License
-+ * version 2 along with this program; If not, see
-+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
-+ *
-+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
-+ * CA 95054 USA or visit www.sun.com if you need additional information or
-+ * have any questions.
-+ *
-+ * GPL HEADER END
-+ */
-+/*
-+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved
-+ * Use is subject to license terms.
-+ */
-+/*
-+ * This file is part of Lustre, http://www.lustre.org/
-+ * Lustre is a trademark of Sun Microsystems, Inc.
-+ *
-+ * lustre/llite/rw.c
-+ *
-+ * Lustre Lite I/O page cache routines shared by different kernel revs
-+ */
-+
-+#ifndef AUTOCONF_INCLUDED
-+#include <linux/config.h>
-+#endif
-+#include <linux/kernel.h>
-+#include <linux/mm.h>
-+#include <linux/string.h>
-+#include <linux/stat.h>
-+#include <linux/errno.h>
-+#include <linux/smp_lock.h>
-+#include <linux/unistd.h>
-+#include <linux/version.h>
-+#include <asm/system.h>
-+#include <asm/uaccess.h>
-+
-+#include <linux/fs.h>
-+#include <linux/stat.h>
-+#include <asm/uaccess.h>
-+#include <linux/mm.h>
-+#include <linux/pagemap.h>
-+#include <linux/smp_lock.h>
-+
-+#define DEBUG_SUBSYSTEM S_LLITE
-+
-+#include <lustre_lite.h>
-+#include "llite_internal.h"
-+#include <linux/lustre_compat25.h>
-+
-+#ifndef list_for_each_prev_safe
-+#define list_for_each_prev_safe(pos, n, head) \
-+ for (pos = (head)->prev, n = pos->prev; pos != (head); \
-+ pos = n, n = pos->prev )
-+#endif
-+
-+cfs_mem_cache_t *ll_async_page_slab = NULL;
-+size_t ll_async_page_slab_size = 0;
-+
-+/* SYNCHRONOUS I/O to object storage for an inode */
-+static int ll_brw(int cmd, struct inode *inode, struct obdo *oa,
-+ struct page *page, int flags)
-+{
-+ struct ll_inode_info *lli = ll_i2info(inode);
-+ struct lov_stripe_md *lsm = lli->lli_smd;
-+ struct obd_info oinfo = { { { 0 } } };
-+ struct brw_page pg;
-+ int rc;
-+ ENTRY;
-+
-+ pg.pg = page;
-+ pg.off = ((obd_off)page->index) << CFS_PAGE_SHIFT;
-+
-+ if ((cmd & OBD_BRW_WRITE) && (pg.off+CFS_PAGE_SIZE>i_size_read(inode)))
-+ pg.count = i_size_read(inode) % CFS_PAGE_SIZE;
-+ else
-+ pg.count = CFS_PAGE_SIZE;
-+
-+ LL_CDEBUG_PAGE(D_PAGE, page, "%s %d bytes ino %lu at "LPU64"/"LPX64"\n",
-+ cmd & OBD_BRW_WRITE ? "write" : "read", pg.count,
-+ inode->i_ino, pg.off, pg.off);
-+ if (pg.count == 0) {
-+ CERROR("ZERO COUNT: ino %lu: size %p:%Lu(%p:%Lu) idx %lu off "
-+ LPU64"\n", inode->i_ino, inode, i_size_read(inode),
-+ page->mapping->host, i_size_read(page->mapping->host),
-+ page->index, pg.off);
-+ }
-+
-+ pg.flag = flags;
-+
-+ if (cmd & OBD_BRW_WRITE)
-+ ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_BRW_WRITE,
-+ pg.count);
-+ else
-+ ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_BRW_READ,
-+ pg.count);
-+ oinfo.oi_oa = oa;
-+ oinfo.oi_md = lsm;
-+ rc = obd_brw(cmd, ll_i2obdexp(inode), &oinfo, 1, &pg, NULL);
-+ if (rc == 0)
-+ obdo_to_inode(inode, oa, OBD_MD_FLBLOCKS);
-+ else if (rc != -EIO)
-+ CERROR("error from obd_brw: rc = %d\n", rc);
-+ RETURN(rc);
-+}
-+
-+int ll_file_punch(struct inode * inode, loff_t new_size, int srvlock)
-+{
-+ struct ll_inode_info *lli = ll_i2info(inode);
-+ struct obd_info oinfo = { { { 0 } } };
-+ struct obdo oa;
-+ int rc;
-+
-+ ENTRY;
-+ CDEBUG(D_INFO, "calling punch for "LPX64" (new size %Lu=%#Lx)\n",
-+ lli->lli_smd->lsm_object_id, new_size, new_size);
-+
-+ oinfo.oi_md = lli->lli_smd;
-+ oinfo.oi_policy.l_extent.start = new_size;
-+ oinfo.oi_policy.l_extent.end = OBD_OBJECT_EOF;
-+ oinfo.oi_oa = &oa;
-+ oa.o_id = lli->lli_smd->lsm_object_id;
-+ oa.o_valid = OBD_MD_FLID;
-+ if (srvlock) {
-+ /* set OBD_MD_FLFLAGS in o_valid, only if we
-+ * set OBD_FL_TRUNCLOCK, otherwise ost_punch
-+ * and filter_setattr get confused, see the comment
-+ * in ost_punch */
-+ oa.o_flags = OBD_FL_TRUNCLOCK;
-+ oa.o_valid |= OBD_MD_FLFLAGS;
-+ }
-+ obdo_from_inode(&oa, inode, OBD_MD_FLTYPE | OBD_MD_FLMODE |OBD_MD_FLFID|
-+ OBD_MD_FLATIME | OBD_MD_FLMTIME | OBD_MD_FLCTIME |
-+ OBD_MD_FLUID | OBD_MD_FLGID | OBD_MD_FLGENER |
-+ OBD_MD_FLBLOCKS);
-+ rc = obd_punch_rqset(ll_i2obdexp(inode), &oinfo, NULL);
-+ if (rc) {
-+ CERROR("obd_truncate fails (%d) ino %lu\n", rc, inode->i_ino);
-+ RETURN(rc);
-+ }
-+ obdo_to_inode(inode, &oa, OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
-+ OBD_MD_FLATIME | OBD_MD_FLMTIME | OBD_MD_FLCTIME);
-+ RETURN(0);
-+}
-+/* this isn't where truncate starts. roughly:
-+ * sys_truncate->ll_setattr_raw->vmtruncate->ll_truncate. setattr_raw grabs
-+ * DLM lock on [size, EOF], i_mutex, ->lli_size_sem, and WRITE_I_ALLOC_SEM to
-+ * avoid races.
-+ *
-+ * must be called under ->lli_size_sem */
-+void ll_truncate(struct inode *inode)
-+{
-+ struct ll_inode_info *lli = ll_i2info(inode);
-+ int srvlock = test_bit(LLI_F_SRVLOCK, &lli->lli_flags);
-+ loff_t new_size;
-+ ENTRY;
-+ CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p) to %Lu=%#Lx\n",inode->i_ino,
-+ inode->i_generation, inode, i_size_read(inode), i_size_read(inode));
-+
-+ ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_TRUNC, 1);
-+ if (lli->lli_size_sem_owner != current) {
-+ EXIT;
-+ return;
-+ }
-+
-+ if (!lli->lli_smd) {
-+ CDEBUG(D_INODE, "truncate on inode %lu with no objects\n",
-+ inode->i_ino);
-+ GOTO(out_unlock, 0);
-+ }
-+
-+ LASSERT(atomic_read(&lli->lli_size_sem.count) <= 0);
-+
-+ if (!srvlock) {
-+ struct ost_lvb lvb;
-+ int rc;
-+
-+ /* XXX I'm pretty sure this is a hack to paper over a more fundamental
-+ * race condition. */
-+ lov_stripe_lock(lli->lli_smd);
-+ inode_init_lvb(inode, &lvb);
-+ rc = obd_merge_lvb(ll_i2obdexp(inode), lli->lli_smd, &lvb, 0);
-+ inode->i_blocks = lvb.lvb_blocks;
-+ if (lvb.lvb_size == i_size_read(inode) && rc == 0) {
-+ CDEBUG(D_VFSTRACE, "skipping punch for obj "LPX64", %Lu=%#Lx\n",
-+ lli->lli_smd->lsm_object_id, i_size_read(inode),
-+ i_size_read(inode));
-+ lov_stripe_unlock(lli->lli_smd);
-+ GOTO(out_unlock, 0);
-+ }
-+
-+ obd_adjust_kms(ll_i2obdexp(inode), lli->lli_smd,
-+ i_size_read(inode), 1);
-+ lov_stripe_unlock(lli->lli_smd);
-+ }
-+
-+ if (unlikely((ll_i2sbi(inode)->ll_flags & LL_SBI_LLITE_CHECKSUM) &&
-+ (i_size_read(inode) & ~CFS_PAGE_MASK))) {
-+ /* If the truncate leaves a partial page, update its checksum */
-+ struct page *page = find_get_page(inode->i_mapping,
-+ i_size_read(inode) >>
-+ CFS_PAGE_SHIFT);
-+ if (page != NULL) {
-+ struct ll_async_page *llap = llap_cast_private(page);
-+ if (llap != NULL) {
-+ char *kaddr = kmap_atomic(page, KM_USER0);
-+ llap->llap_checksum =
-+ init_checksum(OSC_DEFAULT_CKSUM);
-+ llap->llap_checksum =
-+ compute_checksum(llap->llap_checksum,
-+ kaddr, CFS_PAGE_SIZE,
-+ OSC_DEFAULT_CKSUM);
-+ kunmap_atomic(kaddr, KM_USER0);
-+ }
-+ page_cache_release(page);
-+ }
-+ }
-+
-+ new_size = i_size_read(inode);
-+ ll_inode_size_unlock(inode, 0);
-+ if (!srvlock)
-+ ll_file_punch(inode, new_size, 0);
-+ else
-+ ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_LOCKLESS_TRUNC, 1);
-+
-+ EXIT;
-+ return;
-+
-+ out_unlock:
-+ ll_inode_size_unlock(inode, 0);
-+} /* ll_truncate */
-+
-+int ll_prepare_write(struct file *file, struct page *page, unsigned from,
-+ unsigned to)
-+{
-+ struct inode *inode = page->mapping->host;
-+ struct ll_inode_info *lli = ll_i2info(inode);
-+ struct lov_stripe_md *lsm = lli->lli_smd;
-+ obd_off offset = ((obd_off)page->index) << CFS_PAGE_SHIFT;
-+ struct obd_info oinfo = { { { 0 } } };
-+ struct brw_page pga;
-+ struct obdo oa;
-+ struct ost_lvb lvb;
-+ int rc = 0;
-+ ENTRY;
-+
-+ LASSERT(PageLocked(page));
-+ (void)llap_cast_private(page); /* assertion */
-+
-+ /* Check to see if we should return -EIO right away */
-+ pga.pg = page;
-+ pga.off = offset;
-+ pga.count = CFS_PAGE_SIZE;
-+ pga.flag = 0;
-+
-+ oa.o_mode = inode->i_mode;
-+ oa.o_id = lsm->lsm_object_id;
-+ oa.o_valid = OBD_MD_FLID | OBD_MD_FLMODE | OBD_MD_FLTYPE;
-+ obdo_from_inode(&oa, inode, OBD_MD_FLFID | OBD_MD_FLGENER);
-+
-+ oinfo.oi_oa = &oa;
-+ oinfo.oi_md = lsm;
-+ rc = obd_brw(OBD_BRW_CHECK, ll_i2obdexp(inode), &oinfo, 1, &pga, NULL);
-+ if (rc)
-+ RETURN(rc);
-+
-+ if (PageUptodate(page)) {
-+ LL_CDEBUG_PAGE(D_PAGE, page, "uptodate\n");
-+ RETURN(0);
-+ }
-+
-+ /* We're completely overwriting an existing page, so _don't_ set it up
-+ * to date until commit_write */
-+ if (from == 0 && to == CFS_PAGE_SIZE) {
-+ LL_CDEBUG_PAGE(D_PAGE, page, "full page write\n");
-+ POISON_PAGE(page, 0x11);
-+ RETURN(0);
-+ }
-+
-+ /* If are writing to a new page, no need to read old data. The extent
-+ * locking will have updated the KMS, and for our purposes here we can
-+ * treat it like i_size. */
-+ lov_stripe_lock(lsm);
-+ inode_init_lvb(inode, &lvb);
-+ obd_merge_lvb(ll_i2obdexp(inode), lsm, &lvb, 1);
-+ lov_stripe_unlock(lsm);
-+ if (lvb.lvb_size <= offset) {
-+ char *kaddr = kmap_atomic(page, KM_USER0);
-+ LL_CDEBUG_PAGE(D_PAGE, page, "kms "LPU64" <= offset "LPU64"\n",
-+ lvb.lvb_size, offset);
-+ memset(kaddr, 0, CFS_PAGE_SIZE);
-+ kunmap_atomic(kaddr, KM_USER0);
-+ GOTO(prepare_done, rc = 0);
-+ }
-+
-+ /* XXX could be an async ocp read.. read-ahead? */
-+ rc = ll_brw(OBD_BRW_READ, inode, &oa, page, 0);
-+ if (rc == 0) {
-+ /* bug 1598: don't clobber blksize */
-+ oa.o_valid &= ~(OBD_MD_FLSIZE | OBD_MD_FLBLKSZ);
-+ obdo_refresh_inode(inode, &oa, oa.o_valid);
-+ }
-+
-+ EXIT;
-+ prepare_done:
-+ if (rc == 0)
-+ SetPageUptodate(page);
-+
-+ return rc;
-+}
-+
-+/**
-+ * make page ready for ASYNC write
-+ * \param data - pointer to llap cookie
-+ * \param cmd - is OBD_BRW_* macroses
-+ *
-+ * \retval 0 is page successfully prepared to send
-+ * \retval -EAGAIN is page not need to send
-+ */
-+static int ll_ap_make_ready(void *data, int cmd)
-+{
-+ struct ll_async_page *llap;
-+ struct page *page;
-+ ENTRY;
-+
-+ llap = LLAP_FROM_COOKIE(data);
-+ page = llap->llap_page;
-+
-+ /* we're trying to write, but the page is locked.. come back later */
-+ if (TryLockPage(page))
-+ RETURN(-EAGAIN);
-+
-+ LASSERTF(!(cmd & OBD_BRW_READ) || !PageWriteback(page),
-+ "cmd %x page %p ino %lu index %lu fl %lx\n", cmd, page,
-+ page->mapping->host->i_ino, page->index, page->flags);
-+
-+ /* if we left PageDirty we might get another writepage call
-+ * in the future. list walkers are bright enough
-+ * to check page dirty so we can leave it on whatever list
-+ * its on. XXX also, we're called with the cli list so if
-+ * we got the page cache list we'd create a lock inversion
-+ * with the removepage path which gets the page lock then the
-+ * cli lock */
-+ if(!clear_page_dirty_for_io(page)) {
-+ unlock_page(page);
-+ RETURN(-EAGAIN);
-+ }
-+
-+ /* This actually clears the dirty bit in the radix tree.*/
-+ set_page_writeback(page);
-+
-+ LL_CDEBUG_PAGE(D_PAGE, page, "made ready\n");
-+ page_cache_get(page);
-+
-+ RETURN(0);
-+}
-+
-+/* We have two reasons for giving llite the opportunity to change the
-+ * write length of a given queued page as it builds the RPC containing
-+ * the page:
-+ *
-+ * 1) Further extending writes may have landed in the page cache
-+ * since a partial write first queued this page requiring us
-+ * to write more from the page cache. (No further races are possible, since
-+ * by the time this is called, the page is locked.)
-+ * 2) We might have raced with truncate and want to avoid performing
-+ * write RPCs that are just going to be thrown away by the
-+ * truncate's punch on the storage targets.
-+ *
-+ * The kms serves these purposes as it is set at both truncate and extending
-+ * writes.
-+ */
-+static int ll_ap_refresh_count(void *data, int cmd)
-+{
-+ struct ll_inode_info *lli;
-+ struct ll_async_page *llap;
-+ struct lov_stripe_md *lsm;
-+ struct page *page;
-+ struct inode *inode;
-+ struct ost_lvb lvb;
-+ __u64 kms;
-+ ENTRY;
-+
-+ /* readpage queues with _COUNT_STABLE, shouldn't get here. */
-+ LASSERT(cmd != OBD_BRW_READ);
-+
-+ llap = LLAP_FROM_COOKIE(data);
-+ page = llap->llap_page;
-+ inode = page->mapping->host;
-+ lli = ll_i2info(inode);
-+ lsm = lli->lli_smd;
-+
-+ lov_stripe_lock(lsm);
-+ inode_init_lvb(inode, &lvb);
-+ obd_merge_lvb(ll_i2obdexp(inode), lsm, &lvb, 1);
-+ kms = lvb.lvb_size;
-+ lov_stripe_unlock(lsm);
-+
-+ /* catch race with truncate */
-+ if (((__u64)page->index << CFS_PAGE_SHIFT) >= kms)
-+ return 0;
-+
-+ /* catch sub-page write at end of file */
-+ if (((__u64)page->index << CFS_PAGE_SHIFT) + CFS_PAGE_SIZE > kms)
-+ return kms % CFS_PAGE_SIZE;
-+
-+ return CFS_PAGE_SIZE;
-+}
-+
-+void ll_inode_fill_obdo(struct inode *inode, int cmd, struct obdo *oa)
-+{
-+ struct lov_stripe_md *lsm;
-+ obd_flag valid_flags;
-+
-+ lsm = ll_i2info(inode)->lli_smd;
-+
-+ oa->o_id = lsm->lsm_object_id;
-+ oa->o_valid = OBD_MD_FLID;
-+ valid_flags = OBD_MD_FLTYPE | OBD_MD_FLATIME;
-+ if (cmd & OBD_BRW_WRITE) {
-+ oa->o_valid |= OBD_MD_FLEPOCH;
-+ oa->o_easize = ll_i2info(inode)->lli_io_epoch;
-+
-+ valid_flags |= OBD_MD_FLMTIME | OBD_MD_FLCTIME |
-+ OBD_MD_FLUID | OBD_MD_FLGID |
-+ OBD_MD_FLFID | OBD_MD_FLGENER;
-+ }
-+
-+ obdo_from_inode(oa, inode, valid_flags);
-+}
-+
-+static void ll_ap_fill_obdo(void *data, int cmd, struct obdo *oa)
-+{
-+ struct ll_async_page *llap;
-+ ENTRY;
-+
-+ llap = LLAP_FROM_COOKIE(data);
-+ ll_inode_fill_obdo(llap->llap_page->mapping->host, cmd, oa);
-+
-+ EXIT;
-+}
-+
-+static void ll_ap_update_obdo(void *data, int cmd, struct obdo *oa,
-+ obd_valid valid)
-+{
-+ struct ll_async_page *llap;
-+ ENTRY;
-+
-+ llap = LLAP_FROM_COOKIE(data);
-+ obdo_from_inode(oa, llap->llap_page->mapping->host, valid);
-+
-+ EXIT;
-+}
-+
-+static struct obd_async_page_ops ll_async_page_ops = {
-+ .ap_make_ready = ll_ap_make_ready,
-+ .ap_refresh_count = ll_ap_refresh_count,
-+ .ap_fill_obdo = ll_ap_fill_obdo,
-+ .ap_update_obdo = ll_ap_update_obdo,
-+ .ap_completion = ll_ap_completion,
-+};
-+
-+struct ll_async_page *llap_cast_private(struct page *page)
-+{
-+ struct ll_async_page *llap = (struct ll_async_page *)page_private(page);
-+
-+ LASSERTF(llap == NULL || llap->llap_magic == LLAP_MAGIC,
-+ "page %p private %lu gave magic %d which != %d\n",
-+ page, page_private(page), llap->llap_magic, LLAP_MAGIC);
-+
-+ return llap;
-+}
-+
-+/* Try to shrink the page cache for the @sbi filesystem by 1/@shrink_fraction.
-+ *
-+ * There is an llap attached onto every page in lustre, linked off @sbi.
-+ * We add an llap to the list so we don't lose our place during list walking.
-+ * If llaps in the list are being moved they will only move to the end
-+ * of the LRU, and we aren't terribly interested in those pages here (we
-+ * start at the beginning of the list where the least-used llaps are.
-+ */
-+int llap_shrink_cache(struct ll_sb_info *sbi, int shrink_fraction)
-+{
-+ struct ll_async_page *llap, dummy_llap = { .llap_magic = 0xd11ad11a };
-+ unsigned long total, want, count = 0;
-+
-+ total = sbi->ll_async_page_count;
-+
-+ /* There can be a large number of llaps (600k or more in a large
-+ * memory machine) so the VM 1/6 shrink ratio is likely too much.
-+ * Since we are freeing pages also, we don't necessarily want to
-+ * shrink so much. Limit to 40MB of pages + llaps per call. */
-+ if (shrink_fraction == 0)
-+ want = sbi->ll_async_page_count - sbi->ll_async_page_max + 32;
-+ else
-+ want = (total + shrink_fraction - 1) / shrink_fraction;
-+
-+ if (want > 40 << (20 - CFS_PAGE_SHIFT))
-+ want = 40 << (20 - CFS_PAGE_SHIFT);
-+
-+ CDEBUG(D_CACHE, "shrinking %lu of %lu pages (1/%d)\n",
-+ want, total, shrink_fraction);
-+
-+ spin_lock(&sbi->ll_lock);
-+ list_add(&dummy_llap.llap_pglist_item, &sbi->ll_pglist);
-+
-+ while (--total >= 0 && count < want) {
-+ struct page *page;
-+ int keep;
-+
-+ if (unlikely(need_resched())) {
-+ spin_unlock(&sbi->ll_lock);
-+ cond_resched();
-+ spin_lock(&sbi->ll_lock);
-+ }
-+
-+ llap = llite_pglist_next_llap(sbi,&dummy_llap.llap_pglist_item);
-+ list_del_init(&dummy_llap.llap_pglist_item);
-+ if (llap == NULL)
-+ break;
-+
-+ page = llap->llap_page;
-+ LASSERT(page != NULL);
-+
-+ list_add(&dummy_llap.llap_pglist_item, &llap->llap_pglist_item);
-+
-+ /* Page needs/undergoing IO */
-+ if (TryLockPage(page)) {
-+ LL_CDEBUG_PAGE(D_PAGE, page, "can't lock\n");
-+ continue;
-+ }
-+
-+ keep = (llap->llap_write_queued || PageDirty(page) ||
-+ PageWriteback(page) || (!PageUptodate(page) &&
-+ llap->llap_origin != LLAP_ORIGIN_READAHEAD));
-+
-+ LL_CDEBUG_PAGE(D_PAGE, page,"%s LRU page: %s%s%s%s%s origin %s\n",
-+ keep ? "keep" : "drop",
-+ llap->llap_write_queued ? "wq " : "",
-+ PageDirty(page) ? "pd " : "",
-+ PageUptodate(page) ? "" : "!pu ",
-+ PageWriteback(page) ? "wb" : "",
-+ llap->llap_defer_uptodate ? "" : "!du",
-+ llap_origins[llap->llap_origin]);
-+
-+ /* If page is dirty or undergoing IO don't discard it */
-+ if (keep) {
-+ unlock_page(page);
-+ continue;
-+ }
-+
-+ page_cache_get(page);
-+ spin_unlock(&sbi->ll_lock);
-+
-+ if (page->mapping != NULL) {
-+ ll_teardown_mmaps(page->mapping,
-+ (__u64)page->index << CFS_PAGE_SHIFT,
-+ ((__u64)page->index << CFS_PAGE_SHIFT)|
-+ ~CFS_PAGE_MASK);
-+ if (!PageDirty(page) && !page_mapped(page)) {
-+ ll_ra_accounting(llap, page->mapping);
-+ ll_truncate_complete_page(page);
-+ ++count;
-+ } else {
-+ LL_CDEBUG_PAGE(D_PAGE, page, "Not dropping page"
-+ " because it is "
-+ "%s\n",
-+ PageDirty(page)?
-+ "dirty":"mapped");
-+ }
-+ }
-+ unlock_page(page);
-+ page_cache_release(page);
-+
-+ spin_lock(&sbi->ll_lock);
-+ }
-+ list_del(&dummy_llap.llap_pglist_item);
-+ spin_unlock(&sbi->ll_lock);
-+
-+ CDEBUG(D_CACHE, "shrank %lu/%lu and left %lu unscanned\n",
-+ count, want, total);
-+
-+ return count;
-+}
-+
-+static struct ll_async_page *llap_from_page_with_lockh(struct page *page,
-+ unsigned origin,
-+ struct lustre_handle *lockh)
-+{
-+ struct ll_async_page *llap;
-+ struct obd_export *exp;
-+ struct inode *inode = page->mapping->host;
-+ struct ll_sb_info *sbi;
-+ int rc;
-+ ENTRY;
-+
-+ if (!inode) {
-+ static int triggered;
-+
-+ if (!triggered) {
-+ LL_CDEBUG_PAGE(D_ERROR, page, "Bug 10047. Wrong anon "
-+ "page received\n");
-+ libcfs_debug_dumpstack(NULL);
-+ triggered = 1;
-+ }
-+ RETURN(ERR_PTR(-EINVAL));
-+ }
-+ sbi = ll_i2sbi(inode);
-+ LASSERT(ll_async_page_slab);
-+ LASSERTF(origin < LLAP__ORIGIN_MAX, "%u\n", origin);
-+
-+ llap = llap_cast_private(page);
-+ if (llap != NULL) {
-+ /* move to end of LRU list, except when page is just about to
-+ * die */
-+ if (origin != LLAP_ORIGIN_REMOVEPAGE) {
-+ spin_lock(&sbi->ll_lock);
-+ sbi->ll_pglist_gen++;
-+ list_del_init(&llap->llap_pglist_item);
-+ list_add_tail(&llap->llap_pglist_item, &sbi->ll_pglist);
-+ spin_unlock(&sbi->ll_lock);
-+ }
-+ GOTO(out, llap);
-+ }
-+
-+ exp = ll_i2obdexp(page->mapping->host);
-+ if (exp == NULL)
-+ RETURN(ERR_PTR(-EINVAL));
-+
-+ /* limit the number of lustre-cached pages */
-+ if (sbi->ll_async_page_count >= sbi->ll_async_page_max)
-+ llap_shrink_cache(sbi, 0);
-+
-+ OBD_SLAB_ALLOC(llap, ll_async_page_slab, CFS_ALLOC_STD,
-+ ll_async_page_slab_size);
-+ if (llap == NULL)
-+ RETURN(ERR_PTR(-ENOMEM));
-+ llap->llap_magic = LLAP_MAGIC;
-+ llap->llap_cookie = (void *)llap + size_round(sizeof(*llap));
-+
-+ /* XXX: for bug 11270 - check for lockless origin here! */
-+ if (origin == LLAP_ORIGIN_LOCKLESS_IO)
-+ llap->llap_nocache = 1;
-+
-+ rc = obd_prep_async_page(exp, ll_i2info(inode)->lli_smd, NULL, page,
-+ (obd_off)page->index << CFS_PAGE_SHIFT,
-+ &ll_async_page_ops, llap, &llap->llap_cookie,
-+ llap->llap_nocache, lockh);
-+ if (rc) {
-+ OBD_SLAB_FREE(llap, ll_async_page_slab,
-+ ll_async_page_slab_size);
-+ RETURN(ERR_PTR(rc));
-+ }
-+
-+ CDEBUG(D_CACHE, "llap %p page %p cookie %p obj off "LPU64"\n", llap,
-+ page, llap->llap_cookie, (obd_off)page->index << CFS_PAGE_SHIFT);
-+ /* also zeroing the PRIVBITS low order bitflags */
-+ __set_page_ll_data(page, llap);
-+ llap->llap_page = page;
-+
-+ spin_lock(&sbi->ll_lock);
-+ sbi->ll_pglist_gen++;
-+ sbi->ll_async_page_count++;
-+ list_add_tail(&llap->llap_pglist_item, &sbi->ll_pglist);
-+ spin_unlock(&sbi->ll_lock);
-+
-+ out:
-+ if (unlikely(sbi->ll_flags & LL_SBI_LLITE_CHECKSUM)) {
-+ __u32 csum;
-+ char *kaddr = kmap_atomic(page, KM_USER0);
-+ csum = init_checksum(OSC_DEFAULT_CKSUM);
-+ csum = compute_checksum(csum, kaddr, CFS_PAGE_SIZE,
-+ OSC_DEFAULT_CKSUM);
-+ kunmap_atomic(kaddr, KM_USER0);
-+ if (origin == LLAP_ORIGIN_READAHEAD ||
-+ origin == LLAP_ORIGIN_READPAGE ||
-+ origin == LLAP_ORIGIN_LOCKLESS_IO) {
-+ llap->llap_checksum = 0;
-+ } else if (origin == LLAP_ORIGIN_COMMIT_WRITE ||
-+ llap->llap_checksum == 0) {
-+ llap->llap_checksum = csum;
-+ CDEBUG(D_PAGE, "page %p cksum %x\n", page, csum);
-+ } else if (llap->llap_checksum == csum) {
-+ /* origin == LLAP_ORIGIN_WRITEPAGE */
-+ CDEBUG(D_PAGE, "page %p cksum %x confirmed\n",
-+ page, csum);
-+ } else {
-+ /* origin == LLAP_ORIGIN_WRITEPAGE */
-+ LL_CDEBUG_PAGE(D_ERROR, page, "old cksum %x != new "
-+ "%x!\n", llap->llap_checksum, csum);
-+ }
-+ }
-+
-+ llap->llap_origin = origin;
-+ RETURN(llap);
-+}
-+
-+static inline struct ll_async_page *llap_from_page(struct page *page,
-+ unsigned origin)
-+{
-+ return llap_from_page_with_lockh(page, origin, NULL);
-+}
-+
-+static int queue_or_sync_write(struct obd_export *exp, struct inode *inode,
-+ struct ll_async_page *llap,
-+ unsigned to, obd_flag async_flags)
-+{
-+ unsigned long size_index = i_size_read(inode) >> CFS_PAGE_SHIFT;
-+ struct obd_io_group *oig;
-+ struct ll_sb_info *sbi = ll_i2sbi(inode);
-+ int rc, noquot = llap->llap_ignore_quota ? OBD_BRW_NOQUOTA : 0;
-+ ENTRY;
-+
-+ /* _make_ready only sees llap once we've unlocked the page */
-+ llap->llap_write_queued = 1;
-+ rc = obd_queue_async_io(exp, ll_i2info(inode)->lli_smd, NULL,
-+ llap->llap_cookie, OBD_BRW_WRITE | noquot,
-+ 0, 0, 0, async_flags);
-+ if (rc == 0) {
-+ LL_CDEBUG_PAGE(D_PAGE, llap->llap_page, "write queued\n");
-+ llap_write_pending(inode, llap);
-+ GOTO(out, 0);
-+ }
-+
-+ llap->llap_write_queued = 0;
-+
-+ rc = oig_init(&oig);
-+ if (rc)
-+ GOTO(out, rc);
-+
-+ /* make full-page requests if we are not at EOF (bug 4410) */
-+ if (to != CFS_PAGE_SIZE && llap->llap_page->index < size_index) {
-+ LL_CDEBUG_PAGE(D_PAGE, llap->llap_page,
-+ "sync write before EOF: size_index %lu, to %d\n",
-+ size_index, to);
-+ to = CFS_PAGE_SIZE;
-+ } else if (to != CFS_PAGE_SIZE && llap->llap_page->index == size_index){
-+ int size_to = i_size_read(inode) & ~CFS_PAGE_MASK;
-+ LL_CDEBUG_PAGE(D_PAGE, llap->llap_page,
-+ "sync write at EOF: size_index %lu, to %d/%d\n",
-+ size_index, to, size_to);
-+ if (to < size_to)
-+ to = size_to;
-+ }
-+
-+ /* compare the checksum once before the page leaves llite */
-+ if (unlikely((sbi->ll_flags & LL_SBI_LLITE_CHECKSUM) &&
-+ llap->llap_checksum != 0)) {
-+ __u32 csum;
-+ struct page *page = llap->llap_page;
-+ char *kaddr = kmap_atomic(page, KM_USER0);
-+ csum = init_checksum(OSC_DEFAULT_CKSUM);
-+ csum = compute_checksum(csum, kaddr, CFS_PAGE_SIZE,
-+ OSC_DEFAULT_CKSUM);
-+ kunmap_atomic(kaddr, KM_USER0);
-+ if (llap->llap_checksum == csum) {
-+ CDEBUG(D_PAGE, "page %p cksum %x confirmed\n",
-+ page, csum);
-+ } else {
-+ CERROR("page %p old cksum %x != new cksum %x!\n",
-+ page, llap->llap_checksum, csum);
-+ }
-+ }
-+
-+ rc = obd_queue_group_io(exp, ll_i2info(inode)->lli_smd, NULL, oig,
-+ llap->llap_cookie, OBD_BRW_WRITE | noquot,
-+ 0, to, 0, ASYNC_READY | ASYNC_URGENT |
-+ ASYNC_COUNT_STABLE | ASYNC_GROUP_SYNC);
-+ if (rc)
-+ GOTO(free_oig, rc);
-+
-+ rc = obd_trigger_group_io(exp, ll_i2info(inode)->lli_smd, NULL, oig);
-+ if (rc)
-+ GOTO(free_oig, rc);
-+
-+ rc = oig_wait(oig);
-+
-+ if (!rc && async_flags & ASYNC_READY) {
-+ unlock_page(llap->llap_page);
-+ if (PageWriteback(llap->llap_page))
-+ end_page_writeback(llap->llap_page);
-+ }
-+
-+ LL_CDEBUG_PAGE(D_PAGE, llap->llap_page, "sync write returned %d\n", rc);
-+
-+free_oig:
-+ oig_release(oig);
-+out:
-+ RETURN(rc);
-+}
-+
-+/* update our write count to account for i_size increases that may have
-+ * happened since we've queued the page for io. */
-+
-+/* be careful not to return success without setting the page Uptodate or
-+ * the next pass through prepare_write will read in stale data from disk. */
-+int ll_commit_write(struct file *file, struct page *page, unsigned from,
-+ unsigned to)
-+{
-+ struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
-+ struct inode *inode = page->mapping->host;
-+ struct ll_inode_info *lli = ll_i2info(inode);
-+ struct lov_stripe_md *lsm = lli->lli_smd;
-+ struct obd_export *exp;
-+ struct ll_async_page *llap;
-+ loff_t size;
-+ struct lustre_handle *lockh = NULL;
-+ int rc = 0;
-+ ENTRY;
-+
-+ SIGNAL_MASK_ASSERT(); /* XXX BUG 1511 */
-+ LASSERT(inode == file->f_dentry->d_inode);
-+ LASSERT(PageLocked(page));
-+
-+ CDEBUG(D_INODE, "inode %p is writing page %p from %d to %d at %lu\n",
-+ inode, page, from, to, page->index);
-+
-+ if (fd->fd_flags & LL_FILE_GROUP_LOCKED)
-+ lockh = &fd->fd_cwlockh;
-+
-+ llap = llap_from_page_with_lockh(page, LLAP_ORIGIN_COMMIT_WRITE, lockh);
-+ if (IS_ERR(llap))
-+ RETURN(PTR_ERR(llap));
-+
-+ exp = ll_i2obdexp(inode);
-+ if (exp == NULL)
-+ RETURN(-EINVAL);
-+
-+ llap->llap_ignore_quota = cfs_capable(CFS_CAP_SYS_RESOURCE);
-+
-+ /* queue a write for some time in the future the first time we
-+ * dirty the page */
-+ if (!PageDirty(page)) {
-+ ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_DIRTY_MISSES, 1);
-+
-+ rc = queue_or_sync_write(exp, inode, llap, to, 0);
-+ if (rc)
-+ GOTO(out, rc);
-+ } else {
-+ ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_DIRTY_HITS, 1);
-+ }
-+
-+ /* put the page in the page cache, from now on ll_removepage is
-+ * responsible for cleaning up the llap.
-+ * only set page dirty when it's queued to be write out */
-+ if (llap->llap_write_queued)
-+ set_page_dirty(page);
-+
-+out:
-+ size = (((obd_off)page->index) << CFS_PAGE_SHIFT) + to;
-+ ll_inode_size_lock(inode, 0);
-+ if (rc == 0) {
-+ lov_stripe_lock(lsm);
-+ obd_adjust_kms(exp, lsm, size, 0);
-+ lov_stripe_unlock(lsm);
-+ if (size > i_size_read(inode))
-+ i_size_write(inode, size);
-+ SetPageUptodate(page);
-+ } else if (size > i_size_read(inode)) {
-+ /* this page beyond the pales of i_size, so it can't be
-+ * truncated in ll_p_r_e during lock revoking. we must
-+ * teardown our book-keeping here. */
-+ ll_removepage(page);
-+ }
-+ ll_inode_size_unlock(inode, 0);
-+ RETURN(rc);
-+}
-+
-+static unsigned long ll_ra_count_get(struct ll_sb_info *sbi, unsigned long len)
-+{
-+ struct ll_ra_info *ra = &sbi->ll_ra_info;
-+ unsigned long ret;
-+ ENTRY;
-+
-+ spin_lock(&sbi->ll_lock);
-+ ret = min(ra->ra_max_pages - ra->ra_cur_pages, len);
-+ ra->ra_cur_pages += ret;
-+ spin_unlock(&sbi->ll_lock);
-+
-+ RETURN(ret);
-+}
-+
-+static void ll_ra_count_put(struct ll_sb_info *sbi, unsigned long len)
-+{
-+ struct ll_ra_info *ra = &sbi->ll_ra_info;
-+ spin_lock(&sbi->ll_lock);
-+ LASSERTF(ra->ra_cur_pages >= len, "r_c_p %lu len %lu\n",
-+ ra->ra_cur_pages, len);
-+ ra->ra_cur_pages -= len;
-+ spin_unlock(&sbi->ll_lock);
-+}
-+
-+/* called for each page in a completed rpc.*/
-+int ll_ap_completion(void *data, int cmd, struct obdo *oa, int rc)
-+{
-+ struct ll_async_page *llap;
-+ struct page *page;
-+ int ret = 0;
-+ ENTRY;
-+
-+ llap = LLAP_FROM_COOKIE(data);
-+ page = llap->llap_page;
-+ LASSERT(PageLocked(page));
-+ LASSERT(CheckWriteback(page,cmd));
-+
-+ LL_CDEBUG_PAGE(D_PAGE, page, "completing cmd %d with %d\n", cmd, rc);
-+
-+ if (cmd & OBD_BRW_READ && llap->llap_defer_uptodate)
-+ ll_ra_count_put(ll_i2sbi(page->mapping->host), 1);
-+
-+ if (rc == 0) {
-+ if (cmd & OBD_BRW_READ) {
-+ if (!llap->llap_defer_uptodate)
-+ SetPageUptodate(page);
-+ } else {
-+ llap->llap_write_queued = 0;
-+ }
-+ ClearPageError(page);
-+ } else {
-+ if (cmd & OBD_BRW_READ) {
-+ llap->llap_defer_uptodate = 0;
-+ }
-+ SetPageError(page);
-+ if (rc == -ENOSPC)
-+ set_bit(AS_ENOSPC, &page->mapping->flags);
-+ else
-+ set_bit(AS_EIO, &page->mapping->flags);
-+ }
-+
-+ /* be carefull about clear WB.
-+ * if WB will cleared after page lock is released - paralel IO can be
-+ * started before ap_make_ready is finished - so we will be have page
-+ * with PG_Writeback set from ->writepage() and completed READ which
-+ * clear this flag */
-+ if ((cmd & OBD_BRW_WRITE) && PageWriteback(page))
-+ end_page_writeback(page);
-+
-+ unlock_page(page);
-+
-+ if (cmd & OBD_BRW_WRITE) {
-+ llap_write_complete(page->mapping->host, llap);
-+ ll_try_done_writing(page->mapping->host);
-+ }
-+
-+ page_cache_release(page);
-+
-+ RETURN(ret);
-+}
-+
-+static void __ll_put_llap(struct page *page)
-+{
-+ struct inode *inode = page->mapping->host;
-+ struct obd_export *exp;
-+ struct ll_async_page *llap;
-+ struct ll_sb_info *sbi = ll_i2sbi(inode);
-+ int rc;
-+ ENTRY;
-+
-+ exp = ll_i2obdexp(inode);
-+ if (exp == NULL) {
-+ CERROR("page %p ind %lu gave null export\n", page, page->index);
-+ EXIT;
-+ return;
-+ }
-+
-+ llap = llap_from_page(page, LLAP_ORIGIN_REMOVEPAGE);
-+ if (IS_ERR(llap)) {
-+ CERROR("page %p ind %lu couldn't find llap: %ld\n", page,
-+ page->index, PTR_ERR(llap));
-+ EXIT;
-+ return;
-+ }
-+
-+ //llap_write_complete(inode, llap);
-+ rc = obd_teardown_async_page(exp, ll_i2info(inode)->lli_smd, NULL,
-+ llap->llap_cookie);
-+ if (rc != 0)
-+ CERROR("page %p ind %lu failed: %d\n", page, page->index, rc);
-+
-+ /* this unconditional free is only safe because the page lock
-+ * is providing exclusivity to memory pressure/truncate/writeback..*/
-+ __clear_page_ll_data(page);
-+
-+ spin_lock(&sbi->ll_lock);
-+ if (!list_empty(&llap->llap_pglist_item))
-+ list_del_init(&llap->llap_pglist_item);
-+ sbi->ll_pglist_gen++;
-+ sbi->ll_async_page_count--;
-+ spin_unlock(&sbi->ll_lock);
-+ OBD_SLAB_FREE(llap, ll_async_page_slab, ll_async_page_slab_size);
-+
-+ EXIT;
-+}
-+
-+/* the kernel calls us here when a page is unhashed from the page cache.
-+ * the page will be locked and the kernel is holding a spinlock, so
-+ * we need to be careful. we're just tearing down our book-keeping
-+ * here. */
-+void ll_removepage(struct page *page)
-+{
-+ struct ll_async_page *llap = llap_cast_private(page);
-+ ENTRY;
-+
-+ LASSERT(!in_interrupt());
-+
-+ /* sync pages or failed read pages can leave pages in the page
-+ * cache that don't have our data associated with them anymore */
-+ if (page_private(page) == 0) {
-+ EXIT;
-+ return;
-+ }
-+
-+ LASSERT(!llap->llap_lockless_io_page);
-+ LASSERT(!llap->llap_nocache);
-+
-+ LL_CDEBUG_PAGE(D_PAGE, page, "being evicted\n");
-+ __ll_put_llap(page);
-+
-+ EXIT;
-+}
-+
-+static int ll_issue_page_read(struct obd_export *exp,
-+ struct ll_async_page *llap,
-+ struct obd_io_group *oig, int defer)
-+{
-+ struct page *page = llap->llap_page;
-+ int rc;
-+
-+ page_cache_get(page);
-+ llap->llap_defer_uptodate = defer;
-+ llap->llap_ra_used = 0;
-+ rc = obd_queue_group_io(exp, ll_i2info(page->mapping->host)->lli_smd,
-+ NULL, oig, llap->llap_cookie, OBD_BRW_READ, 0,
-+ CFS_PAGE_SIZE, 0, ASYNC_COUNT_STABLE | ASYNC_READY |
-+ ASYNC_URGENT);
-+ if (rc) {
-+ LL_CDEBUG_PAGE(D_ERROR, page, "read queue failed: rc %d\n", rc);
-+ page_cache_release(page);
-+ }
-+ RETURN(rc);
-+}
-+
-+static void ll_ra_stats_inc_unlocked(struct ll_ra_info *ra, enum ra_stat which)
-+{
-+ LASSERTF(which >= 0 && which < _NR_RA_STAT, "which: %u\n", which);
-+ ra->ra_stats[which]++;
-+}
-+
-+static void ll_ra_stats_inc(struct address_space *mapping, enum ra_stat which)
-+{
-+ struct ll_sb_info *sbi = ll_i2sbi(mapping->host);
-+ struct ll_ra_info *ra = &ll_i2sbi(mapping->host)->ll_ra_info;
-+
-+ spin_lock(&sbi->ll_lock);
-+ ll_ra_stats_inc_unlocked(ra, which);
-+ spin_unlock(&sbi->ll_lock);
-+}
-+
-+void ll_ra_accounting(struct ll_async_page *llap, struct address_space *mapping)
-+{
-+ if (!llap->llap_defer_uptodate || llap->llap_ra_used)
-+ return;
-+
-+ ll_ra_stats_inc(mapping, RA_STAT_DISCARDED);
-+}
-+
-+#define RAS_CDEBUG(ras) \
-+ CDEBUG(D_READA, \
-+ "lrp %lu cr %lu cp %lu ws %lu wl %lu nra %lu r %lu ri %lu" \
-+ "csr %lu sf %lu sp %lu sl %lu \n", \
-+ ras->ras_last_readpage, ras->ras_consecutive_requests, \
-+ ras->ras_consecutive_pages, ras->ras_window_start, \
-+ ras->ras_window_len, ras->ras_next_readahead, \
-+ ras->ras_requests, ras->ras_request_index, \
-+ ras->ras_consecutive_stride_requests, ras->ras_stride_offset, \
-+ ras->ras_stride_pages, ras->ras_stride_length)
-+
-+static int index_in_window(unsigned long index, unsigned long point,
-+ unsigned long before, unsigned long after)
-+{
-+ unsigned long start = point - before, end = point + after;
-+
-+ if (start > point)
-+ start = 0;
-+ if (end < point)
-+ end = ~0;
-+
-+ return start <= index && index <= end;
-+}
-+
-+static struct ll_readahead_state *ll_ras_get(struct file *f)
-+{
-+ struct ll_file_data *fd;
-+
-+ fd = LUSTRE_FPRIVATE(f);
-+ return &fd->fd_ras;
-+}
-+
-+void ll_ra_read_in(struct file *f, struct ll_ra_read *rar)
-+{
-+ struct ll_readahead_state *ras;
-+
-+ ras = ll_ras_get(f);
-+
-+ spin_lock(&ras->ras_lock);
-+ ras->ras_requests++;
-+ ras->ras_request_index = 0;
-+ ras->ras_consecutive_requests++;
-+ rar->lrr_reader = current;
-+
-+ list_add(&rar->lrr_linkage, &ras->ras_read_beads);
-+ spin_unlock(&ras->ras_lock);
-+}
-+
-+void ll_ra_read_ex(struct file *f, struct ll_ra_read *rar)
-+{
-+ struct ll_readahead_state *ras;
-+
-+ ras = ll_ras_get(f);
-+
-+ spin_lock(&ras->ras_lock);
-+ list_del_init(&rar->lrr_linkage);
-+ spin_unlock(&ras->ras_lock);
-+}
-+
-+static struct ll_ra_read *ll_ra_read_get_locked(struct ll_readahead_state *ras)
-+{
-+ struct ll_ra_read *scan;
-+
-+ list_for_each_entry(scan, &ras->ras_read_beads, lrr_linkage) {
-+ if (scan->lrr_reader == current)
-+ return scan;
-+ }
-+ return NULL;
-+}
-+
-+struct ll_ra_read *ll_ra_read_get(struct file *f)
-+{
-+ struct ll_readahead_state *ras;
-+ struct ll_ra_read *bead;
-+
-+ ras = ll_ras_get(f);
-+
-+ spin_lock(&ras->ras_lock);
-+ bead = ll_ra_read_get_locked(ras);
-+ spin_unlock(&ras->ras_lock);
-+ return bead;
-+}
-+
-+static int ll_read_ahead_page(struct obd_export *exp, struct obd_io_group *oig,
-+ int index, struct address_space *mapping)
-+{
-+ struct ll_async_page *llap;
-+ struct page *page;
-+ unsigned int gfp_mask = 0;
-+ int rc = 0;
-+
-+ gfp_mask = GFP_HIGHUSER & ~__GFP_WAIT;
-+#ifdef __GFP_NOWARN
-+ gfp_mask |= __GFP_NOWARN;
-+#endif
-+ page = grab_cache_page_nowait_gfp(mapping, index, gfp_mask);
-+ if (page == NULL) {
-+ ll_ra_stats_inc(mapping, RA_STAT_FAILED_GRAB_PAGE);
-+ CDEBUG(D_READA, "g_c_p_n failed\n");
-+ return 0;
-+ }
-+
-+ /* Check if page was truncated or reclaimed */
-+ if (page->mapping != mapping) {
-+ ll_ra_stats_inc(mapping, RA_STAT_WRONG_GRAB_PAGE);
-+ CDEBUG(D_READA, "g_c_p_n returned invalid page\n");
-+ GOTO(unlock_page, rc = 0);
-+ }
-+
-+ /* we do this first so that we can see the page in the /proc
-+ * accounting */
-+ llap = llap_from_page(page, LLAP_ORIGIN_READAHEAD);
-+ if (IS_ERR(llap) || llap->llap_defer_uptodate) {
-+ if (PTR_ERR(llap) == -ENOLCK) {
-+ ll_ra_stats_inc(mapping, RA_STAT_FAILED_MATCH);
-+ CDEBUG(D_READA | D_PAGE,
-+ "Adding page to cache failed index "
-+ "%d\n", index);
-+ CDEBUG(D_READA, "nolock page\n");
-+ GOTO(unlock_page, rc = -ENOLCK);
-+ }
-+ CDEBUG(D_READA, "read-ahead page\n");
-+ GOTO(unlock_page, rc = 0);
-+ }
-+
-+ /* skip completed pages */
-+ if (Page_Uptodate(page))
-+ GOTO(unlock_page, rc = 0);
-+
-+ /* bail out when we hit the end of the lock. */
-+ rc = ll_issue_page_read(exp, llap, oig, 1);
-+ if (rc == 0) {
-+ LL_CDEBUG_PAGE(D_READA | D_PAGE, page, "started read-ahead\n");
-+ rc = 1;
-+ } else {
-+unlock_page:
-+ unlock_page(page);
-+ LL_CDEBUG_PAGE(D_READA | D_PAGE, page, "skipping read-ahead\n");
-+ }
-+ page_cache_release(page);
-+ return rc;
-+}
-+
-+/* ra_io_arg will be filled in the beginning of ll_readahead with
-+ * ras_lock, then the following ll_read_ahead_pages will read RA
-+ * pages according to this arg, all the items in this structure are
-+ * counted by page index.
-+ */
-+struct ra_io_arg {
-+ unsigned long ria_start; /* start offset of read-ahead*/
-+ unsigned long ria_end; /* end offset of read-ahead*/
-+ /* If stride read pattern is detected, ria_stoff means where
-+ * stride read is started. Note: for normal read-ahead, the
-+ * value here is meaningless, and also it will not be accessed*/
-+ pgoff_t ria_stoff;
-+ /* ria_length and ria_pages are the length and pages length in the
-+ * stride I/O mode. And they will also be used to check whether
-+ * it is stride I/O read-ahead in the read-ahead pages*/
-+ unsigned long ria_length;
-+ unsigned long ria_pages;
-+};
-+
-+#define RIA_DEBUG(ria) \
-+ CDEBUG(D_READA, "rs %lu re %lu ro %lu rl %lu rp %lu\n", \
-+ ria->ria_start, ria->ria_end, ria->ria_stoff, ria->ria_length,\
-+ ria->ria_pages)
-+
-+#define RAS_INCREASE_STEP (1024 * 1024 >> CFS_PAGE_SHIFT)
-+
-+static inline int stride_io_mode(struct ll_readahead_state *ras)
-+{
-+ return ras->ras_consecutive_stride_requests > 1;
-+}
-+
-+/* The function calculates how much pages will be read in
-+ * [off, off + length], which will be read by stride I/O mode,
-+ * stride_offset = st_off, stride_lengh = st_len,
-+ * stride_pages = st_pgs
-+ */
-+static unsigned long
-+stride_pg_count(pgoff_t st_off, unsigned long st_len, unsigned long st_pgs,
-+ unsigned long off, unsigned length)
-+{
-+ unsigned long cont_len = st_off > off ? st_off - off : 0;
-+ __u64 stride_len = length + off > st_off ?
-+ length + off + 1 - st_off : 0;
-+ unsigned long left, pg_count;
-+
-+ if (st_len == 0 || length == 0)
-+ return length;
-+
-+ left = do_div(stride_len, st_len);
-+ left = min(left, st_pgs);
-+
-+ pg_count = left + stride_len * st_pgs + cont_len;
-+
-+ LASSERT(pg_count >= left);
-+
-+ CDEBUG(D_READA, "st_off %lu, st_len %lu st_pgs %lu off %lu length %u"
-+ "pgcount %lu\n", st_off, st_len, st_pgs, off, length, pg_count);
-+
-+ return pg_count;
-+}
-+
-+static int ria_page_count(struct ra_io_arg *ria)
-+{
-+ __u64 length = ria->ria_end >= ria->ria_start ?
-+ ria->ria_end - ria->ria_start + 1 : 0;
-+
-+ return stride_pg_count(ria->ria_stoff, ria->ria_length,
-+ ria->ria_pages, ria->ria_start,
-+ length);
-+}
-+
-+/*Check whether the index is in the defined ra-window */
-+static int ras_inside_ra_window(unsigned long idx, struct ra_io_arg *ria)
-+{
-+ /* If ria_length == ria_pages, it means non-stride I/O mode,
-+ * idx should always inside read-ahead window in this case
-+ * For stride I/O mode, just check whether the idx is inside
-+ * the ria_pages. */
-+ return ria->ria_length == 0 || ria->ria_length == ria->ria_pages ||
-+ (idx - ria->ria_stoff) % ria->ria_length < ria->ria_pages;
-+}
-+
-+static int ll_read_ahead_pages(struct obd_export *exp,
-+ struct obd_io_group *oig,
-+ struct ra_io_arg *ria,
-+ unsigned long *reserved_pages,
-+ struct address_space *mapping,
-+ unsigned long *ra_end)
-+{
-+ int rc, count = 0, stride_ria;
-+ unsigned long page_idx;
-+
-+ LASSERT(ria != NULL);
-+ RIA_DEBUG(ria);
-+
-+ stride_ria = ria->ria_length > ria->ria_pages && ria->ria_pages > 0;
-+ for (page_idx = ria->ria_start; page_idx <= ria->ria_end &&
-+ *reserved_pages > 0; page_idx++) {
-+ if (ras_inside_ra_window(page_idx, ria)) {
-+ /* If the page is inside the read-ahead window*/
-+ rc = ll_read_ahead_page(exp, oig, page_idx, mapping);
-+ if (rc == 1) {
-+ (*reserved_pages)--;
-+ count ++;
-+ } else if (rc == -ENOLCK)
-+ break;
-+ } else if (stride_ria) {
-+ /* If it is not in the read-ahead window, and it is
-+ * read-ahead mode, then check whether it should skip
-+ * the stride gap */
-+ pgoff_t offset;
-+ /* FIXME: This assertion only is valid when it is for
-+ * forward read-ahead, it will be fixed when backward
-+ * read-ahead is implemented */
-+ LASSERTF(page_idx > ria->ria_stoff, "since %lu in the"
-+ " gap of ra window,it should bigger than stride"
-+ " offset %lu \n", page_idx, ria->ria_stoff);
-+
-+ offset = page_idx - ria->ria_stoff;
-+ offset = offset % (ria->ria_length);
-+ if (offset > ria->ria_pages) {
-+ page_idx += ria->ria_length - offset;
-+ CDEBUG(D_READA, "i %lu skip %lu \n", page_idx,
-+ ria->ria_length - offset);
-+ continue;
-+ }
-+ }
-+ }
-+ *ra_end = page_idx;
-+ return count;
-+}
-+
-+static int ll_readahead(struct ll_readahead_state *ras,
-+ struct obd_export *exp, struct address_space *mapping,
-+ struct obd_io_group *oig, int flags)
-+{
-+ unsigned long start = 0, end = 0, reserved;
-+ unsigned long ra_end, len;
-+ struct inode *inode;
-+ struct lov_stripe_md *lsm;
-+ struct ll_ra_read *bead;
-+ struct ost_lvb lvb;
-+ struct ra_io_arg ria = { 0 };
-+ int ret = 0;
-+ __u64 kms;
-+ ENTRY;
-+
-+ inode = mapping->host;
-+ lsm = ll_i2info(inode)->lli_smd;
-+
-+ lov_stripe_lock(lsm);
-+ inode_init_lvb(inode, &lvb);
-+ obd_merge_lvb(ll_i2obdexp(inode), lsm, &lvb, 1);
-+ kms = lvb.lvb_size;
-+ lov_stripe_unlock(lsm);
-+ if (kms == 0) {
-+ ll_ra_stats_inc(mapping, RA_STAT_ZERO_LEN);
-+ RETURN(0);
-+ }
-+
-+ spin_lock(&ras->ras_lock);
-+ bead = ll_ra_read_get_locked(ras);
-+ /* Enlarge the RA window to encompass the full read */
-+ if (bead != NULL && ras->ras_window_start + ras->ras_window_len <
-+ bead->lrr_start + bead->lrr_count) {
-+ ras->ras_window_len = bead->lrr_start + bead->lrr_count -
-+ ras->ras_window_start;
-+ }
-+ /* Reserve a part of the read-ahead window that we'll be issuing */
-+ if (ras->ras_window_len) {
-+ start = ras->ras_next_readahead;
-+ end = ras->ras_window_start + ras->ras_window_len - 1;
-+ }
-+ if (end != 0) {
-+ /* Truncate RA window to end of file */
-+ end = min(end, (unsigned long)((kms - 1) >> CFS_PAGE_SHIFT));
-+ ras->ras_next_readahead = max(end, end + 1);
-+ RAS_CDEBUG(ras);
-+ }
-+ ria.ria_start = start;
-+ ria.ria_end = end;
-+ /* If stride I/O mode is detected, get stride window*/
-+ if (stride_io_mode(ras)) {
-+ ria.ria_stoff = ras->ras_stride_offset;
-+ ria.ria_length = ras->ras_stride_length;
-+ ria.ria_pages = ras->ras_stride_pages;
-+ }
-+ spin_unlock(&ras->ras_lock);
-+
-+ if (end == 0) {
-+ ll_ra_stats_inc(mapping, RA_STAT_ZERO_WINDOW);
-+ RETURN(0);
-+ }
-+
-+ len = ria_page_count(&ria);
-+ if (len == 0)
-+ RETURN(0);
-+
-+ reserved = ll_ra_count_get(ll_i2sbi(inode), len);
-+ if (reserved < len)
-+ ll_ra_stats_inc(mapping, RA_STAT_MAX_IN_FLIGHT);
-+
-+ CDEBUG(D_READA, "reserved page %lu \n", reserved);
-+
-+ ret = ll_read_ahead_pages(exp, oig, &ria, &reserved, mapping, &ra_end);
-+
-+ LASSERTF(reserved >= 0, "reserved %lu\n", reserved);
-+ if (reserved != 0)
-+ ll_ra_count_put(ll_i2sbi(inode), reserved);
-+
-+ if (ra_end == end + 1 && ra_end == (kms >> CFS_PAGE_SHIFT))
-+ ll_ra_stats_inc(mapping, RA_STAT_EOF);
-+
-+ /* if we didn't get to the end of the region we reserved from
-+ * the ras we need to go back and update the ras so that the
-+ * next read-ahead tries from where we left off. we only do so
-+ * if the region we failed to issue read-ahead on is still ahead
-+ * of the app and behind the next index to start read-ahead from */
-+ CDEBUG(D_READA, "ra_end %lu end %lu stride end %lu \n",
-+ ra_end, end, ria.ria_end);
-+
-+ if (ra_end != (end + 1)) {
-+ spin_lock(&ras->ras_lock);
-+ if (ra_end < ras->ras_next_readahead &&
-+ index_in_window(ra_end, ras->ras_window_start, 0,
-+ ras->ras_window_len)) {
-+ ras->ras_next_readahead = ra_end;
-+ RAS_CDEBUG(ras);
-+ }
-+ spin_unlock(&ras->ras_lock);
-+ }
-+
-+ RETURN(ret);
-+}
-+
-+static void ras_set_start(struct ll_readahead_state *ras, unsigned long index)
-+{
-+ ras->ras_window_start = index & (~(RAS_INCREASE_STEP - 1));
-+}
-+
-+/* called with the ras_lock held or from places where it doesn't matter */
-+static void ras_reset(struct ll_readahead_state *ras, unsigned long index)
-+{
-+ ras->ras_last_readpage = index;
-+ ras->ras_consecutive_requests = 0;
-+ ras->ras_consecutive_pages = 0;
-+ ras->ras_window_len = 0;
-+ ras_set_start(ras, index);
-+ ras->ras_next_readahead = max(ras->ras_window_start, index);
-+
-+ RAS_CDEBUG(ras);
-+}
-+
-+/* called with the ras_lock held or from places where it doesn't matter */
-+static void ras_stride_reset(struct ll_readahead_state *ras)
-+{
-+ ras->ras_consecutive_stride_requests = 0;
-+ ras->ras_stride_length = 0;
-+ ras->ras_stride_pages = 0;
-+ RAS_CDEBUG(ras);
-+}
-+
-+void ll_readahead_init(struct inode *inode, struct ll_readahead_state *ras)
-+{
-+ spin_lock_init(&ras->ras_lock);
-+ ras_reset(ras, 0);
-+ ras->ras_requests = 0;
-+ INIT_LIST_HEAD(&ras->ras_read_beads);
-+}
-+
-+/*
-+ * Check whether the read request is in the stride window.
-+ * If it is in the stride window, return 1, otherwise return 0.
-+ */
-+static int index_in_stride_window(unsigned long index,
-+ struct ll_readahead_state *ras,
-+ struct inode *inode)
-+{
-+ unsigned long stride_gap = index - ras->ras_last_readpage - 1;
-+
-+ if (ras->ras_stride_length == 0 || ras->ras_stride_pages == 0)
-+ return 0;
-+
-+ /* If it is contiguous read */
-+ if (stride_gap == 0)
-+ return ras->ras_consecutive_pages + 1 <= ras->ras_stride_pages;
-+
-+ /*Otherwise check the stride by itself */
-+ return (ras->ras_stride_length - ras->ras_stride_pages) == stride_gap &&
-+ ras->ras_consecutive_pages == ras->ras_stride_pages;
-+}
-+
-+static void ras_update_stride_detector(struct ll_readahead_state *ras,
-+ unsigned long index)
-+{
-+ unsigned long stride_gap = index - ras->ras_last_readpage - 1;
-+
-+ if (!stride_io_mode(ras) && (stride_gap != 0 ||
-+ ras->ras_consecutive_stride_requests == 0)) {
-+ ras->ras_stride_pages = ras->ras_consecutive_pages;
-+ ras->ras_stride_length = stride_gap +ras->ras_consecutive_pages;
-+ }
-+ RAS_CDEBUG(ras);
-+}
-+
-+static unsigned long
-+stride_page_count(struct ll_readahead_state *ras, unsigned long len)
-+{
-+ return stride_pg_count(ras->ras_stride_offset, ras->ras_stride_length,
-+ ras->ras_stride_pages, ras->ras_stride_offset,
-+ len);
-+}
-+
-+/* Stride Read-ahead window will be increased inc_len according to
-+ * stride I/O pattern */
-+static void ras_stride_increase_window(struct ll_readahead_state *ras,
-+ struct ll_ra_info *ra,
-+ unsigned long inc_len)
-+{
-+ unsigned long left, step, window_len;
-+ unsigned long stride_len;
-+
-+ LASSERT(ras->ras_stride_length > 0);
-+
-+ stride_len = ras->ras_window_start + ras->ras_window_len -
-+ ras->ras_stride_offset;
-+
-+ LASSERTF(stride_len >= 0, "window_start %lu, window_len %lu"
-+ " stride_offset %lu\n", ras->ras_window_start,
-+ ras->ras_window_len, ras->ras_stride_offset);
-+
-+ left = stride_len % ras->ras_stride_length;
-+
-+ window_len = ras->ras_window_len - left;
-+
-+ if (left < ras->ras_stride_pages)
-+ left += inc_len;
-+ else
-+ left = ras->ras_stride_pages + inc_len;
-+
-+ LASSERT(ras->ras_stride_pages != 0);
-+
-+ step = left / ras->ras_stride_pages;
-+ left %= ras->ras_stride_pages;
-+
-+ window_len += step * ras->ras_stride_length + left;
-+
-+ if (stride_page_count(ras, window_len) <= ra->ra_max_pages)
-+ ras->ras_window_len = window_len;
-+
-+ RAS_CDEBUG(ras);
-+}
-+
-+/* Set stride I/O read-ahead window start offset */
-+static void ras_set_stride_offset(struct ll_readahead_state *ras)
-+{
-+ unsigned long window_len = ras->ras_next_readahead -
-+ ras->ras_window_start;
-+ unsigned long left;
-+
-+ LASSERT(ras->ras_stride_length != 0);
-+
-+ left = window_len % ras->ras_stride_length;
-+
-+ ras->ras_stride_offset = ras->ras_next_readahead - left;
-+
-+ RAS_CDEBUG(ras);
-+}
-+
-+static void ras_update(struct ll_sb_info *sbi, struct inode *inode,
-+ struct ll_readahead_state *ras, unsigned long index,
-+ unsigned hit)
-+{
-+ struct ll_ra_info *ra = &sbi->ll_ra_info;
-+ int zero = 0, stride_detect = 0, ra_miss = 0;
-+ ENTRY;
-+
-+ spin_lock(&sbi->ll_lock);
-+ spin_lock(&ras->ras_lock);
-+
-+ ll_ra_stats_inc_unlocked(ra, hit ? RA_STAT_HIT : RA_STAT_MISS);
-+
-+ /* reset the read-ahead window in two cases. First when the app seeks
-+ * or reads to some other part of the file. Secondly if we get a
-+ * read-ahead miss that we think we've previously issued. This can
-+ * be a symptom of there being so many read-ahead pages that the VM is
-+ * reclaiming it before we get to it. */
-+ if (!index_in_window(index, ras->ras_last_readpage, 8, 8)) {
-+ zero = 1;
-+ ll_ra_stats_inc_unlocked(ra, RA_STAT_DISTANT_READPAGE);
-+ } else if (!hit && ras->ras_window_len &&
-+ index < ras->ras_next_readahead &&
-+ index_in_window(index, ras->ras_window_start, 0,
-+ ras->ras_window_len)) {
-+ ra_miss = 1;
-+ ll_ra_stats_inc_unlocked(ra, RA_STAT_MISS_IN_WINDOW);
-+ }
-+
-+ /* On the second access to a file smaller than the tunable
-+ * ra_max_read_ahead_whole_pages trigger RA on all pages in the
-+ * file up to ra_max_pages. This is simply a best effort and
-+ * only occurs once per open file. Normal RA behavior is reverted
-+ * to for subsequent IO. The mmap case does not increment
-+ * ras_requests and thus can never trigger this behavior. */
-+ if (ras->ras_requests == 2 && !ras->ras_request_index) {
-+ __u64 kms_pages;
-+
-+ kms_pages = (i_size_read(inode) + CFS_PAGE_SIZE - 1) >>
-+ CFS_PAGE_SHIFT;
-+
-+ CDEBUG(D_READA, "kmsp "LPU64" mwp %lu mp %lu\n", kms_pages,
-+ ra->ra_max_read_ahead_whole_pages, ra->ra_max_pages);
-+
-+ if (kms_pages &&
-+ kms_pages <= ra->ra_max_read_ahead_whole_pages) {
-+ ras->ras_window_start = 0;
-+ ras->ras_last_readpage = 0;
-+ ras->ras_next_readahead = 0;
-+ ras->ras_window_len = min(ra->ra_max_pages,
-+ ra->ra_max_read_ahead_whole_pages);
-+ GOTO(out_unlock, 0);
-+ }
-+ }
-+ if (zero) {
-+ /* check whether it is in stride I/O mode*/
-+ if (!index_in_stride_window(index, ras, inode)) {
-+ ras_reset(ras, index);
-+ ras->ras_consecutive_pages++;
-+ ras_stride_reset(ras);
-+ GOTO(out_unlock, 0);
-+ } else {
-+ ras->ras_consecutive_requests = 0;
-+ if (++ras->ras_consecutive_stride_requests > 1)
-+ stride_detect = 1;
-+ RAS_CDEBUG(ras);
-+ }
-+ } else {
-+ if (ra_miss) {
-+ if (index_in_stride_window(index, ras, inode) &&
-+ stride_io_mode(ras)) {
-+ /*If stride-RA hit cache miss, the stride dector
-+ *will not be reset to avoid the overhead of
-+ *redetecting read-ahead mode */
-+ if (index != ras->ras_last_readpage + 1)
-+ ras->ras_consecutive_pages = 0;
-+ RAS_CDEBUG(ras);
-+ } else {
-+ /*Reset both stride window and normal RA window*/
-+ ras_reset(ras, index);
-+ ras->ras_consecutive_pages++;
-+ ras_stride_reset(ras);
-+ GOTO(out_unlock, 0);
-+ }
-+ } else if (stride_io_mode(ras)) {
-+ /* If this is contiguous read but in stride I/O mode
-+ * currently, check whether stride step still is valid,
-+ * if invalid, it will reset the stride ra window*/
-+ if (!index_in_stride_window(index, ras, inode)) {
-+ /*Shrink stride read-ahead window to be zero*/
-+ ras_stride_reset(ras);
-+ ras->ras_window_len = 0;
-+ ras->ras_next_readahead = index;
-+ }
-+ }
-+ }
-+ ras->ras_consecutive_pages++;
-+ ras_update_stride_detector(ras, index);
-+ ras->ras_last_readpage = index;
-+ ras_set_start(ras, index);
-+ ras->ras_next_readahead = max(ras->ras_window_start,
-+ ras->ras_next_readahead);
-+ RAS_CDEBUG(ras);
-+
-+ /* Trigger RA in the mmap case where ras_consecutive_requests
-+ * is not incremented and thus can't be used to trigger RA */
-+ if (!ras->ras_window_len && ras->ras_consecutive_pages == 4) {
-+ ras->ras_window_len = RAS_INCREASE_STEP;
-+ GOTO(out_unlock, 0);
-+ }
-+
-+ /* Initially reset the stride window offset to next_readahead*/
-+ if (ras->ras_consecutive_stride_requests == 2 && stride_detect)
-+ ras_set_stride_offset(ras);
-+
-+ /* The initial ras_window_len is set to the request size. To avoid
-+ * uselessly reading and discarding pages for random IO the window is
-+ * only increased once per consecutive request received. */
-+ if ((ras->ras_consecutive_requests > 1 &&
-+ !ras->ras_request_index) || stride_detect) {
-+ if (stride_io_mode(ras))
-+ ras_stride_increase_window(ras, ra, RAS_INCREASE_STEP);
-+ else
-+ ras->ras_window_len = min(ras->ras_window_len +
-+ RAS_INCREASE_STEP,
-+ ra->ra_max_pages);
-+ }
-+ EXIT;
-+out_unlock:
-+ RAS_CDEBUG(ras);
-+ ras->ras_request_index++;
-+ spin_unlock(&ras->ras_lock);
-+ spin_unlock(&sbi->ll_lock);
-+ return;
-+}
-+
-+int ll_writepage(struct page *page)
-+{
-+ struct inode *inode = page->mapping->host;
-+ struct ll_inode_info *lli = ll_i2info(inode);
-+ struct obd_export *exp;
-+ struct ll_async_page *llap;
-+ int rc = 0;
-+ ENTRY;
-+
-+ LASSERT(PageLocked(page));
-+
-+ exp = ll_i2obdexp(inode);
-+ if (exp == NULL)
-+ GOTO(out, rc = -EINVAL);
-+
-+ llap = llap_from_page(page, LLAP_ORIGIN_WRITEPAGE);
-+ if (IS_ERR(llap))
-+ GOTO(out, rc = PTR_ERR(llap));
-+
-+ LASSERT(!llap->llap_nocache);
-+ LASSERT(!PageWriteback(page));
-+ set_page_writeback(page);
-+
-+ page_cache_get(page);
-+ if (llap->llap_write_queued) {
-+ LL_CDEBUG_PAGE(D_PAGE, page, "marking urgent\n");
-+ rc = obd_set_async_flags(exp, lli->lli_smd, NULL,
-+ llap->llap_cookie,
-+ ASYNC_READY | ASYNC_URGENT);
-+ } else {
-+ rc = queue_or_sync_write(exp, inode, llap, CFS_PAGE_SIZE,
-+ ASYNC_READY | ASYNC_URGENT);
-+ }
-+ if (rc) {
-+ /* re-dirty page on error so it retries write */
-+ if (PageWriteback(page))
-+ end_page_writeback(page);
-+
-+ /* resend page only for not started IO*/
-+ if (!PageError(page))
-+ ll_redirty_page(page);
-+
-+ page_cache_release(page);
-+ }
-+out:
-+ if (rc) {
-+ if (!lli->lli_async_rc)
-+ lli->lli_async_rc = rc;
-+ /* resend page only for not started IO*/
-+ unlock_page(page);
-+ }
-+ RETURN(rc);
-+}
-+
-+/*
-+ * for now we do our readpage the same on both 2.4 and 2.5. The kernel's
-+ * read-ahead assumes it is valid to issue readpage all the way up to
-+ * i_size, but our dlm locks make that not the case. We disable the
-+ * kernel's read-ahead and do our own by walking ahead in the page cache
-+ * checking for dlm lock coverage. the main difference between 2.4 and
-+ * 2.6 is how read-ahead gets batched and issued, but we're using our own,
-+ * so they look the same.
-+ */
-+int ll_readpage(struct file *filp, struct page *page)
-+{
-+ struct ll_file_data *fd = LUSTRE_FPRIVATE(filp);
-+ struct inode *inode = page->mapping->host;
-+ struct obd_export *exp;
-+ struct ll_async_page *llap;
-+ struct obd_io_group *oig = NULL;
-+ struct lustre_handle *lockh = NULL;
-+ int rc;
-+ ENTRY;
-+
-+ LASSERT(PageLocked(page));
-+ LASSERT(!PageUptodate(page));
-+ CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),offset=%Lu=%#Lx\n",
-+ inode->i_ino, inode->i_generation, inode,
-+ (((loff_t)page->index) << CFS_PAGE_SHIFT),
-+ (((loff_t)page->index) << CFS_PAGE_SHIFT));
-+ LASSERT(atomic_read(&filp->f_dentry->d_inode->i_count) > 0);
-+
-+ if (!ll_i2info(inode)->lli_smd) {
-+ /* File with no objects - one big hole */
-+ /* We use this just for remove_from_page_cache that is not
-+ * exported, we'd make page back up to date. */
-+ ll_truncate_complete_page(page);
-+ clear_page(kmap(page));
-+ kunmap(page);
-+ SetPageUptodate(page);
-+ unlock_page(page);
-+ RETURN(0);
-+ }
-+
-+ rc = oig_init(&oig);
-+ if (rc < 0)
-+ GOTO(out, rc);
-+
-+ exp = ll_i2obdexp(inode);
-+ if (exp == NULL)
-+ GOTO(out, rc = -EINVAL);
-+
-+ if (fd->fd_flags & LL_FILE_GROUP_LOCKED)
-+ lockh = &fd->fd_cwlockh;
-+
-+ llap = llap_from_page_with_lockh(page, LLAP_ORIGIN_READPAGE, lockh);
-+ if (IS_ERR(llap)) {
-+ if (PTR_ERR(llap) == -ENOLCK) {
-+ CWARN("ino %lu page %lu (%llu) not covered by "
-+ "a lock (mmap?). check debug logs.\n",
-+ inode->i_ino, page->index,
-+ (long long)page->index << PAGE_CACHE_SHIFT);
-+ }
-+ GOTO(out, rc = PTR_ERR(llap));
-+ }
-+
-+ if (ll_i2sbi(inode)->ll_ra_info.ra_max_pages)
-+ ras_update(ll_i2sbi(inode), inode, &fd->fd_ras, page->index,
-+ llap->llap_defer_uptodate);
-+
-+
-+ if (llap->llap_defer_uptodate) {
-+ /* This is the callpath if we got the page from a readahead */
-+ llap->llap_ra_used = 1;
-+ rc = ll_readahead(&fd->fd_ras, exp, page->mapping, oig,
-+ fd->fd_flags);
-+ if (rc > 0)
-+ obd_trigger_group_io(exp, ll_i2info(inode)->lli_smd,
-+ NULL, oig);
-+ LL_CDEBUG_PAGE(D_PAGE, page, "marking uptodate from defer\n");
-+ SetPageUptodate(page);
-+ unlock_page(page);
-+ GOTO(out_oig, rc = 0);
-+ }
-+
-+ rc = ll_issue_page_read(exp, llap, oig, 0);
-+ if (rc)
-+ GOTO(out, rc);
-+
-+ LL_CDEBUG_PAGE(D_PAGE, page, "queued readpage\n");
-+ /* We have just requested the actual page we want, see if we can tack
-+ * on some readahead to that page's RPC before it is sent. */
-+ if (ll_i2sbi(inode)->ll_ra_info.ra_max_pages)
-+ ll_readahead(&fd->fd_ras, exp, page->mapping, oig,
-+ fd->fd_flags);
-+
-+ rc = obd_trigger_group_io(exp, ll_i2info(inode)->lli_smd, NULL, oig);
-+
-+out:
-+ if (rc)
-+ unlock_page(page);
-+out_oig:
-+ if (oig != NULL)
-+ oig_release(oig);
-+ RETURN(rc);
-+}
-+
-+static void ll_file_put_pages(struct page **pages, int numpages)
-+{
-+ int i;
-+ struct page **pp;
-+ ENTRY;
-+
-+ for (i = 0, pp = pages; i < numpages; i++, pp++) {
-+ if (*pp) {
-+ LL_CDEBUG_PAGE(D_PAGE, (*pp), "free\n");
-+ __ll_put_llap(*pp);
-+ if (page_private(*pp))
-+ CERROR("the llap wasn't freed\n");
-+ (*pp)->mapping = NULL;
-+ if (page_count(*pp) != 1)
-+ CERROR("page %p, flags %#lx, count %i, private %p\n",
-+ (*pp), (unsigned long)(*pp)->flags, page_count(*pp),
-+ (void*)page_private(*pp));
-+ __free_pages(*pp, 0);
-+ }
-+ }
-+ OBD_FREE(pages, numpages * sizeof(struct page*));
-+ EXIT;
-+}
-+
-+static struct page **ll_file_prepare_pages(int numpages, struct inode *inode,
-+ unsigned long first)
-+{
-+ struct page **pages;
-+ int i;
-+ int rc = 0;
-+ ENTRY;
-+
-+ OBD_ALLOC(pages, sizeof(struct page *) * numpages);
-+ if (pages == NULL)
-+ RETURN(ERR_PTR(-ENOMEM));
-+ for (i = 0; i < numpages; i++) {
-+ struct page *page;
-+ struct ll_async_page *llap;
-+
-+ page = alloc_pages(GFP_HIGHUSER, 0);
-+ if (page == NULL)
-+ GOTO(err, rc = -ENOMEM);
-+ pages[i] = page;
-+ /* llap_from_page needs page index and mapping to be set */
-+ page->index = first++;
-+ page->mapping = inode->i_mapping;
-+ llap = llap_from_page(page, LLAP_ORIGIN_LOCKLESS_IO);
-+ if (IS_ERR(llap))
-+ GOTO(err, rc = PTR_ERR(llap));
-+ llap->llap_lockless_io_page = 1;
-+ }
-+ RETURN(pages);
-+err:
-+ ll_file_put_pages(pages, numpages);
-+ RETURN(ERR_PTR(rc));
-+ }
-+
-+static ssize_t ll_file_copy_pages(struct page **pages, int numpages,
-+ const struct iovec *iov, unsigned long nsegs,
-+ ssize_t iov_offset, loff_t pos, size_t count,
-+ int rw)
-+{
-+ ssize_t amount = 0;
-+ int i;
-+ int updatechecksum = ll_i2sbi(pages[0]->mapping->host)->ll_flags &
-+ LL_SBI_LLITE_CHECKSUM;
-+ ENTRY;
-+
-+ for (i = 0; i < numpages; i++) {
-+ unsigned offset, bytes, left = 0;
-+ char *vaddr;
-+
-+ vaddr = kmap(pages[i]);
-+ offset = pos & (CFS_PAGE_SIZE - 1);
-+ bytes = min_t(unsigned, CFS_PAGE_SIZE - offset, count);
-+ LL_CDEBUG_PAGE(D_PAGE, pages[i], "op = %s, addr = %p, "
-+ "bytes = %u\n",
-+ (rw == WRITE) ? "CFU" : "CTU",
-+ vaddr + offset, bytes);
-+ while (bytes > 0 && !left && nsegs) {
-+ unsigned copy = min_t(ssize_t, bytes,
-+ iov->iov_len - iov_offset);
-+ if (rw == WRITE) {
-+ left = copy_from_user(vaddr + offset,
-+ iov->iov_base +iov_offset,
-+ copy);
-+ if (updatechecksum) {
-+ struct ll_async_page *llap;
-+
-+ llap = llap_cast_private(pages[i]);
-+ llap->llap_checksum =
-+ init_checksum(OSC_DEFAULT_CKSUM);
-+ llap->llap_checksum =
-+ compute_checksum(llap->llap_checksum,
-+ vaddr,CFS_PAGE_SIZE,
-+ OSC_DEFAULT_CKSUM);
-+ }
-+ } else {
-+ left = copy_to_user(iov->iov_base + iov_offset,
-+ vaddr + offset, copy);
-+ }
-+
-+ amount += copy;
-+ count -= copy;
-+ pos += copy;
-+ iov_offset += copy;
-+ bytes -= copy;
-+ if (iov_offset == iov->iov_len) {
-+ iov_offset = 0;
-+ iov++;
-+ nsegs--;
-+ }
-+ }
-+ kunmap(pages[i]);
-+ if (left) {
-+ amount -= left;
-+ break;
-+ }
-+ }
-+ if (amount == 0)
-+ RETURN(-EFAULT);
-+ RETURN(amount);
-+}
-+
-+static int ll_file_oig_pages(struct inode * inode, struct page **pages,
-+ int numpages, loff_t pos, size_t count, int rw)
-+{
-+ struct obd_io_group *oig;
-+ struct ll_inode_info *lli = ll_i2info(inode);
-+ struct obd_export *exp;
-+ loff_t org_pos = pos;
-+ obd_flag brw_flags;
-+ int rc;
-+ int i;
-+ ENTRY;
-+
-+ exp = ll_i2obdexp(inode);
-+ if (exp == NULL)
-+ RETURN(-EINVAL);
-+ rc = oig_init(&oig);
-+ if (rc)
-+ RETURN(rc);
-+ brw_flags = OBD_BRW_SRVLOCK;
-+ if (cfs_capable(CFS_CAP_SYS_RESOURCE))
-+ brw_flags |= OBD_BRW_NOQUOTA;
-+
-+ for (i = 0; i < numpages; i++) {
-+ struct ll_async_page *llap;
-+ unsigned from, bytes;
-+
-+ from = pos & (CFS_PAGE_SIZE - 1);
-+ bytes = min_t(unsigned, CFS_PAGE_SIZE - from,
-+ count - pos + org_pos);
-+ llap = llap_cast_private(pages[i]);
-+ LASSERT(llap);
-+
-+ lock_page(pages[i]);
-+
-+ LL_CDEBUG_PAGE(D_PAGE, pages[i], "offset "LPU64","
-+ " from %u, bytes = %u\n",
-+ pos, from, bytes);
-+ LASSERTF(pos >> CFS_PAGE_SHIFT == pages[i]->index,
-+ "wrong page index %lu (%lu)\n",
-+ pages[i]->index,
-+ (unsigned long)(pos >> CFS_PAGE_SHIFT));
-+ rc = obd_queue_group_io(exp, lli->lli_smd, NULL, oig,
-+ llap->llap_cookie,
-+ (rw == WRITE) ?
-+ OBD_BRW_WRITE:OBD_BRW_READ,
-+ from, bytes, brw_flags,
-+ ASYNC_READY | ASYNC_URGENT |
-+ ASYNC_COUNT_STABLE | ASYNC_GROUP_SYNC);
-+ if (rc) {
-+ i++;
-+ GOTO(out, rc);
-+ }
-+ pos += bytes;
-+ }
-+ rc = obd_trigger_group_io(exp, lli->lli_smd, NULL, oig);
-+ if (rc)
-+ GOTO(out, rc);
-+ rc = oig_wait(oig);
-+out:
-+ while(--i >= 0)
-+ unlock_page(pages[i]);
-+ oig_release(oig);
-+ RETURN(rc);
-+}
-+
-+/* Advance through passed iov, adjust iov pointer as necessary and return
-+ * starting offset in individual entry we are pointing at. Also reduce
-+ * nr_segs as needed */
-+static ssize_t ll_iov_advance(const struct iovec **iov, unsigned long *nr_segs,
-+ ssize_t offset)
-+{
-+ while (*nr_segs > 0) {
-+ if ((*iov)->iov_len > offset)
-+ return ((*iov)->iov_len - offset);
-+ offset -= (*iov)->iov_len;
-+ (*iov)++;
-+ (*nr_segs)--;
-+ }
-+ return 0;
-+}
-+
-+ssize_t ll_file_lockless_io(struct file *file, const struct iovec *iov,
-+ unsigned long nr_segs,
-+ loff_t *ppos, int rw, ssize_t count)
-+{
-+ loff_t pos;
-+ struct inode *inode = file->f_dentry->d_inode;
-+ ssize_t rc = 0;
-+ int max_pages;
-+ size_t amount = 0;
-+ unsigned long first, last;
-+ const struct iovec *iv = &iov[0];
-+ unsigned long nsegs = nr_segs;
-+ unsigned long offset = 0;
-+ ENTRY;
-+
-+ if (rw == READ) {
-+ loff_t isize;
-+
-+ ll_inode_size_lock(inode, 0);
-+ isize = i_size_read(inode);
-+ ll_inode_size_unlock(inode, 0);
-+ if (*ppos >= isize)
-+ GOTO(out, rc = 0);
-+ if (*ppos + count >= isize)
-+ count -= *ppos + count - isize;
-+ if (count == 0)
-+ GOTO(out, rc);
-+ } else {
-+ rc = generic_write_checks(file, ppos, &count, 0);
-+ if (rc)
-+ GOTO(out, rc);
-+ rc = ll_remove_suid(file->f_dentry, file->f_vfsmnt);
-+ if (rc)
-+ GOTO(out, rc);
-+ }
-+
-+ pos = *ppos;
-+ first = pos >> CFS_PAGE_SHIFT;
-+ last = (pos + count - 1) >> CFS_PAGE_SHIFT;
-+ max_pages = PTLRPC_MAX_BRW_PAGES *
-+ ll_i2info(inode)->lli_smd->lsm_stripe_count;
-+ CDEBUG(D_INFO, "%u, stripe_count = %u\n",
-+ PTLRPC_MAX_BRW_PAGES /* max_pages_per_rpc */,
-+ ll_i2info(inode)->lli_smd->lsm_stripe_count);
-+
-+ while (first <= last && rc >= 0) {
-+ int pages_for_io;
-+ struct page **pages;
-+ size_t bytes = count - amount;
-+
-+ pages_for_io = min_t(int, last - first + 1, max_pages);
-+ pages = ll_file_prepare_pages(pages_for_io, inode, first);
-+ if (IS_ERR(pages)) {
-+ rc = PTR_ERR(pages);
-+ break;
-+ }
-+ if (rw == WRITE) {
-+ rc = ll_file_copy_pages(pages, pages_for_io, iv, nsegs,
-+ offset, pos + amount, bytes,
-+ rw);
-+ if (rc < 0)
-+ GOTO(put_pages, rc);
-+ offset = ll_iov_advance(&iv, &nsegs, offset + rc);
-+ bytes = rc;
-+ }
-+ rc = ll_file_oig_pages(inode, pages, pages_for_io,
-+ pos + amount, bytes, rw);
-+ if (rc)
-+ GOTO(put_pages, rc);
-+ if (rw == READ) {
-+ rc = ll_file_copy_pages(pages, pages_for_io, iv, nsegs,
-+ offset, pos + amount, bytes, rw);
-+ if (rc < 0)
-+ GOTO(put_pages, rc);
-+ offset = ll_iov_advance(&iv, &nsegs, offset + rc);
-+ bytes = rc;
-+ }
-+ amount += bytes;
-+put_pages:
-+ ll_file_put_pages(pages, pages_for_io);
-+ first += pages_for_io;
-+ /* a short read/write check */
-+ if (pos + amount < ((loff_t)first << CFS_PAGE_SHIFT))
-+ break;
-+ /* Check if we are out of userspace buffers. (how that could
-+ happen?) */
-+ if (nsegs == 0)
-+ break;
-+ }
-+ /* NOTE: don't update i_size and KMS in absence of LDLM locks even
-+ * write makes the file large */
-+ file_accessed(file);
-+ if (rw == READ && amount < count && rc == 0) {
-+ unsigned long not_cleared;
-+
-+ while (nsegs > 0) {
-+ ssize_t to_clear = min_t(ssize_t, count - amount,
-+ iv->iov_len - offset);
-+ not_cleared = clear_user(iv->iov_base + offset,
-+ to_clear);
-+ amount += to_clear - not_cleared;
-+ if (not_cleared) {
-+ rc = -EFAULT;
-+ break;
-+ }
-+ offset = 0;
-+ iv++;
-+ nsegs--;
-+ }
-+ }
-+ if (amount > 0) {
-+ lprocfs_counter_add(ll_i2sbi(inode)->ll_stats,
-+ (rw == WRITE) ?
-+ LPROC_LL_LOCKLESS_WRITE :
-+ LPROC_LL_LOCKLESS_READ,
-+ (long)amount);
-+ *ppos += amount;
-+ RETURN(amount);
-+ }
-+out:
-+ RETURN(rc);
-+}
diff -urNad lustre~/lustre/llite/rw24.c lustre/lustre/llite/rw24.c
--- lustre~/lustre/llite/rw24.c 2009-08-19 09:51:09.000000000 +0200
-+++ lustre/lustre/llite/rw24.c 2009-08-19 14:10:45.000000000 +0200
++++ lustre/lustre/llite/rw24.c 2009-08-20 10:25:20.000000000 +0200
@@ -150,8 +150,13 @@
.readpage = ll_readpage,
.direct_IO = ll_direct_IO_24,
.writepage = ll_writepage,
-+#ifdef NO_PREPARE_WRITE
++#ifdef HAVE_WRITE_BEGIN_IN_STRUCT_ADDRESS_SPACE_OPERATIONS
+ .write_begin = ll_write_begin,
+ .write_end = ll_write_end,
+#else
@@ -16577,12 +2753,12 @@ diff -urNad lustre~/lustre/llite/rw24.c lustre/lustre/llite/rw24.c
.bmap = NULL,
diff -urNad lustre~/lustre/llite/rw26.c lustre/lustre/llite/rw26.c
--- lustre~/lustre/llite/rw26.c 2009-08-19 09:51:09.000000000 +0200
-+++ lustre/lustre/llite/rw26.c 2009-08-19 14:10:45.000000000 +0200
++++ lustre/lustre/llite/rw26.c 2009-08-20 10:25:20.000000000 +0200
@@ -339,8 +339,13 @@
.writepages = generic_writepages,
.set_page_dirty = __set_page_dirty_nobuffers,
.sync_page = NULL,
-+#ifdef NO_PREPARE_WRITE
++#ifdef HAVE_WRITE_BEGIN_IN_STRUCT_ADDRESS_SPACE_OPERATIONS
+ .write_begin = ll_write_begin,
+ .write_end = ll_write_end,
+#else
@@ -16594,7 +2770,7 @@ diff -urNad lustre~/lustre/llite/rw26.c lustre/lustre/llite/rw26.c
.bmap = NULL
diff -urNad lustre~/lustre/llite/symlink.c lustre/lustre/llite/symlink.c
--- lustre~/lustre/llite/symlink.c 2009-08-19 09:51:09.000000000 +0200
-+++ lustre/lustre/llite/symlink.c 2009-08-19 14:10:45.000000000 +0200
++++ lustre/lustre/llite/symlink.c 2009-08-20 10:25:20.000000000 +0200
@@ -177,8 +177,12 @@
up(&lli->lli_size_sem);
}
@@ -16610,14 +2786,14 @@ diff -urNad lustre~/lustre/llite/symlink.c lustre/lustre/llite/symlink.c
diff -urNad lustre~/lustre/lvfs/lustre_quota_fmt.c lustre/lustre/lvfs/lustre_quota_fmt.c
--- lustre~/lustre/lvfs/lustre_quota_fmt.c 2009-08-19 09:51:09.000000000 +0200
-+++ lustre/lustre/lvfs/lustre_quota_fmt.c 2009-08-19 14:10:45.000000000 +0200
++++ lustre/lustre/lvfs/lustre_quota_fmt.c 2009-08-20 10:25:20.000000000 +0200
@@ -50,7 +50,12 @@
#include <linux/init.h>
#include <linux/module.h>
#include <linux/slab.h>
-#include <linux/quotaio_v1.h>
+
-+#ifdef HAVE_FS_QUOTA_QUOTAIO_V1_H
++#ifdef HAVE_QUOTAIO_V1_H
+# include <quota/quotaio_v1.h> /* MAX_DQ_TIME */
+#else
+# include <linux/quotaio_v1.h>
@@ -16627,13 +2803,13 @@ diff -urNad lustre~/lustre/lvfs/lustre_quota_fmt.c lustre/lustre/lvfs/lustre_quo
#include <asm/uaccess.h>
diff -urNad lustre~/lustre/lvfs/lustre_quota_fmt_convert.c lustre/lustre/lvfs/lustre_quota_fmt_convert.c
--- lustre~/lustre/lvfs/lustre_quota_fmt_convert.c 2009-08-19 09:51:09.000000000 +0200
-+++ lustre/lustre/lvfs/lustre_quota_fmt_convert.c 2009-08-19 14:10:45.000000000 +0200
++++ lustre/lustre/lvfs/lustre_quota_fmt_convert.c 2009-08-20 10:25:20.000000000 +0200
@@ -50,7 +50,11 @@
#include <linux/init.h>
#include <linux/module.h>
#include <linux/slab.h>
-#include <linux/quotaio_v1.h>
-+#ifdef HAVE_FS_QUOTA_QUOTAIO_V1_H
++#ifdef HAVE_QUOTAIO_V1_H
+# include <quota/quotaio_v1.h> /* MAX_DQ_TIME */
+#else
+# include <linux/quotaio_v1.h>
@@ -16643,12 +2819,12 @@ diff -urNad lustre~/lustre/lvfs/lustre_quota_fmt_convert.c lustre/lustre/lvfs/lu
#include <asm/uaccess.h>
diff -urNad lustre~/lustre/lvfs/lvfs_linux.c lustre/lustre/lvfs/lvfs_linux.c
--- lustre~/lustre/lvfs/lvfs_linux.c 2009-08-19 09:51:09.000000000 +0200
-+++ lustre/lustre/lvfs/lvfs_linux.c 2009-08-19 14:10:45.000000000 +0200
++++ lustre/lustre/lvfs/lvfs_linux.c 2009-08-20 10:25:20.000000000 +0200
@@ -86,10 +86,19 @@
current_ngroups = 0;
} else {
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,4)
-+# ifdef HAS_STRUCT_CRED
++# ifdef HAVE_GET_GROUP_INFO
+ {
+ struct cred *new = prepare_creds();
+ save->group_info = get_group_info(new->group_info);
@@ -16668,7 +2844,7 @@ diff -urNad lustre~/lustre/lvfs/lvfs_linux.c lustre/lustre/lvfs/lvfs_linux.c
current_ngroups = save->ngroups;
} else {
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,4)
-+# ifdef HAS_STRUCT_CRED
++# ifdef HAVE_GET_GROUP_INFO
+ {
+ struct cred *old = prepare_creds();
+ put_group_info(get_group_info(save->group_info));
@@ -16700,7 +2876,7 @@ diff -urNad lustre~/lustre/lvfs/lvfs_linux.c lustre/lustre/lvfs/lvfs_linux.c
LASSERT(new_ctx->pwdmnt);
if (uc) {
-+#ifdef HAS_STRUCT_CRED
++#ifdef HAVE_CRED_IN_STRUCT_TASK_STRUCT
+ struct cred *new = prepare_creds();
+
+ save->luc.luc_fsuid = new->fsuid;
@@ -16742,7 +2918,7 @@ diff -urNad lustre~/lustre/lvfs/lvfs_linux.c lustre/lustre/lvfs/lvfs_linux.c
mntput(saved->pwdmnt);
current->fs->umask = saved->luc.luc_umask;
if (uc) {
-+#ifdef HAS_STRUCT_CRED
++#ifdef HAVE_CRED_IN_STRUCT_TASK_STRUCT
+ struct cred *old = prepare_creds();
+ old->fsuid = saved->luc.luc_fsuid;
+ old->fsgid = saved->luc.luc_fsgid;
@@ -16760,144 +2936,108 @@ diff -urNad lustre~/lustre/lvfs/lvfs_linux.c lustre/lustre/lvfs/lvfs_linux.c
int flags)
{
mntget(ctxt->pwdmnt);
-+#ifdef HAS_STRUCT_CRED
-+ return dentry_open(de, ctxt->pwdmnt, flags, current->real_cred);
++#ifdef HAVE_DENTRY_4ARGS
++ return dentry_open(de, ctxt->pwdmnt, flags, CRED(current));
+#else
return dentry_open(de, ctxt->pwdmnt, flags);
+#endif
}
EXPORT_SYMBOL(l_dentry_open);
-diff -urNad lustre~/lustre/lvfs/upcall_cache.c lustre/lustre/lvfs/upcall_cache.c
---- lustre~/lustre/lvfs/upcall_cache.c 2009-08-19 09:51:09.000000000 +0200
-+++ lustre/lustre/lvfs/upcall_cache.c 2009-08-19 14:10:45.000000000 +0200
-@@ -216,7 +216,7 @@
- entry->ue_primary = primary;
-
- for (i = 0; i < ginfo->nblocks; i++) {
-- int cp_count = min(NGROUPS_PER_BLOCK, (int)ngroups);
-+ int cp_count = min((int)NGROUPS_PER_BLOCK, (int)ngroups);
- int off = i * NGROUPS_PER_BLOCK;
-
- for (j = 0; j < cp_count; j++)
diff -urNad lustre~/lustre/mdc/mdc_lib.c lustre/lustre/mdc/mdc_lib.c
--- lustre~/lustre/mdc/mdc_lib.c 2009-08-19 09:51:09.000000000 +0200
-+++ lustre/lustre/mdc/mdc_lib.c 2009-08-19 14:10:45.000000000 +0200
-@@ -56,8 +56,13 @@
++++ lustre/lustre/mdc/mdc_lib.c 2009-08-20 10:25:20.000000000 +0200
+@@ -56,8 +56,8 @@
struct mds_body *b;
b = lustre_msg_buf(req->rq_reqmsg, offset, sizeof(*b));
-+#ifdef HAS_STRUCT_CRED
-+ b->fsuid = current->real_cred->fsuid;
-+ b->fsgid = current->real_cred->fsgid;
-+#else
- b->fsuid = current->fsuid;
- b->fsgid = current->fsgid;
-+#endif
+- b->fsuid = current->fsuid;
+- b->fsgid = current->fsgid;
++ b->fsuid = CREDENTIALS(current,fsuid);
++ b->fsgid = CREDENTIALS(current,fsgid);
b->capability = cfs_curproc_cap_pack();
b->fid1 = *fid;
b->size = pg_off; /* !! */
-@@ -69,8 +74,13 @@
+@@ -69,8 +69,8 @@
{
LASSERT (b != NULL);
-+#ifdef HAS_STRUCT_CRED
-+ b->fsuid = current->real_cred->fsuid;
-+ b->fsgid = current->real_cred->fsgid;
-+#else
- b->fsuid = current->fsuid;
- b->fsgid = current->fsgid;
-+#endif
+- b->fsuid = current->fsuid;
+- b->fsgid = current->fsgid;
++ b->fsuid = CREDENTIALS(current,fsuid);
++ b->fsgid = CREDENTIALS(current,fsgid);
b->capability = cfs_curproc_cap_pack();
}
-@@ -166,8 +176,13 @@
+@@ -166,8 +166,8 @@
/* XXX do something about time, uid, gid */
rec->cr_opcode = REINT_OPEN;
-+#ifdef HAS_STRUCT_CRED
-+ rec->cr_fsuid = current->real_cred->fsuid;
-+ rec->cr_fsgid = current->real_cred->fsgid;
-+#else
- rec->cr_fsuid = current->fsuid;
- rec->cr_fsgid = current->fsgid;
-+#endif
+- rec->cr_fsuid = current->fsuid;
+- rec->cr_fsgid = current->fsgid;
++ rec->cr_fsuid = CREDENTIALS(current,fsuid);
++ rec->cr_fsgid = CREDENTIALS(current,fsgid);
rec->cr_cap = cfs_curproc_cap_pack();
rec->cr_fid = op_data->fid1;
memset(&rec->cr_replayfid, 0, sizeof(rec->cr_replayfid));
-@@ -240,8 +255,13 @@
+@@ -240,8 +240,8 @@
struct mds_rec_setattr *rec = lustre_msg_buf(req->rq_reqmsg, offset,
sizeof(*rec));
rec->sa_opcode = REINT_SETATTR;
-+#ifdef HAS_STRUCT_CRED
-+ rec->sa_fsuid = current->real_cred->fsuid;
-+ rec->sa_fsgid = current->real_cred->fsgid;
-+#else
- rec->sa_fsuid = current->fsuid;
- rec->sa_fsgid = current->fsgid;
-+#endif
+- rec->sa_fsuid = current->fsuid;
+- rec->sa_fsgid = current->fsgid;
++ rec->sa_fsuid = CREDENTIALS(current,fsuid);
++ rec->sa_fsgid = CREDENTIALS(current,fsgid);
rec->sa_cap = cfs_curproc_cap_pack();
rec->sa_fid = data->fid1;
rec->sa_suppgid = -1;
-@@ -284,8 +304,13 @@
+@@ -284,8 +284,8 @@
LASSERT (rec != NULL);
rec->ul_opcode = REINT_UNLINK;
-+#ifdef HAS_STRUCT_CRED
-+ rec->ul_fsuid = current->real_cred->fsuid;
-+ rec->ul_fsgid = current->real_cred->fsgid;
-+#else
- rec->ul_fsuid = current->fsuid;
- rec->ul_fsgid = current->fsgid;
-+#endif
+- rec->ul_fsuid = current->fsuid;
+- rec->ul_fsgid = current->fsgid;
++ rec->ul_fsuid = CREDENTIALS(current,fsuid);
++ rec->ul_fsgid = CREDENTIALS(current,fsgid);
rec->ul_cap = cfs_curproc_cap_pack();
rec->ul_mode = data->create_mode;
rec->ul_suppgid = data->suppgids[0];
-@@ -307,8 +332,13 @@
+@@ -307,8 +307,8 @@
rec = lustre_msg_buf(req->rq_reqmsg, offset, sizeof (*rec));
rec->lk_opcode = REINT_LINK;
-+#ifdef HAS_STRUCT_CRED
-+ rec->lk_fsuid = current->real_cred->fsuid;
-+ rec->lk_fsgid = current->real_cred->fsgid;
-+#else
- rec->lk_fsuid = current->fsuid;
- rec->lk_fsgid = current->fsgid;
-+#endif
+- rec->lk_fsuid = current->fsuid;
+- rec->lk_fsgid = current->fsgid;
++ rec->lk_fsuid = CREDENTIALS(current,fsuid);
++ rec->lk_fsgid = CREDENTIALS(current,fsgid);
rec->lk_cap = cfs_curproc_cap_pack();
rec->lk_suppgid1 = data->suppgids[0];
rec->lk_suppgid2 = data->suppgids[1];
-@@ -331,8 +361,13 @@
+@@ -331,8 +331,8 @@
/* XXX do something about time, uid, gid */
rec->rn_opcode = REINT_RENAME;
-+#ifdef HAS_STRUCT_CRED
-+ rec->rn_fsuid = current->real_cred->fsuid;
-+ rec->rn_fsgid = current->real_cred->fsgid;
-+#else
- rec->rn_fsuid = current->fsuid;
- rec->rn_fsgid = current->fsgid;
-+#endif
+- rec->rn_fsuid = current->fsuid;
+- rec->rn_fsgid = current->fsgid;
++ rec->rn_fsuid = CREDENTIALS(current,fsuid);
++ rec->rn_fsgid = CREDENTIALS(current,fsgid);
rec->rn_cap = cfs_curproc_cap_pack();
rec->rn_suppgid1 = data->suppgids[0];
rec->rn_suppgid2 = data->suppgids[1];
-@@ -355,8 +390,13 @@
+@@ -355,8 +355,8 @@
struct mds_body *b;
b = lustre_msg_buf(req->rq_reqmsg, offset, sizeof(*b));
-+#ifdef HAS_STRUCT_CRED
-+ b->fsuid = current->real_cred->fsuid;
-+ b->fsgid = current->real_cred->fsgid;
-+#else
- b->fsuid = current->fsuid;
- b->fsgid = current->fsgid;
-+#endif
+- b->fsuid = current->fsuid;
+- b->fsgid = current->fsgid;
++ b->fsuid = CREDENTIALS(current,fsuid);
++ b->fsgid = CREDENTIALS(current,fsgid);
b->capability = cfs_curproc_cap_pack();
b->valid = valid;
b->flags = flags | MDS_BFLAG_EXT_FLAGS;
diff -urNad lustre~/lustre/mgc/mgc_request.c lustre/lustre/mgc/mgc_request.c
--- lustre~/lustre/mgc/mgc_request.c 2009-08-19 09:51:09.000000000 +0200
-+++ lustre/lustre/mgc/mgc_request.c 2009-08-19 14:10:45.000000000 +0200
++++ lustre/lustre/mgc/mgc_request.c 2009-08-20 10:25:21.000000000 +0200
@@ -415,7 +415,7 @@
obd->obd_lvfs_ctxt.fs = get_ds();
@@ -16909,20 +3049,17 @@ diff -urNad lustre~/lustre/mgc/mgc_request.c lustre/lustre/mgc/mgc_request.c
if (IS_ERR(dentry)) {
diff -urNad lustre~/lustre/obdclass/linux/linux-module.c lustre/lustre/obdclass/linux/linux-module.c
--- lustre~/lustre/obdclass/linux/linux-module.c 2009-08-19 09:51:10.000000000 +0200
-+++ lustre/lustre/obdclass/linux/linux-module.c 2009-08-19 14:10:45.000000000 +0200
-@@ -204,7 +204,11 @@
++++ lustre/lustre/obdclass/linux/linux-module.c 2009-08-20 10:25:21.000000000 +0200
+@@ -204,7 +204,7 @@
int err = 0;
ENTRY;
-+#ifdef HAS_STRUCT_CRED
-+ if (current->real_cred->fsuid != 0)
-+#else
- if (current->fsuid != 0)
-+#endif
+- if (current->fsuid != 0)
++ if (CREDENTIALS(current,fsuid) != 0)
RETURN(err = -EACCES);
if ((cmd & 0xffffff00) == ((int)'T') << 8) /* ignore all tty ioctls */
RETURN(err = -ENOTTY);
-@@ -419,13 +423,14 @@
+@@ -419,13 +419,14 @@
ENTRY;
obd_sysctl_init();
@@ -16941,7 +3078,7 @@ diff -urNad lustre~/lustre/obdclass/linux/linux-module.c lustre/lustre/obdclass/
CERROR("error registering /proc/fs/lustre/devices\n");
diff -urNad lustre~/lustre/obdclass/linux/linux-sysctl.c lustre/lustre/obdclass/linux/linux-sysctl.c
--- lustre~/lustre/obdclass/linux/linux-sysctl.c 2009-08-19 09:51:10.000000000 +0200
-+++ lustre/lustre/obdclass/linux/linux-sysctl.c 2009-08-19 14:10:45.000000000 +0200
++++ lustre/lustre/obdclass/linux/linux-sysctl.c 2009-08-20 10:25:21.000000000 +0200
@@ -56,7 +56,9 @@
cfs_sysctl_table_header_t *obd_table_header = NULL;
@@ -17216,7 +3353,7 @@ diff -urNad lustre~/lustre/obdclass/linux/linux-sysctl.c lustre/lustre/obdclass/
.maxlen = 0,
diff -urNad lustre~/lustre/obdclass/lprocfs_status.c lustre/lustre/obdclass/lprocfs_status.c
--- lustre~/lustre/obdclass/lprocfs_status.c 2009-08-19 09:51:10.000000000 +0200
-+++ lustre/lustre/obdclass/lprocfs_status.c 2009-08-19 14:10:45.000000000 +0200
++++ lustre/lustre/obdclass/lprocfs_status.c 2009-08-20 10:25:21.000000000 +0200
@@ -151,7 +151,7 @@
LPROCFS_ENTRY();
@@ -17237,7 +3374,7 @@ diff -urNad lustre~/lustre/obdclass/lprocfs_status.c lustre/lustre/obdclass/lpro
return rc;
diff -urNad lustre~/lustre/obdclass/lustre_handles.c lustre/lustre/obdclass/lustre_handles.c
--- lustre~/lustre/obdclass/lustre_handles.c 2009-08-19 09:51:10.000000000 +0200
-+++ lustre/lustre/obdclass/lustre_handles.c 2009-08-19 14:10:45.000000000 +0200
++++ lustre/lustre/obdclass/lustre_handles.c 2009-08-20 10:25:21.000000000 +0200
@@ -56,6 +56,14 @@
# define rcu_read_unlock() spin_unlock(&bucket->lock)
#endif /* ifndef HAVE_RCU */
@@ -17253,22 +3390,14 @@ diff -urNad lustre~/lustre/obdclass/lustre_handles.c lustre/lustre/obdclass/lust
static __u64 handle_base;
#define HANDLE_INCR 7
static spinlock_t handle_base_lock;
-@@ -233,6 +241,7 @@
-
- for (i = 0; i < HANDLE_HASH_SIZE; i++) {
- struct list_head *tmp, *pos;
-+ pos = tmp = NULL; /* avoid Werror */
- spin_lock(&handle_hash[i].lock);
- list_for_each_safe_rcu(tmp, pos, &(handle_hash[i].head)) {
- struct portals_handle *h;
diff -urNad lustre~/lustre/ptlrpc/service.c lustre/lustre/ptlrpc/service.c
--- lustre~/lustre/ptlrpc/service.c 2009-08-19 09:51:10.000000000 +0200
-+++ lustre/lustre/ptlrpc/service.c 2009-08-19 14:10:45.000000000 +0200
++++ lustre/lustre/ptlrpc/service.c 2009-08-20 10:25:21.000000000 +0200
@@ -1497,11 +1497,17 @@
{
struct fs_struct *fs = current->fs;
-+#ifdef NO_FS_STRUCT_COUNT
++#ifndef HAVE_COUNT_IN_STRUCT_FS_STRUCT
+ write_lock(&fs->lock);
+ fs->users++;
+ write_unlock(&fs->lock);
@@ -17283,15 +3412,27 @@ diff -urNad lustre~/lustre/ptlrpc/service.c lustre/lustre/ptlrpc/service.c
}
static void
+diff -urNad lustre~/lustre/ptlrpc/wiretest.c lustre/lustre/ptlrpc/wiretest.c
+--- lustre~/lustre/ptlrpc/wiretest.c 2009-08-19 09:51:10.000000000 +0200
++++ lustre/lustre/ptlrpc/wiretest.c 2009-08-20 10:25:21.000000000 +0200
+@@ -2319,7 +2319,7 @@
+ CLASSERT(FIEMAP_EXTENT_LAST == 0x00000001);
+ CLASSERT(FIEMAP_EXTENT_UNKNOWN == 0x00000002);
+ CLASSERT(FIEMAP_EXTENT_DELALLOC == 0x00000004);
+- CLASSERT(FIEMAP_EXTENT_NO_DIRECT == 0x00000008);
++ CLASSERT(FIEMAP_EXTENT_NO_DIRECT == 0x00002000);
+ CLASSERT(FIEMAP_EXTENT_SECONDARY == 0x00000010);
+ CLASSERT(FIEMAP_EXTENT_NET == 0x00000020);
+ CLASSERT(FIEMAP_EXTENT_DATA_COMPRESSED == 0x00000040);
diff -urNad lustre~/lustre/quota/quota_context.c lustre/lustre/quota/quota_context.c
--- lustre~/lustre/quota/quota_context.c 2009-08-19 09:51:10.000000000 +0200
-+++ lustre/lustre/quota/quota_context.c 2009-08-19 14:10:45.000000000 +0200
++++ lustre/lustre/quota/quota_context.c 2009-08-20 10:25:21.000000000 +0200
@@ -240,7 +240,11 @@
int ret = 0;
ENTRY;
-+#ifdef HAS_SB_ANY_QUOTA_LOADED
-+ if (!sb_any_quota_loaded(sb))
++#ifdef HAVE_SB_ANY_QUOTA_ACTIVE
++ if (!sb_any_quota_active(sb))
+#else
if (!sb_any_quota_enabled(sb))
+#endif
@@ -17302,8 +3443,8 @@ diff -urNad lustre~/lustre/quota/quota_context.c lustre/lustre/quota/quota_conte
int ret = QUOTA_RET_OK;
ENTRY;
-+#ifdef HAS_SB_ANY_QUOTA_LOADED
-+ if (!sb_any_quota_loaded(sb))
++#ifdef HAVE_SB_ANY_QUOTA_ACTIVE
++ if (!sb_any_quota_active(sb))
+#else
if (!sb_any_quota_enabled(sb))
+#endif
@@ -17314,8 +3455,8 @@ diff -urNad lustre~/lustre/quota/quota_context.c lustre/lustre/quota/quota_conte
ENTRY;
CLASSERT(MAXQUOTAS < 4);
-+#ifdef HAS_SB_ANY_QUOTA_LOADED
-+ if (!sb_any_quota_loaded(qctxt->lqc_sb))
++#ifdef HAVE_SB_ANY_QUOTA_ACTIVE
++ if (!sb_any_quota_active(qctxt->lqc_sb))
+#else
if (!sb_any_quota_enabled(qctxt->lqc_sb))
+#endif
@@ -17326,7 +3467,7 @@ diff -urNad lustre~/lustre/quota/quota_context.c lustre/lustre/quota/quota_conte
int ret;
LOCK_DQONOFF_MUTEX(dqopt);
-+#ifdef HAS_SB_HAS_QUOTA_ACTIVE
++#ifdef HAVE_SB_ANY_QUOTA_ACTIVE
+ if (!sb_has_quota_active(qctxt->lqc_sb, type)) {
+#else
if (!sb_has_quota_enabled(qctxt->lqc_sb, type)) {
@@ -17338,8 +3479,8 @@ diff -urNad lustre~/lustre/quota/quota_context.c lustre/lustre/quota/quota_conte
int rc;
ENTRY;
-+#ifdef HAS_SB_ANY_QUOTA_LOADED
-+ if (!sb_any_quota_loaded(qctxt->lqc_sb))
++#ifdef HAVE_SB_ANY_QUOTA_ACTIVE
++ if (!sb_any_quota_active(qctxt->lqc_sb))
+#else
if (!sb_any_quota_enabled(qctxt->lqc_sb))
+#endif
@@ -17348,13 +3489,13 @@ diff -urNad lustre~/lustre/quota/quota_context.c lustre/lustre/quota/quota_conte
data.obd = obd;
diff -urNad lustre~/lustre/quota/quota_interface.c lustre/lustre/quota/quota_interface.c
--- lustre~/lustre/quota/quota_interface.c 2009-08-19 09:51:10.000000000 +0200
-+++ lustre/lustre/quota/quota_interface.c 2009-08-19 14:10:45.000000000 +0200
++++ lustre/lustre/quota/quota_interface.c 2009-08-20 10:25:21.000000000 +0200
@@ -149,7 +149,11 @@
{
ENTRY;
-+#ifdef HAS_SB_ANY_QUOTA_LOADED
-+ if (!sb_any_quota_loaded(obd->u.obt.obt_sb))
++#ifdef HAVE_SB_ANY_QUOTA_ACTIVE
++ if (!sb_any_quota_active(obd->u.obt.obt_sb))
+#else
if (!sb_any_quota_enabled(obd->u.obt.obt_sb))
+#endif
@@ -17365,8 +3506,8 @@ diff -urNad lustre~/lustre/quota/quota_interface.c lustre/lustre/quota/quota_int
struct obd_quotactl *oqctl;
ENTRY;
-+#ifdef HAS_SB_ANY_QUOTA_LOADED
-+ if (!sb_any_quota_loaded(obt->obt_sb))
++#ifdef HAVE_SB_ANY_QUOTA_ACTIVE
++ if (!sb_any_quota_active(obt->obt_sb))
+#else
if (!sb_any_quota_enabled(obt->obt_sb))
+#endif
@@ -17377,8 +3518,8 @@ diff -urNad lustre~/lustre/quota/quota_interface.c lustre/lustre/quota/quota_int
ENTRY;
CLASSERT(MAXQUOTAS < 4);
-+#ifdef HAS_SB_ANY_QUOTA_LOADED
-+ if (!sb_any_quota_loaded(qctxt->lqc_sb))
++#ifdef HAVE_SB_ANY_QUOTA_ACTIVE
++ if (!sb_any_quota_active(qctxt->lqc_sb))
+#else
if (!sb_any_quota_enabled(qctxt->lqc_sb))
+#endif
@@ -17387,18 +3528,31 @@ diff -urNad lustre~/lustre/quota/quota_interface.c lustre/lustre/quota/quota_int
spin_lock(&qctxt->lqc_lock);
diff -urNad lustre~/lustre/quota/quota_internal.h lustre/lustre/quota/quota_internal.h
--- lustre~/lustre/quota/quota_internal.h 2009-08-19 09:51:10.000000000 +0200
-+++ lustre/lustre/quota/quota_internal.h 2009-08-19 14:10:45.000000000 +0200
-@@ -41,6 +41,13 @@
++++ lustre/lustre/quota/quota_internal.h 2009-08-20 10:25:21.000000000 +0200
+@@ -41,6 +41,14 @@
#ifdef HAVE_QUOTA_SUPPORT
-+#if LINUX_VERSION_CODE == KERNEL_VERSION(2,6,30)
++#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,29)
+/* since 2.6.29 defined in fs/quota/quota_v?.c */
+# define QUOTABLOCK_BITS 10
+# define QUOTABLOCK_SIZE (1 << QUOTABLOCK_BITS)
++# define toqb(x) (x) /* seems to be ident() */
+#endif
+
+
/* QUSG covnert bytes to blocks when counting block quota */
#define QUSG(count, isblk) (isblk ? toqb(count) : count)
+diff -urNad lustre~/lustre/utils/wiretest.c lustre/lustre/utils/wiretest.c
+--- lustre~/lustre/utils/wiretest.c 2009-08-19 09:51:10.000000000 +0200
++++ lustre/lustre/utils/wiretest.c 2009-08-20 10:25:21.000000000 +0200
+@@ -2317,7 +2317,7 @@
+ CLASSERT(FIEMAP_EXTENT_LAST == 0x00000001);
+ CLASSERT(FIEMAP_EXTENT_UNKNOWN == 0x00000002);
+ CLASSERT(FIEMAP_EXTENT_DELALLOC == 0x00000004);
+- CLASSERT(FIEMAP_EXTENT_NO_DIRECT == 0x00000008);
++ CLASSERT(FIEMAP_EXTENT_NO_DIRECT == 0x00002000);
+ CLASSERT(FIEMAP_EXTENT_SECONDARY == 0x00000010);
+ CLASSERT(FIEMAP_EXTENT_NET == 0x00000020);
+ CLASSERT(FIEMAP_EXTENT_DATA_COMPRESSED == 0x00000040);
--
Lustre Debian Packaging
More information about the Pkg-lustre-svn-commit
mailing list