[Pkg-lustre-svn-commit] updated: [9047010] Rediffing patches to make them work on 1.6.7:
Patrick Winnertz
winnie@debian.org
Thu Mar 12 10:27:49 UTC 2009
The following commit has been merged in the master branch:
commit 90470105bc6a388bd2e517713a84f183d61f6b30
Author: Patrick Winnertz <winnie@debian.org>
Date: Thu Mar 12 10:34:21 2009 +0100
Rediffing patches to make them work on 1.6.7:
- posix_acl.dpatch
- remove-set_tunables.dpatch
- enable-quota.dpatch
- removed the never-used patchless_support patches in the subdirectory
- used the new patchless-support patch from Bugzilla
Signed-off-by: Patrick Winnertz <winnie@debian.org>
diff --git a/debian/patches/00list b/debian/patches/00list
index d9f5cdc..4318389 100644
--- a/debian/patches/00list
+++ b/debian/patches/00list
@@ -10,22 +10,7 @@ no-darwin.dpatch
remove-set_tunables.dpatch
libsysio.dpatch
bug12769-ql-fix.dpatch
-#patchless_support/configure_for_HEAD.dpatch
-#patchless_support/fix_configure_RO_cache.dpatch
-#patchless_support/fix_nfs_fid_type.dpatch
-#patchless_support/fix_request_module_calls.dpatch
-#patchless_support/lustre_loop_devices_adaption.dpatch
-#patchless_support/nfs_changes_new_API.dpatch
-#patchless_support/sysctl_update.dpatch
-#patchless_support/configure_tests_2.6.27.dpatch
-#patchless_support/fix_mmap.dpatch
-#patchless_support/fix_path_API_changes.dpatch
-#patchless_support/lprocfs_changes.dpatch
-#patchless_support/new_page_fault_method.dpatch
-#patchless_support/splice_read_support.dpatch
-#patchless_support_2.6.24_lnet_part.dpatch
-#patchless_support_2.6.24_configure_part.dpatch
-patchless_support_2.6.24.dpatch
+patchless_support_2.6.26.dpatch
#server_support_2.6.27.dpatch
# Debian patches
bash_completion.dpatch
diff --git a/debian/patches/enable-quota.dpatch b/debian/patches/enable-quota.dpatch
index ddcf06a..9ed249a 100755
--- a/debian/patches/enable-quota.dpatch
+++ b/debian/patches/enable-quota.dpatch
@@ -4,9 +4,9 @@
## DP: --enable-quota check was only run when building modules.
@DPATCH@
-diff -urNad lustre-1.6.6~/configure.ac lustre-1.6.6/configure.ac
---- lustre-1.6.6~/configure.ac 2008-11-26 13:32:11.000000000 +0100
-+++ lustre-1.6.6/configure.ac 2008-11-26 13:37:27.000000000 +0100
+diff -urNad lustre~/configure.ac lustre/configure.ac
+--- lustre~/configure.ac 2009-03-12 10:32:27.000000000 +0100
++++ lustre/configure.ac 2009-03-12 11:19:53.000000000 +0100
@@ -8,6 +8,7 @@
LB_CHECK_VERSION
@@ -15,35 +15,10 @@ diff -urNad lustre-1.6.6~/configure.ac lustre-1.6.6/configure.ac
AC_CANONICAL_SYSTEM
-diff -urNad lustre-1.6.6~/lustre/autoconf/lustre-core.m4 lustre-1.6.6/lustre/autoconf/lustre-core.m4
---- lustre-1.6.6~/lustre/autoconf/lustre-core.m4 2008-11-26 13:37:22.000000000 +0100
-+++ lustre-1.6.6/lustre/autoconf/lustre-core.m4 2008-11-26 13:38:08.000000000 +0100
-@@ -1676,24 +1676,9 @@
- AC_HELP_STRING([--enable-quota],
- [enable quota support]),
- [],[enable_quota='default'])
--if test x$linux25 != xyes; then
-- enable_quota='no'
--fi
- LB_LINUX_CONFIG([QUOTA],[
-- if test x$enable_quota = xdefault; then
- enable_quota='yes'
-- fi
- ],[
-- if test x$enable_quota = xdefault; then
-- enable_quota='no'
-- AC_MSG_WARN([quota is not enabled because the kernel lacks quota
-- support])
-- else
-- if test x$enable_quota = xyes; then
-- AC_MSG_ERROR([cannot enable quota because the kernel lac
--ks quota support])
-- fi
-- fi
- ])
- if test x$enable_quota != xno; then
- AC_DEFINE(HAVE_QUOTA_SUPPORT, 1, [Enable quota support])
-@@ -1715,6 +1700,7 @@
+diff -urNad lustre~/lustre/autoconf/lustre-core.m4 lustre/lustre/autoconf/lustre-core.m4
+--- lustre~/lustre/autoconf/lustre-core.m4 2009-03-12 11:19:52.000000000 +0100
++++ lustre/lustre/autoconf/lustre-core.m4 2009-03-12 11:19:53.000000000 +0100
+@@ -1813,6 +1813,7 @@
])
])
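
For context, the AC_DEFINE in the hunk above lands in the autoconf-generated config header, and the quota code paths are gated on it at compile time. A minimal sketch of that consumer side (the function name is hypothetical, and HAVE_QUOTA_SUPPORT is assumed to arrive via the generated config header):

    #include <linux/errno.h>

    static int my_quota_setup(void)
    {
    #ifdef HAVE_QUOTA_SUPPORT
            /* configure found kernel quota support: wire up quota hooks */
            return 0;
    #else
            /* quota was disabled at configure time */
            return -EOPNOTSUPP;
    #endif
    }
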
diff --git a/debian/patches/patchless_support/configure_for_HEAD.dpatch b/debian/patches/patchless_support/configure_for_HEAD.dpatch
deleted file mode 100755
index d4eae95..0000000
--- a/debian/patches/patchless_support/configure_for_HEAD.dpatch
+++ /dev/null
@@ -1,329 +0,0 @@
-#! /bin/sh /usr/share/dpatch/dpatch-run
-## posix_acl.patch by Patrick Winnertz <winnie@debian.org>
-##
-## All lines beginning with `## DP:' are a description of the patch.
-## DP: Patch which will enable 2.6.24 patchless support for lustre, taken from #14250
-
-@DPATCH@
-diff -urNad lustre~/lustre/autoconf/lustre-core.m4 lustre/lustre/autoconf/lustre-core.m4
---- lustre~/lustre/autoconf/lustre-core.m4 2008-11-25 13:59:37.000000000 +0100
-+++ lustre/lustre/autoconf/lustre-core.m4 2008-12-22 10:00:57.000000000 +0100
-@@ -1109,15 +1109,20 @@
- AC_DEFUN([LC_PAGE_CHECKED],
- [AC_MSG_CHECKING([kernel has PageChecked and SetPageChecked])
- LB_LINUX_TRY_COMPILE([
-- #include <linux/mm.h>
-- #include <linux/page-flags.h>
-+ #include <linux/autoconf.h>
-+#ifdef HAVE_LINUX_MMTYPES_H
-+ #include <linux/mm_types.h>
-+#endif
-+ #include <linux/page-flags.h>
- ],[
-- #ifndef PageChecked
-- #error PageChecked not defined in kernel
-- #endif
-- #ifndef SetPageChecked
-- #error SetPageChecked not defined in kernel
-- #endif
-+ struct page *p;
-+
-+ /* before 2.6.26 this define*/
-+ #ifndef PageChecked
-+ /* 2.6.26 use function instead of define for it */
-+ SetPageChecked(p);
-+ PageChecked(p);
-+ #endif
- ],[
- AC_MSG_RESULT(yes)
- AC_DEFINE(HAVE_PAGE_CHECKED, 1,
-@@ -1271,11 +1276,149 @@
-
- # 2.6.23 extract nfs export related data into exportfs.h
- AC_DEFUN([LC_HAVE_EXPORTFS_H],
--[
--tmpfl="$CFLAGS"
--CFLAGS="$CFLAGS -I$LINUX_OBJ/include"
--AC_CHECK_HEADERS([linux/exportfs.h])
--CFLAGS="$tmpfl"
-+[LB_CHECK_FILE([$LINUX/include/linux/exportfs.h], [
-+ AC_DEFINE(HAVE_LINUX_EXPORTFS_H, 1,
-+ [kernel has include/exportfs.h])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+# 2.6.23 have new page fault handling API
-+AC_DEFUN([LC_VM_OP_FAULT],
-+[AC_MSG_CHECKING([if kernel has .fault in vm_operation_struct])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/mm.h>
-+],[
-+ struct vm_operations_struct op;
-+
-+ op.fault = NULL;
-+], [
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_VM_OP_FAULT, 1,
-+ [if kernel has .fault in vm_operation_struct])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+#2.6.23 has new shrinker API
-+AC_DEFUN([LC_REGISTER_SHRINKER],
-+[AC_MSG_CHECKING([if kernel has register_shrinker])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/mm.h>
-+],[
-+ register_shrinker(NULL);
-+], [
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_REGISTER_SHRINKER, 1,
-+ [if kernel has register_shrinker])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+# 2.6.24 has bio_endio with 2 args
-+AC_DEFUN([LC_BIO_ENDIO_2ARG],
-+[AC_MSG_CHECKING([if kernel has bio_endio with 2 args])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/bio.h>
-+],[
-+ bio_endio(NULL, 0);
-+], [
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_BIO_ENDIO_2ARG, 1,
-+ [if kernel has bio_endio with 2 args])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+# 2.6.24 has new members in exports struct.
-+AC_DEFUN([LC_FH_TO_DENTRY],
-+[AC_MSG_CHECKING([if kernel has .fh_to_dentry member in export_operations struct])
-+LB_LINUX_TRY_COMPILE([
-+#ifdef HAVE_LINUX_EXPORTFS_H
-+ #include <linux/exportfs.h>
-+#else
-+ #include <linux/fs.h>
-+#endif
-+],[
-+ struct export_operations exp;
-+
-+ exp.fh_to_dentry = NULL;
-+], [
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_FH_TO_DENTRY, 1,
-+ [kernel has .fh_to_dentry member in export_operations struct])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+# 2.6.24 need linux/mm_types.h included
-+AC_DEFUN([LC_HAVE_MMTYPES_H],
-+[LB_CHECK_FILE([$LINUX/include/linux/mm_types.h], [
-+ AC_DEFINE(HAVE_LINUX_MMTYPES_H, 1,
-+ [kernel has include/mm_types.h])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+# 2.6.24 remove long aged procfs entry -> deleted member
-+AC_DEFUN([LC_PROCFS_DELETED],
-+[AC_MSG_CHECKING([if kernel has deleted member in procfs entry struct])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/proc_fs.h>
-+],[
-+ struct proc_dir_entry pde;
-+
-+ pde.deleted = NULL;
-+], [
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_PROCFS_DELETED, 1,
-+ [kernel has deleted member in procfs entry struct])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+# 2.6.26 isn't export set_fs_pwd and change paramter in fs struct
-+AC_DEFUN([LC_FS_STRUCT_USE_PATH],
-+[AC_MSG_CHECKING([fs_struct use path structure])
-+LB_LINUX_TRY_COMPILE([
-+ #include <asm/atomic.h>
-+ #include <linux/spinlock.h>
-+ #include <linux/fs_struct.h>
-+],[
-+ struct path path;
-+ struct fs_struct fs;
-+
-+ fs.pwd = path;
-+], [
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_FS_STRUCT_USE_PATH, 1,
-+ [fs_struct use path structure])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+# 2.6.26 remove path_release and use path_put instead
-+AC_DEFUN([LC_PATH_RELEASE],
-+[AC_MSG_CHECKING([if path_release exist])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/dcache.h>
-+ #include <linux/namei.h>
-+],[
-+ path_release(NULL);
-+],[
-+ AC_DEFINE(HAVE_PATH_RELEASE, 1, [path_release exist])
-+ AC_MSG_RESULT([yes])
-+],[
-+ AC_MSG_RESULT([no])
-+])
- ])
-
- #
-@@ -1370,13 +1513,27 @@
- # raid5-zerocopy patch
- LC_PAGE_CONSTANT
-
-- # 2.6.22
-+ # 2.6.22
- LC_INVALIDATE_BDEV_2ARG
- LC_FS_RENAME_DOES_D_MOVE
-- # 2.6.23
-+ # 2.6.23
- LC_UNREGISTER_BLKDEV_RETURN_INT
- LC_KERNEL_SPLICE_READ
- LC_HAVE_EXPORTFS_H
-+ LC_VM_OP_FAULT
-+ LC_REGISTER_SHRINKER
-+
-+ # 2.6.24
-+ LC_HAVE_MMTYPES_H
-+ LC_BIO_ENDIO_2ARG
-+ LC_FH_TO_DENTRY
-+ LC_PROCFS_DELETED
-+
-+ # 2.6.26
-+ LC_FS_STRUCT_USE_PATH
-+ LC_RCU_LIST_SAFE
-+ LC_PATH_RELEASE
-+
- ])
-
- #
-@@ -1609,6 +1766,7 @@
- ],[
- AC_MSG_RESULT([no])
- ])
-+
- ],[
- AC_MSG_RESULT([no])
- ])
-diff -urNad lustre~/lustre/include/linux/lustre_compat25.h lustre/lustre/include/linux/lustre_compat25.h
---- lustre~/lustre/include/linux/lustre_compat25.h 2008-11-25 13:59:37.000000000 +0100
-+++ lustre/lustre/include/linux/lustre_compat25.h 2008-12-22 10:02:32.000000000 +0100
-@@ -57,6 +57,28 @@
- #endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,14) */
-
- #ifndef HAVE_SET_FS_PWD
-+
-+#ifdef HAVE_FS_STRUCT_USE_PATH
-+static inline void ll_set_fs_pwd(struct fs_struct *fs, struct vfsmount *mnt,
-+ struct dentry *dentry)
-+{
-+ struct path path;
-+ struct path old_pwd;
-+
-+ path.mnt = mnt;
-+ path.dentry = dentry;
-+ write_lock(&fs->lock);
-+ old_pwd = fs->pwd;
-+ path_get(&path);
-+ fs->pwd = path;
-+ write_unlock(&fs->lock);
-+
-+ if (old_pwd.dentry)
-+ path_put(&old_pwd);
-+}
-+
-+#else
-+
- static inline void ll_set_fs_pwd(struct fs_struct *fs, struct vfsmount *mnt,
- struct dentry *dentry)
- {
-@@ -75,6 +97,7 @@
- mntput(old_pwdmnt);
- }
- }
-+#endif
- #else
- #define ll_set_fs_pwd set_fs_pwd
- #endif /* HAVE_SET_FS_PWD */
-@@ -590,5 +613,56 @@
- vfs_rename(old,old_dir,new,new_dir)
- #endif
-
-+#ifdef HAVE_REGISTER_SHRINKER
-+typedef int (*shrinker_t)(int nr_to_scan, gfp_t gfp_mask);
-+
-+static inline
-+struct shrinker *set_shrinker(int seek, shrinker_t func)
-+{
-+ struct shrinker *s;
-+
-+ s = kmalloc(sizeof(*s), GFP_KERNEL);
-+ if (s == NULL)
-+ return (NULL);
-+
-+ s->shrink = func;
-+ s->seeks = seek;
-+
-+ register_shrinker(s);
-+
-+ return s;
-+}
-+
-+static inline
-+void remove_shrinker(struct shrinker *shrinker)
-+{
-+ if (shrinker == NULL)
-+ return;
-+
-+ unregister_shrinker(shrinker);
-+ kfree(shrinker);
-+}
-+#endif
-+
-+#ifdef HAVE_BIO_ENDIO_2ARG
-+#define cfs_bio_io_error(a,b) bio_io_error((a))
-+#define cfs_bio_endio(a,b,c) bio_endio((a),(c))
-+#else
-+#define cfs_bio_io_error(a,b) bio_io_error((a),(b))
-+#define cfs_bio_endio(a,b,c) bio_endio((a),(b),(c))
-+#endif
-+
-+#ifdef HAVE_FS_STRUCT_USE_PATH
-+#define cfs_fs_pwd(fs) ((fs)->pwd.dentry)
-+#define cfs_fs_mnt(fs) ((fs)->pwd.mnt)
-+#else
-+#define cfs_fs_pwd(fs) ((fs)->pwd)
-+#define cfs_fs_mnt(fs) ((fs)->pwdmnt)
-+#endif
-+
-+#ifndef list_for_each_safe_rcu
-+#define list_for_each_safe_rcu(a,b,c) list_for_each_rcu(b, c)
-+#endif
-+
- #endif /* __KERNEL__ */
- #endif /* _COMPAT25_H */
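
The deleted file above did two things: added 2.6.23-2.6.26 feature probes to lustre-core.m4, and grew lustre_compat25.h compat shims (a path-based ll_set_fs_pwd(), set_shrinker()/remove_shrinker() emulation on top of the 2.6.23 register_shrinker() API, and the cfs_bio_endio()/cfs_fs_pwd() macros). A minimal caller sketch for the shrinker shim, assuming the HAVE_* defines from those probes; the my_* names are hypothetical:

    #include <linux/errno.h>
    #include <linux/mm.h>                   /* DEFAULT_SEEKS */
    #include <linux/lustre_compat25.h>      /* set_shrinker()/remove_shrinker() */

    /* old-style shrink callback: scan/report cached objects */
    static int my_cache_shrink(int nr_to_scan, gfp_t gfp_mask)
    {
            return 0;                       /* nothing cached in this sketch */
    }

    static struct shrinker *my_shrinker;

    static int my_cache_init(void)
    {
            /* on 2.6.23+ the wrapper kmallocs a struct shrinker and calls
             * register_shrinker(); on older kernels set_shrinker() is native */
            my_shrinker = set_shrinker(DEFAULT_SEEKS, my_cache_shrink);
            return my_shrinker != NULL ? 0 : -ENOMEM;
    }

    static void my_cache_fini(void)
    {
            remove_shrinker(my_shrinker);   /* unregister and kfree */
    }
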
diff --git a/debian/patches/patchless_support/configure_tests_2.6.27.dpatch b/debian/patches/patchless_support/configure_tests_2.6.27.dpatch
deleted file mode 100755
index ab89906..0000000
--- a/debian/patches/patchless_support/configure_tests_2.6.27.dpatch
+++ /dev/null
@@ -1,461 +0,0 @@
-#! /bin/sh /usr/share/dpatch/dpatch-run
-## posix_acl.patch by Patrick Winnertz <winnie@debian.org>
-##
-## All lines beginning with `## DP:' are a description of the patch.
-## DP: Patch which will enable 2.6.24 patchless support for lustre, taken from #14250
-
-@DPATCH@
-Index: HEAD/libcfs/autoconf/lustre-libcfs.m4
-===================================================================
---- HEAD.orig/libcfs/autoconf/lustre-libcfs.m4 2008-11-20 14:33:51.000000000 +0200
-+++ HEAD/libcfs/autoconf/lustre-libcfs.m4 2008-12-17 17:15:38.000000000 +0200
-@@ -364,7 +364,7 @@
- ])
-
- # 2.6.24 request not use real numbers for ctl_name
--AC_DEFUN([LN_SYSCTL_UNNUMBERED],
-+AC_DEFUN([LIBCFS_SYSCTL_UNNUMBERED],
- [AC_MSG_CHECKING([for CTL_UNNUMBERED])
- LB_LINUX_TRY_COMPILE([
- #include <linux/sysctl.h>
-@@ -382,7 +382,7 @@
- ])
-
- # 2.6.24 lost scatterlist->page
--AC_DEFUN([LN_SCATTERLIST_SETPAGE],
-+AC_DEFUN([LIBCFS_SCATTERLIST_SETPAGE],
- [AC_MSG_CHECKING([for exist sg_set_page])
- LB_LINUX_TRY_COMPILE([
- #include <linux/scatterlist.h>
-@@ -398,7 +398,7 @@
- ])
-
- # 2.6.26 use int instead of atomic for sem.count
--AC_DEFUN([LN_SEM_COUNT],
-+AC_DEFUN([LIBCFS_SEM_COUNT],
- [AC_MSG_CHECKING([atomic sem.count])
- LB_LINUX_TRY_COMPILE([
- #include <asm/semaphore.h>
-@@ -415,6 +415,21 @@
- ])
- ])
-
-+# 2.6.27 have second argument to sock_map_fd
-+AC_DEFUN([LIBCFS_SOCK_MAP_FD_2ARG],
-+[AC_MSG_CHECKING([sock_map_fd have second argument])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/net.h>
-+],[
-+ sock_map_fd(NULL, 0);
-+],[
-+ AC_MSG_RESULT(yes)
-+ AC_DEFINE(HAVE_SOCK_MAP_FD_2ARG, 1,
-+ [sock_map_fd have second argument])
-+],[
-+ AC_MSG_RESULT(NO)
-+])
-+])
-
- #
- # LIBCFS_PROG_LINUX
-@@ -447,10 +462,12 @@
- # 2.6.23
- LIBCFS_KMEM_CACHE_CREATE_DTOR
- # 2.6.24
--LN_SYSCTL_UNNUMBERED
--LN_SCATTERLIST_SETPAGE
-+LIBCFS_SYSCTL_UNNUMBERED
-+LIBCFS_SCATTERLIST_SETPAGE
- # 2.6.26
--LN_SEM_COUNT
-+LIBCFS_SEM_COUNT
-+# 2.6.27
-+LIBCFS_SOCK_MAP_FD_2ARG
- ])
-
- #
-Index: HEAD/libcfs/libcfs/linux/linux-tcpip.c
-===================================================================
---- HEAD.orig/libcfs/libcfs/linux/linux-tcpip.c 2008-08-07 20:22:50.000000000 +0300
-+++ HEAD/libcfs/libcfs/linux/linux-tcpip.c 2008-12-17 17:15:38.000000000 +0200
-@@ -63,7 +63,11 @@
- return rc;
- }
-
-+#ifdef HAVE_SOCK_MAP_FD_2ARG
-+ fd = sock_map_fd(sock,0);
-+#else
- fd = sock_map_fd(sock);
-+#endif
- if (fd < 0) {
- rc = fd;
- sock_release(sock);
-Index: HEAD/lustre/autoconf/lustre-core.m4
-===================================================================
---- HEAD.orig/lustre/autoconf/lustre-core.m4 2008-12-17 17:15:38.000000000 +0200
-+++ HEAD/lustre/autoconf/lustre-core.m4 2008-12-17 17:15:38.000000000 +0200
-@@ -1727,6 +1727,56 @@
- ])
- ])
-
-+#2.6.27
-+AC_DEFUN([LC_INODE_PERMISION_2ARGS],
-+[AC_MSG_CHECKING([inode_operations->permission have two args])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/fs.h>
-+],[
-+ struct inode *inode;
-+
-+ inode->i_op->permission(NULL,0);
-+],[
-+ AC_DEFINE(HAVE_INODE_PERMISION_2ARGS, 1,
-+ [inode_operations->permission have two args])
-+ AC_MSG_RESULT([yes])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+# 2.6.27 have file_remove_suid instead of remove_suid
-+AC_DEFUN([LC_FILE_REMOVE_SUID],
-+[AC_MSG_CHECKING([kernel have file_remove_suid])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/fs.h>
-+],[
-+ file_remove_suid(NULL);
-+],[
-+ AC_DEFINE(HAVE_FILE_REMOVE_SUID, 1,
-+ [kernel have file_remove_suid])
-+ AC_MSG_RESULT([yes])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+# 2.6.27 have new page locking API
-+AC_DEFUN([LC_TRYLOCKPAGE],
-+[AC_MSG_CHECKING([kernel use trylock_page for page lock])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/pagemap.h>
-+],[
-+ trylock_page(NULL);
-+],[
-+ AC_DEFINE(HAVE_TRYLOCK_PAGE, 1,
-+ [kernel use trylock_page for page lock])
-+ AC_MSG_RESULT([yes])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
- #
- # LC_PROG_LINUX
- #
-@@ -1818,9 +1868,9 @@
- LC_INVALIDATEPAGE_RETURN_INT
- LC_UMOUNTBEGIN_HAS_VFSMOUNT
- LC_SEQ_LOCK
-+ LC_EXPORT_FILEMAP_FDATAWRITE_RANGE
- if test x$enable_server = xyes ; then
-- LC_EXPORT_INVALIDATE_MAPPING_PAGES
-- LC_EXPORT_FILEMAP_FDATAWRITE_RANGE
-+ LC_EXPORT_INVALIDATE_MAPPING_PAGES
- fi
-
- #2.6.18 + RHEL5 (fc6)
-@@ -1863,6 +1913,11 @@
- LC_FS_STRUCT_USE_PATH
- LC_RCU_LIST_SAFE
- LC_PATH_RELEASE
-+
-+ # 2.6.27
-+ LC_INODE_PERMISION_2ARGS
-+ LC_FILE_REMOVE_SUID
-+ LC_TRYLOCKPAGE
- ])
-
- #
-Index: HEAD/lustre/obdclass/lustre_handles.c
-===================================================================
---- HEAD.orig/lustre/obdclass/lustre_handles.c 2008-08-07 20:23:44.000000000 +0300
-+++ HEAD/lustre/obdclass/lustre_handles.c 2008-12-17 17:15:38.000000000 +0200
-@@ -246,7 +246,7 @@
- int i;
-
- for (i = 0; i < HANDLE_HASH_SIZE; i++) {
-- struct list_head *tmp, *pos;
-+ struct list_head *tmp = NULL , *pos;
- spin_lock(&handle_hash[i].lock);
- list_for_each_safe_rcu(tmp, pos, &(handle_hash[i].head)) {
- struct portals_handle *h;
-Index: HEAD/lustre/obdclass/capa.c
-===================================================================
---- HEAD.orig/lustre/obdclass/capa.c 2008-11-17 11:36:44.000000000 +0200
-+++ HEAD/lustre/obdclass/capa.c 2008-12-17 17:15:38.000000000 +0200
-@@ -246,11 +246,7 @@
- struct ll_crypto_hash *tfm;
- struct capa_hmac_alg *alg;
- int keylen;
-- struct scatterlist sl = {
-- .page = virt_to_page(capa),
-- .offset = (unsigned long)(capa) % CFS_PAGE_SIZE,
-- .length = offsetof(struct lustre_capa, lc_hmac),
-- };
-+ struct scatterlist sl;
-
- if (capa_alg(capa) != CAPA_HMAC_ALG_SHA1) {
- CERROR("unknown capability hmac algorithm!\n");
-@@ -267,6 +263,10 @@
- }
- keylen = alg->ha_keylen;
-
-+ sg_set_page(&sl, virt_to_page(capa),
-+ offsetof(struct lustre_capa, lc_hmac),
-+ (unsigned long)(capa) % CFS_PAGE_SIZE);
-+
- ll_crypto_hmac(tfm, key, &keylen, &sl, sl.length, hmac);
- ll_crypto_free_hash(tfm);
-
-@@ -276,16 +276,8 @@
- int capa_encrypt_id(__u32 *d, __u32 *s, __u8 *key, int keylen)
- {
- struct ll_crypto_cipher *tfm;
-- struct scatterlist sd = {
-- .page = virt_to_page(d),
-- .offset = (unsigned long)(d) % CFS_PAGE_SIZE,
-- .length = 16,
-- };
-- struct scatterlist ss = {
-- .page = virt_to_page(s),
-- .offset = (unsigned long)(s) % CFS_PAGE_SIZE,
-- .length = 16,
-- };
-+ struct scatterlist sd;
-+ struct scatterlist ss;
- struct blkcipher_desc desc;
- unsigned int min;
- int rc;
-@@ -309,6 +301,11 @@
- GOTO(out, rc);
- }
-
-+ sg_set_page(&sd, virt_to_page(d), 16,
-+ (unsigned long)(d) % CFS_PAGE_SIZE);
-+
-+ sg_set_page(&ss, virt_to_page(s), 16,
-+ (unsigned long)(s) % CFS_PAGE_SIZE);
- desc.tfm = tfm;
- desc.info = NULL;
- desc.flags = 0;
-@@ -328,16 +325,8 @@
- int capa_decrypt_id(__u32 *d, __u32 *s, __u8 *key, int keylen)
- {
- struct ll_crypto_cipher *tfm;
-- struct scatterlist sd = {
-- .page = virt_to_page(d),
-- .offset = (unsigned long)(d) % CFS_PAGE_SIZE,
-- .length = 16,
-- };
-- struct scatterlist ss = {
-- .page = virt_to_page(s),
-- .offset = (unsigned long)(s) % CFS_PAGE_SIZE,
-- .length = 16,
-- };
-+ struct scatterlist sd;
-+ struct scatterlist ss;
- struct blkcipher_desc desc;
- unsigned int min;
- int rc;
-@@ -361,6 +350,12 @@
- GOTO(out, rc);
- }
-
-+ sg_set_page(&sd, virt_to_page(d), 16,
-+ (unsigned long)(d) % CFS_PAGE_SIZE);
-+
-+ sg_set_page(&ss, virt_to_page(s), 16,
-+ (unsigned long)(s) % CFS_PAGE_SIZE);
-+
- desc.tfm = tfm;
- desc.info = NULL;
- desc.flags = 0;
-Index: HEAD/lustre/ptlrpc/sec_bulk.c
-===================================================================
---- HEAD.orig/lustre/ptlrpc/sec_bulk.c 2008-08-14 23:55:36.000000000 +0300
-+++ HEAD/lustre/ptlrpc/sec_bulk.c 2008-12-17 17:15:38.000000000 +0200
-@@ -992,9 +992,9 @@
- }
-
- for (i = 0; i < desc->bd_iov_count; i++) {
-- sl[i].page = desc->bd_iov[i].kiov_page;
-- sl[i].offset = desc->bd_iov[i].kiov_offset & ~CFS_PAGE_MASK;
-- sl[i].length = desc->bd_iov[i].kiov_len;
-+ sg_set_page(&sl[i], desc->bd_iov[i].kiov_page,
-+ desc->bd_iov[i].kiov_len,
-+ desc->bd_iov[i].kiov_offset & ~CFS_PAGE_MASK);
- bytes += desc->bd_iov[i].kiov_len;
- }
-
-Index: HEAD/lustre/ptlrpc/sec_config.c
-===================================================================
---- HEAD.orig/lustre/ptlrpc/sec_config.c 2008-12-03 05:47:20.000000000 +0200
-+++ HEAD/lustre/ptlrpc/sec_config.c 2008-12-15 17:19:13.000000000 +0200
-@@ -1170,7 +1170,7 @@
-
- push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
-
-- dentry = lookup_one_len(MOUNT_CONFIGS_DIR, current->fs->pwd,
-+ dentry = lookup_one_len(MOUNT_CONFIGS_DIR, cfs_fs_pwd(current->fs),
- strlen(MOUNT_CONFIGS_DIR));
- if (IS_ERR(dentry)) {
- rc = PTR_ERR(dentry);
-Index: HEAD/lustre/include/linux/lustre_patchless_compat.h
-===================================================================
---- HEAD.orig/lustre/include/linux/lustre_patchless_compat.h 2008-08-07 20:23:18.000000000 +0300
-+++ HEAD/lustre/include/linux/lustre_patchless_compat.h 2008-12-17 17:15:38.000000000 +0200
-@@ -52,7 +52,7 @@
-
- BUG_ON(!PageLocked(page));
-
--#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,15))
-+#ifdef HAVE_RW_TREE_LOCK
- write_lock_irq(&mapping->tree_lock);
- #else
- spin_lock_irq(&mapping->tree_lock);
-@@ -66,7 +66,7 @@
- __dec_zone_page_state(page, NR_FILE_PAGES);
- #endif
-
--#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,15))
-+#ifdef HAVE_RW_TREE_LOCK
- write_unlock_irq(&mapping->tree_lock);
- #else
- spin_unlock_irq(&mapping->tree_lock);
-Index: HEAD/lustre/include/linux/lustre_compat25.h
-===================================================================
---- HEAD.orig/lustre/include/linux/lustre_compat25.h 2008-12-17 17:15:38.000000000 +0200
-+++ HEAD/lustre/include/linux/lustre_compat25.h 2008-12-17 17:15:38.000000000 +0200
-@@ -170,7 +170,12 @@
- #endif
-
- /* XXX our code should be using the 2.6 calls, not the other way around */
-+#ifndef HAVE_TRYLOCK_PAGE
- #define TryLockPage(page) TestSetPageLocked(page)
-+#else
-+#define TryLockPage(page) (!trylock_page(page))
-+#endif
-+
- #define Page_Uptodate(page) PageUptodate(page)
- #define ll_redirty_page(page) set_page_dirty(page)
-
-@@ -623,8 +628,17 @@
- #define ll_crypto_free_blkcipher(tfm) crypto_free_tfm(tfm)
- #endif /* HAVE_ASYNC_BLOCK_CIPHER */
-
-+#ifdef HAVE_FILE_REMOVE_SUID
-+#define ll_remove_suid(file, mnt) file_remove_suid(file)
-+#else
-+ #ifdef HAVE_SECURITY_PLUG
-+ #define ll_remove_suid(file,mnt) remove_suid(file->f_dentry,mnt)
-+ #else
-+ #define ll_remove_suid(file,mnt) remove_suid(file->f_dentry)
-+ #endif
-+#endif
-+
- #ifdef HAVE_SECURITY_PLUG
--#define ll_remove_suid(inode,mnt) remove_suid(inode,mnt)
- #define ll_vfs_rmdir(dir,entry,mnt) vfs_rmdir(dir,entry,mnt)
- #define ll_vfs_mkdir(inode,dir,mnt,mode) vfs_mkdir(inode,dir,mnt,mode)
- #define ll_vfs_link(old,mnt,dir,new,mnt1) vfs_link(old,mnt,dir,new,mnt1)
-@@ -636,7 +650,6 @@
- #define ll_vfs_rename(old,old_dir,mnt,new,new_dir,mnt1) \
- vfs_rename(old,old_dir,mnt,new,new_dir,mnt1)
- #else
--#define ll_remove_suid(inode,mnt) remove_suid(inode)
- #define ll_vfs_rmdir(dir,entry,mnt) vfs_rmdir(dir,entry)
- #define ll_vfs_mkdir(inode,dir,mnt,mode) vfs_mkdir(inode,dir,mode)
- #define ll_vfs_link(old,mnt,dir,new,mnt1) vfs_link(old,dir,new)
-Index: HEAD/lustre/include/linux/lustre_lib.h
-===================================================================
---- HEAD.orig/lustre/include/linux/lustre_lib.h 2008-08-07 20:23:18.000000000 +0300
-+++ HEAD/lustre/include/linux/lustre_lib.h 2008-12-17 17:15:38.000000000 +0200
-@@ -49,7 +49,6 @@
- # include <string.h>
- # include <sys/types.h>
- #else
--# include <asm/semaphore.h>
- # include <linux/rwsem.h>
- # include <linux/sched.h>
- # include <linux/signal.h>
-Index: HEAD/lustre/llite/llite_internal.h
-===================================================================
---- HEAD.orig/lustre/llite/llite_internal.h 2008-12-17 17:15:38.000000000 +0200
-+++ HEAD/lustre/llite/llite_internal.h 2008-12-17 17:15:38.000000000 +0200
-@@ -661,7 +661,11 @@
- struct lookup_intent *it, struct kstat *stat);
- int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat);
- struct ll_file_data *ll_file_data_get(void);
-+#ifndef HAVE_INODE_PERMISION_2ARGS
- int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd);
-+#else
-+int ll_inode_permission(struct inode *inode, int mask);
-+#endif
- int ll_lov_setstripe_ea_info(struct inode *inode, struct file *file,
- int flags, struct lov_user_md *lum,
- int lum_size);
-Index: HEAD/lustre/llite/file.c
-===================================================================
---- HEAD.orig/lustre/llite/file.c 2008-11-20 14:34:31.000000000 +0200
-+++ HEAD/lustre/llite/file.c 2008-12-17 17:15:38.000000000 +0200
-@@ -2305,7 +2305,11 @@
- }
-
- #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,10))
-+#ifndef HAVE_INODE_PERMISION_2ARGS
- int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd)
-+#else
-+int ll_inode_permission(struct inode *inode, int mask)
-+#endif
- {
- CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), mask %o\n",
- inode->i_ino, inode->i_generation, inode, mask);
-Index: HEAD/lustre/llite/vvp_page.c
-===================================================================
---- HEAD.orig/lustre/llite/vvp_page.c 2008-11-10 20:27:54.000000000 +0200
-+++ HEAD/lustre/llite/vvp_page.c 2008-12-17 17:15:38.000000000 +0200
-@@ -341,7 +341,7 @@
-
- result = -EAGAIN;
- /* we're trying to write, but the page is locked.. come back later */
-- if (!TestSetPageLocked(vmpage)) {
-+ if (!TryLockPage(vmpage)) {
- if (pg->cp_state == CPS_CACHED) {
- /*
- * We can cancel IO if page wasn't dirty after all.
-Index: HEAD/lnet/autoconf/lustre-lnet.m4
-===================================================================
---- HEAD.orig/lnet/autoconf/lustre-lnet.m4 2008-09-25 07:44:45.000000000 +0300
-+++ HEAD/lnet/autoconf/lustre-lnet.m4 2008-12-17 17:15:38.000000000 +0200
-@@ -1098,6 +1098,22 @@
- AM_CONDITIONAL(BUILD_USOCKLND, test x$USOCKLND = "xusocklnd")
- ])
-
-+# 2.6.27 have second argument to sock_map_fd
-+AC_DEFUN([LN_SOCK_MAP_FD_2ARG],
-+[AC_MSG_CHECKING([sock_map_fd have second argument])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/net.h>
-+],[
-+ sock_map_fd(NULL, 0);
-+],[
-+ AC_MSG_RESULT(yes)
-+ AC_DEFINE(HAVE_SOCK_MAP_FD_2ARG, 1,
-+ [sock_map_fd have second argument])
-+],[
-+ AC_MSG_RESULT(NO)
-+])
-+])
-+
- #
- # LN_CONFIG_FILES
- #
diff --git a/debian/patches/patchless_support/fix_configure_RO_cache.dpatch b/debian/patches/patchless_support/fix_configure_RO_cache.dpatch
deleted file mode 100755
index 0c5664b..0000000
--- a/debian/patches/patchless_support/fix_configure_RO_cache.dpatch
+++ /dev/null
@@ -1,57 +0,0 @@
-#! /bin/sh /usr/share/dpatch/dpatch-run
-## posix_acl.patch by Patrick Winnertz <winnie@debian.org>
-##
-## All lines beginning with `## DP:' are a description of the patch.
-## DP: Patch which will enable 2.6.24 patchless support for lustre, taken from #14250
-
-@DPATCH@
-diff -urNad lustre~/lustre/autoconf/lustre-core.m4 lustre/lustre/autoconf/lustre-core.m4
---- lustre~/lustre/autoconf/lustre-core.m4 2008-12-22 10:08:29.000000000 +0100
-+++ lustre/lustre/autoconf/lustre-core.m4 2008-12-22 10:08:29.000000000 +0100
-@@ -1240,6 +1240,9 @@
- ])
- ])
-
-+# 2.6.18
-+
-+
- # 2.6.23 have return type 'void' for unregister_blkdev
- AC_DEFUN([LC_UNREGISTER_BLKDEV_RETURN_INT],
- [AC_MSG_CHECKING([if unregister_blkdev return int])
-@@ -1384,6 +1387,26 @@
- ])
- ])
-
-+# 2.6.25 change define to inline
-+AC_DEFUN([LC_MAPPING_CAP_WRITEBACK_DIRTY],
-+[AC_MSG_CHECKING([if kernel have mapping_cap_writeback_dirty])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/backing-dev.h>
-+],[
-+ #ifndef mapping_cap_writeback_dirty
-+ mapping_cap_writeback_dirty(NULL);
-+ #endif
-+],[
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_MAPPING_CAP_WRITEBACK_DIRTY, 1,
-+ [kernel have mapping_cap_writeback_dirty])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+
-+
- # 2.6.26 isn't export set_fs_pwd and change paramter in fs struct
- AC_DEFUN([LC_FS_STRUCT_USE_PATH],
- [AC_MSG_CHECKING([fs_struct use path structure])
-@@ -1528,6 +1551,9 @@
- LC_BIO_ENDIO_2ARG
- LC_FH_TO_DENTRY
- LC_PROCFS_DELETED
-+
-+ #2.6.25
-+ LC_MAPPING_CAP_WRITEBACK_DIRTY
-
- # 2.6.26
- LC_FS_STRUCT_USE_PATH
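
The LC_MAPPING_CAP_WRITEBACK_DIRTY probe above uses a trick worth spelling out: 2.6.25 turned the macro into an inline function, so the test only emits the call when the name is not a macro. That way the probe compiles when the symbol exists in either form and fails only when it is missing entirely. The probe body in isolation:

    #include <linux/backing-dev.h>

    void probe(void)
    {
    #ifndef mapping_cap_writeback_dirty
            /* not a macro (2.6.25+): must resolve as a real function */
            mapping_cap_writeback_dirty(NULL);
    #endif
            /* if it is a macro (pre-2.6.25), the preprocessor removes the
             * call and the probe still compiles */
    }
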
diff --git a/debian/patches/patchless_support/fix_mmap.dpatch b/debian/patches/patchless_support/fix_mmap.dpatch
deleted file mode 100755
index 4b05ac7..0000000
--- a/debian/patches/patchless_support/fix_mmap.dpatch
+++ /dev/null
@@ -1,308 +0,0 @@
-#! /bin/sh /usr/share/dpatch/dpatch-run
-## posix_acl.patch by Patrick Winnertz <winnie@debian.org>
-##
-## All lines beginning with `## DP:' are a description of the patch.
-## DP: Patch which will enable 2.6.24 patchless support for lustre, taken from #14250
-
-@DPATCH@
-Index: b1_8_gate/lustre/llite/llite_mmap.c
-===================================================================
---- b1_8_gate.orig/lustre/llite/llite_mmap.c 2008-11-11 18:23:11.000000000 +0300
-+++ b1_8_gate/lustre/llite/llite_mmap.c 2008-12-03 13:25:37.000000000 +0300
-@@ -81,8 +81,7 @@
- int lt_get_mmap_locks(struct ll_lock_tree *tree,
- unsigned long addr, size_t count);
-
--struct page *ll_nopage(struct vm_area_struct *vma, unsigned long address,
-- int *type);
-+static struct vm_operations_struct ll_file_vm_ops;
-
- struct ll_lock_tree_node * ll_node_from_inode(struct inode *inode, __u64 start,
- __u64 end, ldlm_mode_t mode)
-@@ -285,9 +284,19 @@
- return LCK_PR;
- }
-
-+static void policy_from_vma_pgoff(ldlm_policy_data_t *policy,
-+ struct vm_area_struct *vma,
-+ __u64 pgoff, size_t count)
-+{
-+ policy->l_extent.start = pgoff << CFS_PAGE_SHIFT;
-+ policy->l_extent.end = (policy->l_extent.start + count - 1) |
-+ ~CFS_PAGE_MASK;
-+}
-+
- static void policy_from_vma(ldlm_policy_data_t *policy,
- struct vm_area_struct *vma, unsigned long addr,
- size_t count)
-+
- {
- policy->l_extent.start = ((addr - vma->vm_start) & CFS_PAGE_MASK) +
- ((__u64)vma->vm_pgoff << CFS_PAGE_SHIFT);
-@@ -308,7 +317,7 @@
- spin_lock(&mm->page_table_lock);
- for(vma = find_vma(mm, addr);
- vma != NULL && vma->vm_start < (addr + count); vma = vma->vm_next) {
-- if (vma->vm_ops && vma->vm_ops->nopage == ll_nopage &&
-+ if (vma->vm_ops && vma->vm_ops == &ll_file_vm_ops &&
- vma->vm_flags & VM_SHARED) {
- ret = vma;
- break;
-@@ -360,44 +369,30 @@
- }
- RETURN(0);
- }
--/**
-- * Page fault handler.
-- *
-- * \param vma - is virtiual area struct related to page fault
-- * \param address - address when hit fault
-- * \param type - of fault
-- *
-- * \return allocated and filled page for address
-- * \retval NOPAGE_SIGBUS if page not exist on this address
-- * \retval NOPAGE_OOM not have memory for allocate new page
-- */
--struct page *ll_nopage(struct vm_area_struct *vma, unsigned long address,
-- int *type)
-+
-+static int ll_get_extent_lock(struct vm_area_struct *vma, unsigned long pgoff,
-+ int *save_flags, struct lustre_handle *lockh)
- {
- struct file *filp = vma->vm_file;
- struct ll_file_data *fd = LUSTRE_FPRIVATE(filp);
- struct inode *inode = filp->f_dentry->d_inode;
-- struct lustre_handle lockh = { 0 };
- ldlm_policy_data_t policy;
- ldlm_mode_t mode;
-- struct page *page = NULL;
- struct ll_inode_info *lli = ll_i2info(inode);
-- struct lov_stripe_md *lsm;
- struct ost_lvb lvb;
- __u64 kms, old_mtime;
-- unsigned long pgoff, size, rand_read, seq_read;
-- int rc = 0;
-+ unsigned long size;
- ENTRY;
-
- if (lli->lli_smd == NULL) {
- CERROR("No lsm on fault?\n");
-- RETURN(NOPAGE_SIGBUS);
-+ RETURN(0);
- }
-
- ll_clear_file_contended(inode);
-
- /* start and end the lock on the first and last bytes in the page */
-- policy_from_vma(&policy, vma, address, CFS_PAGE_SIZE);
-+ policy_from_vma_pgoff(&policy, vma, pgoff, CFS_PAGE_SIZE);
-
- CDEBUG(D_MMAP, "nopage vma %p inode %lu, locking ["LPU64", "LPU64"]\n",
- vma, inode->i_ino, policy.l_extent.start, policy.l_extent.end);
-@@ -405,26 +400,28 @@
- mode = mode_from_vma(vma);
- old_mtime = LTIME_S(inode->i_mtime);
-
-- lsm = lli->lli_smd;
-- rc = ll_extent_lock(fd, inode, lsm, mode, &policy,
-- &lockh, LDLM_FL_CBPENDING | LDLM_FL_NO_LRU);
-- if (rc != 0)
-- RETURN(NOPAGE_SIGBUS);
-+ if(ll_extent_lock(fd, inode, lli->lli_smd, mode, &policy,
-+ lockh, LDLM_FL_CBPENDING | LDLM_FL_NO_LRU) != 0)
-+ RETURN(0);
-
- if (vma->vm_flags & VM_EXEC && LTIME_S(inode->i_mtime) != old_mtime)
- CWARN("binary changed. inode %lu\n", inode->i_ino);
-
-- lov_stripe_lock(lsm);
-+ lov_stripe_lock(lli->lli_smd);
- inode_init_lvb(inode, &lvb);
-- obd_merge_lvb(ll_i2obdexp(inode), lsm, &lvb, 1);
-+ if(obd_merge_lvb(ll_i2obdexp(inode), lli->lli_smd, &lvb, 1)) {
-+ lov_stripe_unlock(lli->lli_smd);
-+ RETURN(0);
-+ }
- kms = lvb.lvb_size;
-
-- pgoff = ((address - vma->vm_start) >> CFS_PAGE_SHIFT) + vma->vm_pgoff;
- size = (kms + CFS_PAGE_SIZE - 1) >> CFS_PAGE_SHIFT;
-+ CDEBUG(D_INFO, "Kms %lu - %lu\n", size, pgoff);
-
- if (pgoff >= size) {
-- lov_stripe_unlock(lsm);
-+ lov_stripe_unlock(lli->lli_smd);
- ll_glimpse_size(inode, LDLM_FL_BLOCK_GRANTED);
-+ lov_stripe_lock(lli->lli_smd);
- } else {
- /* XXX change inode size without ll_inode_size_lock() held!
- * there is a race condition with truncate path. (see
-@@ -446,29 +443,69 @@
- CDEBUG(D_INODE, "ino=%lu, updating i_size %llu\n",
- inode->i_ino, i_size_read(inode));
- }
-- lov_stripe_unlock(lsm);
- }
-
- /* If mapping is writeable, adjust kms to cover this page,
- * but do not extend kms beyond actual file size.
- * policy.l_extent.end is set to the end of the page by policy_from_vma
- * bug 10919 */
-- lov_stripe_lock(lsm);
- if (mode == LCK_PW)
-- obd_adjust_kms(ll_i2obdexp(inode), lsm,
-+ obd_adjust_kms(ll_i2obdexp(inode), lli->lli_smd,
- min_t(loff_t, policy.l_extent.end + 1,
- i_size_read(inode)), 0);
-- lov_stripe_unlock(lsm);
-+ lov_stripe_unlock(lli->lli_smd);
-
- /* disable VM_SEQ_READ and use VM_RAND_READ to make sure that
- * the kernel will not read other pages not covered by ldlm in
- * filemap_nopage. we do our readahead in ll_readpage.
- */
-- rand_read = vma->vm_flags & VM_RAND_READ;
-- seq_read = vma->vm_flags & VM_SEQ_READ;
-+ *save_flags = vma->vm_flags & (VM_RAND_READ | VM_SEQ_READ);
- vma->vm_flags &= ~ VM_SEQ_READ;
- vma->vm_flags |= VM_RAND_READ;
-
-+ return 1;
-+}
-+
-+static void ll_put_extent_lock(struct vm_area_struct *vma, int save_flags,
-+ struct lustre_handle *lockh)
-+{
-+ struct file *filp = vma->vm_file;
-+ struct ll_file_data *fd = LUSTRE_FPRIVATE(filp);
-+ struct inode *inode = filp->f_dentry->d_inode;
-+ ldlm_mode_t mode;
-+
-+ mode = mode_from_vma(vma);
-+ vma->vm_flags &= ~(VM_RAND_READ | VM_SEQ_READ);
-+ vma->vm_flags |= save_flags;
-+
-+ ll_extent_unlock(fd, inode, ll_i2info(inode)->lli_smd, mode, lockh);
-+}
-+
-+#ifndef HAVE_VM_OP_FAULT
-+/**
-+ * Page fault handler.
-+ *
-+ * \param vma - is virtiual area struct related to page fault
-+ * \param address - address when hit fault
-+ * \param type - of fault
-+ *
-+ * \return allocated and filled page for address
-+ * \retval NOPAGE_SIGBUS if page not exist on this address
-+ * \retval NOPAGE_OOM not have memory for allocate new page
-+ */
-+struct page *ll_nopage(struct vm_area_struct *vma, unsigned long address,
-+ int *type)
-+{
-+ struct lustre_handle lockh = { 0 };
-+ int save_fags = 0;
-+ unsigned long pgoff;
-+ struct page *page;
-+ ENTRY;
-+
-+ pgoff = ((address - vma->vm_start) >> CFS_PAGE_SHIFT) + vma->vm_pgoff;
-+ if(!ll_get_extent_lock(vma, pgoff, &save_fags, &lockh))
-+ RETURN(NOPAGE_SIGBUS);
-+
- page = filemap_nopage(vma, address, type);
- if (page != NOPAGE_SIGBUS && page != NOPAGE_OOM)
- LL_CDEBUG_PAGE(D_PAGE, page, "got addr %lu type %lx\n", address,
-@@ -477,13 +514,48 @@
- CDEBUG(D_PAGE, "got addr %lu type %lx - SIGBUS\n", address,
- (long)type);
-
-- vma->vm_flags &= ~VM_RAND_READ;
-- vma->vm_flags |= (rand_read | seq_read);
-+ ll_put_extent_lock(vma, save_fags, &lockh);
-
-- ll_extent_unlock(fd, inode, ll_i2info(inode)->lli_smd, mode, &lockh);
- RETURN(page);
- }
-
-+#else
-+/* New fault() API*/
-+/**
-+ * Page fault handler.
-+ *
-+ * \param vma - is virtiual area struct related to page fault
-+ * \param address - address when hit fault
-+ * \param type - of fault
-+ *
-+ * \return allocated and filled page for address
-+ * \retval NOPAGE_SIGBUS if page not exist on this address
-+ * \retval NOPAGE_OOM not have memory for allocate new page
-+ */
-+int ll_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
-+{
-+ struct lustre_handle lockh = { 0 };
-+ int save_fags = 0;
-+ int rc;
-+ ENTRY;
-+
-+ if(!ll_get_extent_lock(vma, vmf->pgoff, &save_fags, &lockh))
-+ RETURN(VM_FAULT_SIGBUS);
-+
-+ rc = filemap_fault(vma, vmf);
-+ if (vmf->page)
-+ LL_CDEBUG_PAGE(D_PAGE, vmf->page, "got addr %p type NOPAGE\n",
-+ vmf->virtual_address);
-+ else
-+ CDEBUG(D_PAGE, "got addr %p - SIGBUS\n",
-+ vmf->virtual_address);
-+
-+ ll_put_extent_lock(vma, save_fags, &lockh);
-+
-+ RETURN(rc);
-+}
-+#endif
-+
- /* To avoid cancel the locks covering mmapped region for lock cache pressure,
- * we track the mapped vma count by lli_mmap_cnt.
- * ll_vm_open(): when first vma is linked, split locks from lru.
-@@ -548,6 +620,7 @@
- }
- }
-
-+#ifndef HAVE_VM_OP_FAULT
- #ifndef HAVE_FILEMAP_POPULATE
- static int (*filemap_populate)(struct vm_area_struct * area, unsigned long address, unsigned long len, pgprot_t prot, unsigned long pgoff, int nonblock);
- #endif
-@@ -562,6 +635,7 @@
- rc = filemap_populate(area, address, len, prot, pgoff, 1);
- RETURN(rc);
- }
-+#endif
-
- /* return the user space pointer that maps to a file offset via a vma */
- static inline unsigned long file_to_user(struct vm_area_struct *vma, __u64 byte)
-@@ -588,10 +662,14 @@
- }
-
- static struct vm_operations_struct ll_file_vm_ops = {
-- .nopage = ll_nopage,
- .open = ll_vm_open,
- .close = ll_vm_close,
-+#ifdef HAVE_VM_OP_FAULT
-+ .fault = ll_fault,
-+#else
-+ .nopage = ll_nopage,
- .populate = ll_populate,
-+#endif
- };
-
- int ll_file_mmap(struct file * file, struct vm_area_struct * vma)
-@@ -602,7 +680,7 @@
- ll_stats_ops_tally(ll_i2sbi(file->f_dentry->d_inode), LPROC_LL_MAP, 1);
- rc = generic_file_mmap(file, vma);
- if (rc == 0) {
--#ifndef HAVE_FILEMAP_POPULATE
-+#if !defined(HAVE_FILEMAP_POPULATE) && !defined(HAVE_VM_OP_FAULT)
- if (!filemap_populate)
- filemap_populate = vma->vm_ops->populate;
- #endif
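
The mmap rework above boils down to splitting the lock handling out of ll_nopage() (into ll_get_extent_lock()/ll_put_extent_lock()) so the same logic can back either page-fault entry point: .nopage on older kernels, .fault on 2.6.23+. A skeleton of that dual wiring, with hypothetical my_* names:

    #include <linux/mm.h>

    #ifdef HAVE_VM_OP_FAULT
    /* 2.6.23+ API: offset arrives in vmf->pgoff; return VM_FAULT_* flags,
     * with the page stored in vmf->page */
    static int my_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
    {
            return filemap_fault(vma, vmf);
    }
    #else
    /* legacy API: offset is derived from the faulting address; return the
     * page itself, or NOPAGE_SIGBUS/NOPAGE_OOM */
    static struct page *my_nopage(struct vm_area_struct *vma,
                                  unsigned long address, int *type)
    {
            return filemap_nopage(vma, address, type);
    }
    #endif

    static struct vm_operations_struct my_vm_ops = {
    #ifdef HAVE_VM_OP_FAULT
            .fault  = my_fault,
    #else
            .nopage = my_nopage,
    #endif
    };
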
diff --git a/debian/patches/patchless_support/fix_nfs_fid_type.dpatch b/debian/patches/patchless_support/fix_nfs_fid_type.dpatch
deleted file mode 100755
index 222d358..0000000
--- a/debian/patches/patchless_support/fix_nfs_fid_type.dpatch
+++ /dev/null
@@ -1,352 +0,0 @@
-#! /bin/sh /usr/share/dpatch/dpatch-run
-## posix_acl.patch by Patrick Winnertz <winnie@debian.org>
-##
-## All lines beginning with `## DP:' are a description of the patch.
-## DP: Patch which will enable 2.6.24 patchless support for lustre, taken from #14250
-
-@DPATCH@
-diff -urNad lustre~/lustre/llite/llite_internal.h lustre/lustre/llite/llite_internal.h
---- lustre~/lustre/llite/llite_internal.h 2008-11-25 13:59:37.000000000 +0100
-+++ lustre/lustre/llite/llite_internal.h 2008-12-22 10:13:32.000000000 +0100
-@@ -748,9 +748,6 @@
- /* llite/llite_nfs.c */
- extern struct export_operations lustre_export_operations;
- __u32 get_uuid2int(const char *name, int len);
--struct dentry *ll_fh_to_dentry(struct super_block *sb, __u32 *data, int len,
-- int fhtype, int parent);
--int ll_dentry_to_fh(struct dentry *, __u32 *datap, int *lenp, int need_parent);
-
- /* llite/special.c */
- extern struct inode_operations ll_special_inode_operations;
-diff -urNad lustre~/lustre/llite/llite_nfs.c lustre/lustre/llite/llite_nfs.c
---- lustre~/lustre/llite/llite_nfs.c 2008-11-25 13:59:37.000000000 +0100
-+++ lustre/lustre/llite/llite_nfs.c 2008-12-22 10:23:59.000000000 +0100
-@@ -57,11 +57,7 @@
- return (key0 << 1);
- }
-
--#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
--static int ll_nfs_test_inode(struct inode *inode, unsigned long ino, void *opaque)
--#else
- static int ll_nfs_test_inode(struct inode *inode, void *opaque)
--#endif
- {
- struct ll_fid *iid = opaque;
-
-@@ -72,36 +68,30 @@
- }
-
- static struct inode * search_inode_for_lustre(struct super_block *sb,
-- unsigned long ino,
-- unsigned long generation,
-- int mode)
-+ struct ll_fid *iid)
- {
- struct ptlrpc_request *req = NULL;
- struct ll_sb_info *sbi = ll_s2sbi(sb);
-- struct ll_fid fid;
- unsigned long valid = 0;
- int eadatalen = 0, rc;
- struct inode *inode = NULL;
-- struct ll_fid iid = { .id = ino, .generation = generation };
- ENTRY;
-
-- inode = ILOOKUP(sb, ino, ll_nfs_test_inode, &iid);
-+ inode = ILOOKUP(sb, iid->id, ll_nfs_test_inode, iid);
-
- if (inode)
- RETURN(inode);
-- if (S_ISREG(mode)) {
-- rc = ll_get_max_mdsize(sbi, &eadatalen);
-- if (rc)
-- RETURN(ERR_PTR(rc));
-- valid |= OBD_MD_FLEASIZE;
-- }
-- fid.id = (__u64)ino;
-- fid.generation = generation;
-- fid.f_type = mode;
-
-- rc = mdc_getattr(sbi->ll_mdc_exp, &fid, valid, eadatalen, &req);
-+ rc = ll_get_max_mdsize(sbi, &eadatalen);
-+ if (rc)
-+ RETURN(ERR_PTR(rc));
-+
-+ valid |= OBD_MD_FLEASIZE;
-+
-+ /* mds_fid2dentry is ignore f_type */
-+ rc = mdc_getattr(sbi->ll_mdc_exp, iid, valid, eadatalen, &req);
- if (rc) {
-- CERROR("failure %d inode %lu\n", rc, ino);
-+ CERROR("failure %d inode "LPU64"\n", rc, iid->id);
- RETURN(ERR_PTR(rc));
- }
-
-@@ -115,67 +105,35 @@
- RETURN(inode);
- }
-
--static struct dentry *ll_iget_for_nfs(struct super_block *sb, unsigned long ino,
-- __u32 generation, umode_t mode)
-+static struct dentry *ll_iget_for_nfs(struct super_block *sb,
-+ struct ll_fid *iid)
- {
- struct inode *inode;
- struct dentry *result;
--#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
-- struct list_head *lp;
--#endif
- ENTRY;
-
-- if (ino == 0)
-+ if (iid->id == 0)
- RETURN(ERR_PTR(-ESTALE));
-
-- inode = search_inode_for_lustre(sb, ino, generation, mode);
-- if (IS_ERR(inode)) {
-+ inode = search_inode_for_lustre(sb, iid);
-+ if (IS_ERR(inode))
- RETURN(ERR_PTR(PTR_ERR(inode)));
-- }
- if (is_bad_inode(inode) ||
-- (generation && inode->i_generation != generation)){
-+ (iid->generation && inode->i_generation != iid->generation)) {
- /* we didn't find the right inode.. */
- CERROR("Inode %lu, Bad count: %lu %d or version %u %u\n",
- inode->i_ino, (unsigned long)inode->i_nlink,
- atomic_read(&inode->i_count), inode->i_generation,
-- generation);
-+ iid->generation);
- iput(inode);
- RETURN(ERR_PTR(-ESTALE));
- }
-
--#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
- result = d_alloc_anon(inode);
- if (!result) {
- iput(inode);
- RETURN(ERR_PTR(-ENOMEM));
- }
--#else
-- /* now to find a dentry.
-- * If possible, get a well-connected one
-- */
-- spin_lock(&dcache_lock);
-- for (lp = inode->i_dentry.next; lp != &inode->i_dentry ; lp=lp->next) {
-- result = list_entry(lp,struct dentry, d_alias);
-- lock_dentry(result);
-- if (!(result->d_flags & DCACHE_DISCONNECTED)) {
-- dget_locked(result);
-- ll_set_dflags(result, DCACHE_REFERENCED);
-- unlock_dentry(result);
-- spin_unlock(&dcache_lock);
-- iput(inode);
-- RETURN(result);
-- }
-- unlock_dentry(result);
-- }
-- spin_unlock(&dcache_lock);
-- result = d_alloc_root(inode);
-- if (result == NULL) {
-- iput(inode);
-- RETURN(ERR_PTR(-ENOMEM));
-- }
-- result->d_flags |= DCACHE_DISCONNECTED;
--
--#endif
- ll_set_dd(result);
-
- lock_dentry(result);
-@@ -192,57 +150,98 @@
- RETURN(result);
- }
-
--struct dentry *ll_fh_to_dentry(struct super_block *sb, __u32 *data, int len,
-- int fhtype, int parent)
-+#define LUSTRE_NFS_FID 0x94
-+
-+struct lustre_nfs_fid {
-+ struct ll_fid child;
-+ struct ll_fid parent;
-+ umode_t mode;
-+};
-+
-+/* The return value is file handle type:
-+ * 1 -- contains child file handle;
-+ * 2 -- contains child file handle and parent file handle;
-+ * 255 -- error.
-+ */
-+static int ll_encode_fh(struct dentry *de, __u32 *fh, int *plen,
-+ int connectable)
- {
-- switch (fhtype) {
-- case 2:
-- if (len < 5)
-- break;
-- if (parent)
-- return ll_iget_for_nfs(sb, data[3], 0, data[4]);
-- case 1:
-- if (len < 3)
-- break;
-- if (parent)
-- break;
-- return ll_iget_for_nfs(sb, data[0], data[1], data[2]);
-- default: break;
-- }
-- return ERR_PTR(-EINVAL);
-+ struct inode *inode = de->d_inode;
-+ struct inode *parent = de->d_parent->d_inode;
-+ struct lustre_nfs_fid *nfs_fid = (void *)fh;
-+ ENTRY;
-+
-+ CDEBUG(D_INFO, "encoding for (%lu) maxlen=%d minlen=%lu\n",
-+ inode->i_ino, *plen,
-+ sizeof(struct lustre_nfs_fid));
-+
-+ if (*plen < sizeof(struct lustre_nfs_fid))
-+ RETURN(255);
-+
-+ ll_inode2fid(&nfs_fid->child, inode);
-+ ll_inode2fid(&nfs_fid->parent, parent);
-+
-+ nfs_fid->mode = (S_IFMT & inode->i_mode);
-+ *plen = sizeof(struct lustre_nfs_fid);
-+
-+ RETURN(LUSTRE_NFS_FID);
- }
-
--int ll_dentry_to_fh(struct dentry *dentry, __u32 *datap, int *lenp,
-- int need_parent)
-+#ifdef HAVE_FH_TO_DENTRY
-+static struct dentry *ll_fh_to_dentry(struct super_block *sb, struct fid *fid,
-+ int fh_len, int fh_type)
- {
-- if (*lenp < 3)
-- return 255;
-- *datap++ = dentry->d_inode->i_ino;
-- *datap++ = dentry->d_inode->i_generation;
-- *datap++ = (__u32)(S_IFMT & dentry->d_inode->i_mode);
-+ struct lustre_nfs_fid *nfs_fid = (struct lustre_nfs_fid *)fid;
-
-- if (*lenp == 3 || S_ISDIR(dentry->d_inode->i_mode)) {
-- *lenp = 3;
-- return 1;
-- }
-- if (dentry->d_parent) {
-- *datap++ = dentry->d_parent->d_inode->i_ino;
-- *datap++ = (__u32)(S_IFMT & dentry->d_parent->d_inode->i_mode);
-+ if (fh_type != LUSTRE_NFS_FID)
-+ RETURN(ERR_PTR(-EINVAL));
-
-- *lenp = 5;
-- return 2;
-- }
-- *lenp = 3;
-- return 1;
-+ RETURN(ll_iget_for_nfs(sb, &nfs_fid->child));
- }
-+static struct dentry *ll_fh_to_parent(struct super_block *sb, struct fid *fid,
-+ int fh_len, int fh_type)
-+{
-+ struct lustre_nfs_fid *nfs_fid = (struct lustre_nfs_fid *)fid;
-
--#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
--struct dentry *ll_get_dentry(struct super_block *sb, void *data)
-+ if (fh_type != LUSTRE_NFS_FID)
-+ RETURN(ERR_PTR(-EINVAL));
-+ RETURN(ll_iget_for_nfs(sb, &nfs_fid->parent));
-+}
-+
-+#else
-+/*
-+ * This length is counted as amount of __u32,
-+ * It is composed of a fid and a mode
-+ */
-+static struct dentry *ll_decode_fh(struct super_block *sb, __u32 *fh, int fh_len,
-+ int fh_type,
-+ int (*acceptable)(void *, struct dentry *),
-+ void *context)
- {
-- __u32 *inump = (__u32*)data;
-- return ll_iget_for_nfs(sb, inump[0], inump[1], S_IFREG);
-+ struct lustre_nfs_fid *nfs_fid = (void *)fh;
-+ struct dentry *entry;
-+ ENTRY;
-+
-+ CDEBUG(D_INFO, "decoding for "LPU64" fh_len=%d fh_type=%x\n",
-+ nfs_fid->child.id, fh_len, fh_type);
-+
-+ if (fh_type != LUSTRE_NFS_FID)
-+ RETURN(ERR_PTR(-ESTALE));
-+
-+ entry = sb->s_export_op->find_exported_dentry(sb, &nfs_fid->child,
-+ &nfs_fid->parent,
-+ acceptable, context);
-+ RETURN(entry);
- }
-
-+struct dentry *ll_get_dentry(struct super_block *sb, void *data)
-+{
-+ struct lustre_nfs_fid *fid = data;
-+ ENTRY;
-+
-+ RETURN(ll_iget_for_nfs(sb, &fid->child));
-+}
-+#endif
- struct dentry *ll_get_parent(struct dentry *dchild)
- {
- struct ptlrpc_request *req = NULL;
-@@ -254,11 +253,11 @@
- char dotdot[] = "..";
- int rc = 0;
- ENTRY;
--
-+
- LASSERT(dir && S_ISDIR(dir->i_mode));
--
-- sbi = ll_s2sbi(dir->i_sb);
--
-+
-+ sbi = ll_s2sbi(dir->i_sb);
-+
- fid.id = (__u64)dir->i_ino;
- fid.generation = dir->i_generation;
- fid.f_type = S_IFDIR;
-@@ -269,11 +268,12 @@
- CERROR("failure %d inode %lu get parent\n", rc, dir->i_ino);
- return ERR_PTR(rc);
- }
-- body = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF, sizeof (*body));
--
-+ body = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF, sizeof (*body));
-+
- LASSERT((body->valid & OBD_MD_FLGENER) && (body->valid & OBD_MD_FLID));
--
-- result = ll_iget_for_nfs(dir->i_sb, body->ino, body->generation, S_IFDIR);
-+ fid.id = body->ino;
-+ fid.generation = body->generation;
-+ result = ll_iget_for_nfs(dir->i_sb, &fid);
-
- if (IS_ERR(result))
- rc = PTR_ERR(result);
-@@ -282,10 +282,18 @@
- if (rc)
- return ERR_PTR(rc);
- RETURN(result);
--}
-+}
-
-+
-+#if THREAD_SIZE >= 8192
- struct export_operations lustre_export_operations = {
-- .get_parent = ll_get_parent,
-- .get_dentry = ll_get_dentry,
-+ .encode_fh = ll_encode_fh,
-+#ifdef HAVE_FH_TO_DENTRY
-+ .fh_to_dentry = ll_fh_to_dentry,
-+ .fh_to_parent = ll_fh_to_parent,
-+#else
-+ .get_dentry = ll_get_dentry,
-+ .decode_fh = ll_decode_fh,
-+#endif
- };
- #endif
diff --git a/debian/patches/patchless_support/fix_path_API_changes.dpatch b/debian/patches/patchless_support/fix_path_API_changes.dpatch
deleted file mode 100755
index 291bf83..0000000
--- a/debian/patches/patchless_support/fix_path_API_changes.dpatch
+++ /dev/null
@@ -1,83 +0,0 @@
-#! /bin/sh /usr/share/dpatch/dpatch-run
-## posix_acl.patch by Patrick Winnertz <winnie@debian.org>
-##
-## All lines beginning with `## DP:' are a description of the patch.
-## DP: Patch which will enable 2.6.24 patchless support for lustre, taken from #14250
-
-@DPATCH@
-Index: b1_8_gate/lustre/llite/symlink.c
-===================================================================
---- b1_8_gate.orig/lustre/llite/symlink.c 2008-11-27 07:36:47.000000000 +0300
-+++ b1_8_gate/lustre/llite/symlink.c 2008-11-27 07:37:23.000000000 +0300
-@@ -177,8 +177,12 @@
- up(&lli->lli_size_sem);
- }
- if (rc) {
-+#ifdef HAVE_PATH_RELEASE
- path_release(nd); /* Kernel assumes that ->follow_link()
- releases nameidata on error */
-+#else
-+ path_put(&nd->path);
-+#endif
- GOTO(out, rc);
- }
-
-Index: b1_8_gate/lustre/mgc/mgc_request.c
-===================================================================
---- b1_8_gate.orig/lustre/mgc/mgc_request.c 2008-11-27 07:36:47.000000000 +0300
-+++ b1_8_gate/lustre/mgc/mgc_request.c 2008-11-27 07:37:23.000000000 +0300
-@@ -415,7 +415,7 @@
- obd->obd_lvfs_ctxt.fs = get_ds();
-
- push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
-- dentry = lookup_one_len(MOUNT_CONFIGS_DIR, current->fs->pwd,
-+ dentry = lookup_one_len(MOUNT_CONFIGS_DIR, cfs_fs_pwd(current->fs),
- strlen(MOUNT_CONFIGS_DIR));
- pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
- if (IS_ERR(dentry)) {
-Index: b1_8_gate/lustre/ptlrpc/service.c
-===================================================================
---- b1_8_gate.orig/lustre/ptlrpc/service.c 2008-11-27 07:36:47.000000000 +0300
-+++ b1_8_gate/lustre/ptlrpc/service.c 2008-11-27 07:37:23.000000000 +0300
-@@ -1501,7 +1501,7 @@
- cfs_daemonize(name);
- exit_fs(cfs_current());
- current->fs = fs;
-- ll_set_fs_pwd(current->fs, init_task.fs->pwdmnt, init_task.fs->pwd);
-+ ll_set_fs_pwd(current->fs, cfs_fs_mnt(init_task.fs), cfs_fs_pwd(init_task.fs));
- }
-
- static void
-Index: b1_8_gate/lustre/lvfs/lvfs_linux.c
-===================================================================
---- b1_8_gate.orig/lustre/lvfs/lvfs_linux.c 2008-11-27 07:36:47.000000000 +0300
-+++ b1_8_gate/lustre/lvfs/lvfs_linux.c 2008-11-27 07:37:23.000000000 +0300
-@@ -148,10 +148,10 @@
- */
-
- save->fs = get_fs();
-- LASSERT(atomic_read(¤t->fs->pwd->d_count));
-+ LASSERT(atomic_read(&cfs_fs_pwd(current->fs)->d_count));
- LASSERT(atomic_read(&new_ctx->pwd->d_count));
-- save->pwd = dget(current->fs->pwd);
-- save->pwdmnt = mntget(current->fs->pwdmnt);
-+ save->pwd = dget(cfs_fs_pwd(current->fs));
-+ save->pwdmnt = mntget(cfs_fs_mnt(current->fs));
- save->luc.luc_umask = current->fs->umask;
-
- LASSERT(save->pwd);
-@@ -205,10 +205,10 @@
- atomic_read(¤t->fs->pwdmnt->mnt_count));
- */
-
-- LASSERTF(current->fs->pwd == new_ctx->pwd, "%p != %p\n",
-- current->fs->pwd, new_ctx->pwd);
-- LASSERTF(current->fs->pwdmnt == new_ctx->pwdmnt, "%p != %p\n",
-- current->fs->pwdmnt, new_ctx->pwdmnt);
-+ LASSERTF(cfs_fs_pwd(current->fs) == new_ctx->pwd, "%p != %p\n",
-+ cfs_fs_pwd(current->fs), new_ctx->pwd);
-+ LASSERTF(cfs_fs_mnt(current->fs) == new_ctx->pwdmnt, "%p != %p\n",
-+ cfs_fs_mnt(current->fs), new_ctx->pwdmnt);
-
- set_fs(saved->fs);
- ll_set_fs_pwd(current->fs, saved->pwdmnt, saved->pwd);
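
Every hunk in this file chases the same 2.6.26 change: fs_struct no longer exposes pwd/pwdmnt as separate dentry and vfsmount fields but a single struct path, and set_fs_pwd is no longer exported. The cfs_fs_pwd()/cfs_fs_mnt() accessors (added in configure_for_HEAD.dpatch above) hide the difference. A sketch of the pattern, with a hypothetical helper name:

    #include <linux/sched.h>                /* current */
    #include <linux/fs_struct.h>
    #include <linux/lustre_compat25.h>      /* cfs_fs_pwd()/cfs_fs_mnt() */

    static struct dentry *my_current_pwd(void)
    {
            /* expands to current->fs->pwd.dentry on 2.6.26+ and to
             * current->fs->pwd on older kernels */
            return cfs_fs_pwd(current->fs);
    }
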
diff --git a/debian/patches/patchless_support/fix_request_module_calls.dpatch b/debian/patches/patchless_support/fix_request_module_calls.dpatch
deleted file mode 100755
index e01c88f..0000000
--- a/debian/patches/patchless_support/fix_request_module_calls.dpatch
+++ /dev/null
@@ -1,20 +0,0 @@
-#! /bin/sh /usr/share/dpatch/dpatch-run
-## posix_acl.patch by Patrick Winnertz <winnie@debian.org>
-##
-## All lines beginning with `## DP:' are a description of the patch.
-## DP: Patch which will enable 2.6.24 patchless support for lustre, taken from #14250
-
-@DPATCH@
-Index: b1_8_gate/lnet/lnet/api-ni.c
-===================================================================
---- b1_8_gate.orig/lnet/lnet/api-ni.c 2008-10-21 19:12:50.000000000 +0400
-+++ b1_8_gate/lnet/lnet/api-ni.c 2008-11-27 16:06:07.000000000 +0300
-@@ -1032,7 +1032,7 @@
- #ifdef __KERNEL__
- if (lnd == NULL) {
- LNET_MUTEX_UP(&the_lnet.ln_lnd_mutex);
-- rc = request_module(libcfs_lnd2modname(lnd_type));
-+ rc = request_module("%s", libcfs_lnd2modname(lnd_type));
- LNET_MUTEX_DOWN(&the_lnet.ln_lnd_mutex);
-
- lnd = lnet_find_lnd_by_type(lnd_type);
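
The one-line api-ni.c change above is a format-string hardening fix, not a behavior change: request_module() takes a printf-style format, so the module name must be passed as an argument rather than as the format itself. The safe idiom, as a sketch (the wrapper name is hypothetical):

    #include <linux/kmod.h>

    static int my_load_lnd(const char *modname)
    {
            /* never pass externally derived text as the format string */
            return request_module("%s", modname);
    }
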
diff --git a/debian/patches/patchless_support/lprocfs_changes.dpatch b/debian/patches/patchless_support/lprocfs_changes.dpatch
deleted file mode 100755
index 91819bc..0000000
--- a/debian/patches/patchless_support/lprocfs_changes.dpatch
+++ /dev/null
@@ -1,78 +0,0 @@
-#! /bin/sh /usr/share/dpatch/dpatch-run
-## posix_acl.patch by Patrick Winnertz <winnie@debian.org>
-##
-## All lines beginning with `## DP:' are a description of the patch.
-## DP: Patch which will enable 2.6.24 patchless support for lustre, taken from #14250
-
-@DPATCH@
-Index: HEAD/lustre/include/lprocfs_status.h
-===================================================================
---- HEAD.orig/lustre/include/lprocfs_status.h 2008-12-08 05:48:08.000000000 +0200
-+++ HEAD/lustre/include/lprocfs_status.h 2008-12-08 13:42:28.000000000 +0200
-@@ -563,6 +563,8 @@
- #define LPROCFS_EXIT() do { \
- up_read(&_lprocfs_lock); \
- } while(0)
-+
-+#ifdef HAVE_PROCFS_DELETED
- #define LPROCFS_ENTRY_AND_CHECK(dp) do { \
- typecheck(struct proc_dir_entry *, dp); \
- LPROCFS_ENTRY(); \
-@@ -571,6 +573,14 @@
- return -ENODEV; \
- } \
- } while(0)
-+#define LPROCFS_CHECK_DELETED(dp) ((dp)->deleted)
-+#else
-+
-+#define LPROCFS_ENTRY_AND_CHECK(dp) \
-+ LPROCFS_ENTRY();
-+#define LPROCFS_CHECK_DELETED(dp) (0)
-+#endif
-+
- #define LPROCFS_WRITE_ENTRY() do { \
- down_write(&_lprocfs_lock); \
- } while(0)
-@@ -578,6 +588,7 @@
- up_write(&_lprocfs_lock); \
- } while(0)
-
-+
- /* You must use these macros when you want to refer to
- * the import in a client obd_device for a lprocfs entry */
- #define LPROCFS_CLIMP_CHECK(obd) do { \
-Index: HEAD/lustre/obdclass/linux/linux-module.c
-===================================================================
---- HEAD.orig/lustre/obdclass/linux/linux-module.c 2008-12-08 05:48:20.000000000 +0200
-+++ HEAD/lustre/obdclass/linux/linux-module.c 2008-12-08 13:50:36.000000000 +0200
-@@ -418,7 +418,7 @@
- ENTRY;
-
- obd_sysctl_init();
-- proc_lustre_root = lprocfs_register("lustre", proc_root_fs,
-+ proc_lustre_root = lprocfs_register("fs/lustre", NULL,
- lprocfs_base, NULL);
- rc = lprocfs_seq_create(proc_lustre_root, "devices", 0444,
- &obd_device_list_fops, NULL);
-Index: HEAD/lustre/obdclass/lprocfs_status.c
-===================================================================
---- HEAD.orig/lustre/obdclass/lprocfs_status.c 2008-12-08 05:48:20.000000000 +0200
-+++ HEAD/lustre/obdclass/lprocfs_status.c 2008-12-08 13:42:28.000000000 +0200
-@@ -173,7 +173,7 @@
-
- LPROCFS_ENTRY();
- OBD_FAIL_TIMEOUT(OBD_FAIL_LPROC_REMOVE, 10);
-- if (!dp->deleted && dp->read_proc)
-+ if (!LPROCFS_CHECK_DELETED(dp) && dp->read_proc)
- rc = dp->read_proc(page, &start, *ppos, CFS_PAGE_SIZE,
- &eof, dp->data);
- LPROCFS_EXIT();
-@@ -213,7 +213,7 @@
- int rc = -EIO;
-
- LPROCFS_ENTRY();
-- if (!dp->deleted && dp->write_proc)
-+ if (!LPROCFS_CHECK_DELETED(dp) && dp->write_proc)
- rc = dp->write_proc(f, buf, size, dp->data);
- LPROCFS_EXIT();
- return rc;
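
2.6.24 removed the deleted member from struct proc_dir_entry, so the lprocfs read/write wrappers can no longer poke it unconditionally; the patch above routes the test through LPROCFS_CHECK_DELETED(), which degrades to a constant 0 on new kernels and leaves correctness to the _lprocfs_lock rwsem. The shape of that compat, restated as a standalone sketch:

    #include <linux/proc_fs.h>

    #ifdef HAVE_PROCFS_DELETED
    # define LPROCFS_CHECK_DELETED(dp) ((dp)->deleted)  /* pre-2.6.24 */
    #else
    # define LPROCFS_CHECK_DELETED(dp) (0)  /* member gone; rely on locking */
    #endif
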
diff --git a/debian/patches/patchless_support/lustre_loop_devices_adaption.dpatch b/debian/patches/patchless_support/lustre_loop_devices_adaption.dpatch
deleted file mode 100755
index bc3b884..0000000
--- a/debian/patches/patchless_support/lustre_loop_devices_adaption.dpatch
+++ /dev/null
@@ -1,84 +0,0 @@
-#! /bin/sh /usr/share/dpatch/dpatch-run
-## posix_acl.patch by Patrick Winnertz <winnie@debian.org>
-##
-## All lines beginning with `## DP:' are a description of the patch.
-## DP: Patch which will enable 2.6.24 patchless support for lustre, taken from #14250
-
-@DPATCH@
-Index: HEAD/lustre/llite/lloop.c
-===================================================================
---- HEAD.orig/lustre/llite/lloop.c 2008-11-17 11:36:35.000000000 +0200
-+++ HEAD/lustre/llite/lloop.c 2008-12-17 23:29:17.000000000 +0200
-@@ -152,7 +152,7 @@
- struct semaphore lo_bh_mutex;
- atomic_t lo_pending;
-
-- request_queue_t *lo_queue;
-+ struct request_queue *lo_queue;
-
- /* data to handle bio for lustre. */
- struct lo_request_data {
-@@ -288,7 +288,7 @@
- return bio;
- }
-
--static int loop_make_request(request_queue_t *q, struct bio *old_bio)
-+static int loop_make_request(struct request_queue *q, struct bio *old_bio)
- {
- struct lloop_device *lo = q->queuedata;
- int rw = bio_rw(old_bio);
-@@ -317,7 +317,7 @@
- if (atomic_dec_and_test(&lo->lo_pending))
- up(&lo->lo_bh_mutex);
- out:
-- bio_io_error(old_bio, old_bio->bi_size);
-+ cfs_bio_io_error(old_bio, old_bio->bi_size);
- return 0;
- inactive:
- spin_unlock_irq(&lo->lo_lock);
-@@ -327,7 +327,7 @@
- /*
- * kick off io on the underlying address space
- */
--static void loop_unplug(request_queue_t *q)
-+static void loop_unplug(struct request_queue *q)
- {
- struct lloop_device *lo = q->queuedata;
-
-@@ -339,7 +339,7 @@
- {
- int ret;
- ret = do_bio_filebacked(lo, bio);
-- bio_endio(bio, bio->bi_size, ret);
-+ cfs_bio_endio(bio, bio->bi_size, ret);
- }
-
- /*
-@@ -366,7 +366,8 @@
- up(&lo->lo_sem);
-
- for (;;) {
-- down_interruptible(&lo->lo_bh_mutex);
-+ if(!down_interruptible(&lo->lo_bh_mutex))
-+ continue;
- /*
- * could be upped because of tear-down, not because of
- * pending work
-@@ -743,7 +744,7 @@
-
- out_mem4:
- while (i--)
-- blk_put_queue(loop_dev[i].lo_queue);
-+ blk_cleanup_queue(loop_dev[i].lo_queue);
- i = max_loop;
- out_mem3:
- while (i--)
-@@ -765,7 +766,7 @@
- ll_iocontrol_unregister(ll_iocontrol_magic);
- for (i = 0; i < max_loop; i++) {
- del_gendisk(disks[i]);
-- blk_put_queue(loop_dev[i].lo_queue);
-+ blk_cleanup_queue(loop_dev[i].lo_queue);
- put_disk(disks[i]);
- }
- if (ll_unregister_blkdev(lloop_major, "lloop"))
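Three kernel API shifts drive the deleted hunks above, and they recur in the
2.6.26 patch added below: request_queue_t gave way to the spelled-out struct
request_queue, bio_endio() and bio_io_error() lost their bytes_done argument
in 2.6.24, and blk_cleanup_queue() replaced blk_put_queue() as the proper
teardown for a queue obtained from blk_alloc_queue(). The cfs_bio_* wrappers
the patch calls live in libcfs; a plausible reconstruction of them (assumed,
not quoted from the tree), keyed on the HAVE_BIO_ENDIO_2ARG probe defined
later in this commit:

    #include <linux/bio.h>

    /* 2.6.24+ dropped the bytes_done argument, so the wrappers swallow
     * it there and pass it through unchanged on older kernels. */
    #ifdef HAVE_BIO_ENDIO_2ARG
    #define cfs_bio_endio(bio, nob, error)  bio_endio(bio, error)
    #define cfs_bio_io_error(bio, nob)      bio_io_error(bio)
    #else
    #define cfs_bio_endio(bio, nob, error)  bio_endio(bio, nob, error)
    #define cfs_bio_io_error(bio, nob)      bio_io_error(bio, nob)
    #endif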
diff --git a/debian/patches/patchless_support/new_page_fault_method.dpatch b/debian/patches/patchless_support/new_page_fault_method.dpatch
deleted file mode 100755
index 26f84d6..0000000
--- a/debian/patches/patchless_support/new_page_fault_method.dpatch
+++ /dev/null
@@ -1,442 +0,0 @@
-#! /bin/sh /usr/share/dpatch/dpatch-run
-## posix_acl.patch by Patrick Winnertz <winnie@debian.org>
-##
-## All lines beginning with `## DP:' are a description of the patch.
-## DP: Patch which will enable 2.6.24 patchless support for lustre, taken from #14250
-
-@DPATCH@
-Index: HEAD/lustre/llite/vvp_io.c
-===================================================================
---- HEAD.orig/lustre/llite/vvp_io.c 2008-12-17 17:15:38.000000000 +0200
-+++ HEAD/lustre/llite/vvp_io.c 2008-12-17 17:15:38.000000000 +0200
-@@ -559,6 +559,61 @@
- RETURN(result);
- }
-
-+#ifndef HAVE_VM_OP_FAULT
-+static int vvp_io_kernel_fault(struct vvp_fault_io *cfio)
-+{
-+ cfs_page_t *result;
-+
-+ result = filemap_nopage(cfio->ft_vma, cfio->ft_address, cfio->ft_type);
-+ if (result != NOPAGE_SIGBUS && result != NOPAGE_OOM)
-+ LL_CDEBUG_PAGE(D_PAGE, result,
-+ "got addr %lu type %lx\n",
-+ cfio->ft_address, (long)cfio->ft_type);
-+ else
-+ CDEBUG(D_PAGE, "got addr %lu type %lx - SIGBUS\n",
-+ cfio->ft_address, (long)cfio->ft_type);
-+
-+ if (result == NOPAGE_SIGBUS)
-+ return -EFAULT;
-+ else if (result == NOPAGE_OOM)
-+ return -ENOMEM;
-+
-+ /* new fault API can return page locked already */
-+ lock_page(result);
-+ cfio->ft_page = result;
-+
-+ return 0;
-+}
-+#else
-+static int vvp_io_kernel_fault(struct vvp_fault_io *cfio)
-+{
-+ cfio->ft_flags = filemap_fault(cfio->ft_vma, cfio->ft_vmf);
-+ if (cfio->ft_vmf->page)
-+ LL_CDEBUG_PAGE(D_PAGE, cfio->ft_vmf->page,
-+ "got addr %p type NOPAGE\n",
-+ cfio->ft_vmf->virtual_address);
-+ else
-+ CDEBUG(D_PAGE, "got addr %p - SIGBUS\n",
-+ cfio->ft_vmf->virtual_address);
-+
-+ if (unlikely (cfio->ft_flags & VM_FAULT_ERROR))
-+ return -EFAULT;
-+
-+ if (unlikely (cfio->ft_flags & VM_FAULT_NOPAGE))
-+ return -ENOMEM;
-+
-+ if (unlikely(!(cfio->ft_flags & VM_FAULT_LOCKED))) {
-+ lock_page(cfio->ft_vmf->page);
-+ cfio->ft_flags |= VM_FAULT_LOCKED;
-+ }
-+
-+ cfio->ft_page = cfio->ft_vmf->page;
-+
-+ return 0;
-+}
-+
-+#endif
-+
- static int vvp_io_fault_start(const struct lu_env *env,
- const struct cl_io_slice *ios)
- {
-@@ -568,9 +623,12 @@
- struct inode *inode = ccc_object_inode(obj);
- struct cl_fault_io *fio = &io->u.ci_fault;
- struct vvp_fault_io *cfio = &vio->u.fault;
-- cfs_page_t *vmpage;
- loff_t offset;
-+ int kernel_result = 0;
- int result = 0;
-+ struct cl_page *page;
-+ loff_t size;
-+ pgoff_t last; /* last page in a file data region */
-
- LASSERT(vio->cui_oneshot == 0);
-
-@@ -587,55 +645,43 @@
- if (result != 0)
- return result;
-
-- vmpage = filemap_nopage(cfio->ft_vma, cfio->ft_address, cfio->ft_type);
-- if (vmpage != NOPAGE_SIGBUS && vmpage != NOPAGE_OOM)
-- LL_CDEBUG_PAGE(D_PAGE, vmpage,
-- "got addr %lu type %lx\n",
-- cfio->ft_address, (long)cfio->ft_type);
-- else
-- CDEBUG(D_PAGE, "got addr %lu type %lx - SIGBUS\n",
-- cfio->ft_address, (long)cfio->ft_type);
--
-- if (vmpage == NOPAGE_SIGBUS)
-- result = -EFAULT;
-- else if (vmpage == NOPAGE_OOM)
-- result = -ENOMEM;
-- else {
-- struct cl_page *page;
-- loff_t size;
-- pgoff_t last; /* last page in a file data region */
--
-- /* Temporarily lock vmpage to keep cl_page_find() happy. */
-- lock_page(vmpage);
-- page = cl_page_find(env, obj, fio->ft_index, vmpage,
-- CPT_CACHEABLE);
-- unlock_page(vmpage);
-- if (!IS_ERR(page)) {
-- size = i_size_read(inode);
-- last = cl_index(obj, size - 1);
-- if (fio->ft_index == last)
-- /*
-- * Last page is mapped partially.
-- */
-- fio->ft_nob = size - cl_offset(obj,
-- fio->ft_index);
-- else
-- fio->ft_nob = cl_page_size(obj);
-- lu_ref_add(&page->cp_reference, "fault", io);
-- fio->ft_page = page;
-- /*
-- * Certain 2.6 kernels return not-NULL from
-- * filemap_nopage() when page is beyond the file size,
-- * on the grounds that "An external ptracer can access
-- * pages that normally aren't accessible.." Don't
-- * propagate such page fault to the lower layers to
-- * avoid side-effects like KMS updates.
-- */
-- if (fio->ft_index > last)
-- result = +1;
-- } else
-- result = PTR_ERR(page);
-+ /* must return locked page */
-+ kernel_result = vvp_io_kernel_fault(cfio);
-+ if (kernel_result != 0)
-+ return kernel_result;
-+
-+ page = cl_page_find(env, obj, fio->ft_index, cfio->ft_page,
-+ CPT_CACHEABLE);
-+ if (IS_ERR(page)) {
-+ unlock_page(cfio->ft_page);
-+ page_cache_release(cfio->ft_page);
-+ cfio->ft_page = NULL;
-+ return PTR_ERR(page);
- }
-+
-+ size = i_size_read(inode);
-+ last = cl_index(obj, size - 1);
-+ if (fio->ft_index == last)
-+ /*
-+ * Last page is mapped partially.
-+ */
-+ fio->ft_nob = size - cl_offset(obj, fio->ft_index);
-+ else
-+ fio->ft_nob = cl_page_size(obj);
-+
-+ lu_ref_add(&page->cp_reference, "fault", io);
-+ fio->ft_page = page;
-+ /*
-+ * Certain 2.6 kernels return not-NULL from
-+ * filemap_nopage() when page is beyond the file size,
-+ * on the grounds that "An external ptracer can access
-+ * pages that normally aren't accessible.." Don't
-+ * propagate such page fault to the lower layers to
-+ * avoid side-effects like KMS updates.
-+ */
-+ if (fio->ft_index > last)
-+ result = +1;
-+
- return result;
- }
-
-Index: HEAD/lustre/llite/llite_mmap.c
-===================================================================
---- HEAD.orig/lustre/llite/llite_mmap.c 2008-11-17 11:36:34.000000000 +0200
-+++ HEAD/lustre/llite/llite_mmap.c 2008-12-17 17:15:38.000000000 +0200
-@@ -72,6 +72,8 @@
- struct page *ll_nopage(struct vm_area_struct *vma, unsigned long address,
- int *type);
-
-+static struct vm_operations_struct ll_file_vm_ops;
-+
- void policy_from_vma(ldlm_policy_data_t *policy,
- struct vm_area_struct *vma, unsigned long addr,
- size_t count)
-@@ -95,7 +97,7 @@
- spin_lock(&mm->page_table_lock);
- for(vma = find_vma(mm, addr);
- vma != NULL && vma->vm_start < (addr + count); vma = vma->vm_next) {
-- if (vma->vm_ops && vma->vm_ops->nopage == ll_nopage &&
-+ if (vma->vm_ops && vma->vm_ops == &ll_file_vm_ops &&
- vma->vm_flags & VM_SHARED) {
- ret = vma;
- break;
-@@ -105,6 +107,7 @@
- RETURN(ret);
- }
-
-+#ifndef HAVE_VM_OP_FAULT
- /**
- * Lustre implementation of a vm_operations_struct::nopage() method, called by
- * VM to server page fault (both in kernel and user space).
-@@ -125,11 +128,12 @@
- struct page *ll_nopage(struct vm_area_struct *vma, unsigned long address,
- int *type)
- {
-+
- struct file *file = vma->vm_file;
- struct inode *inode = file->f_dentry->d_inode;
- struct lu_env *env;
- struct cl_io *io;
-- struct page *page = NULL;
-+ struct page *page = NOPAGE_SIGBUS;
- struct cl_env_nest nest;
- int result;
-
-@@ -143,7 +147,7 @@
- * one.
- */
- env = cl_env_nested_get(&nest);
-- if (!IS_ERR(env)) {
-+ if (IS_ERR(env)) {
- pgoff_t pg_offset;
- const unsigned long writable = VM_SHARED|VM_WRITE;
- unsigned long ra_flags;
-@@ -183,16 +187,25 @@
- cio->cui_fd = LUSTRE_FPRIVATE(file);
-
- result = cl_io_loop(env, io);
-- if (result == 0) {
-- LASSERT(fio->ft_page != NULL);
-- page = cl_page_vmpage(env, fio->ft_page);
-- } else if (result == -EFAULT) {
-- page = NOPAGE_SIGBUS;
-- } else if (result == -ENOMEM) {
-- page = NOPAGE_OOM;
-- }
-- } else
-+ } else {
- result = io->ci_result;
-+ }
-+
-+ switch (result) {
-+ case 0:
-+ LASSERT(fio->ft_page != NULL);
-+ page = vio->u.fault.ft_page;
-+ page_unlock(page);
-+ break;
-+ case -EFAULT:
-+ page = NOPAGE_SIGBUS;
-+ break;
-+ case -ENOMEM:
-+ page = NOPAGE_OOM;
-+ break;
-+ default:
-+ LBUG();
-+ }
-
- vma->vm_flags &= ~VM_RAND_READ;
- vma->vm_flags |= ra_flags;
-@@ -200,8 +213,99 @@
- cl_io_fini(env, io);
- cl_env_nested_put(&nest, env);
- }
-+
- RETURN(page);
- }
-+#else
-+/* New fault() API*/
-+/**
-+ * Page fault handler.
-+ *
-+ * \param vma - is virtiual area struct related to page fault
-+ * \param address - address when hit fault
-+ * \param type - of fault
-+ *
-+ * \return allocated and filled page for address
-+ * \retval NOPAGE_SIGBUS if page not exist on this address
-+ * \retval NOPAGE_OOM not have memory for allocate new page
-+ */
-+int ll_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
-+{
-+ struct file *file = vma->vm_file;
-+ struct inode *inode = file->f_dentry->d_inode;
-+ struct lu_env *env;
-+ struct cl_io *io;
-+ const unsigned long writable = VM_SHARED|VM_WRITE;
-+ unsigned long ra_flags;
-+ struct cl_fault_io *fio;
-+ struct cl_env_nest nest;
-+ int result;
-+ int fault_ret = 0;
-+
-+ ENTRY;
-+
-+ /*
-+ * vm_operations_struct::nopage() can be called when lustre IO is
-+ * already active for the current thread, e.g., when doing read/write
-+ * against user level buffer mapped from Lustre buffer. To avoid
-+ * stomping on existing context, optionally force an allocation of a new
-+ * one.
-+ */
-+ env = cl_env_nested_get(&nest);
-+ if (IS_ERR(env))
-+ RETURN(VM_FAULT_ERROR);
-+
-+ io = &ccc_env_info(env)->cti_io;
-+ io->ci_obj = ll_i2info(inode)->lli_clob;
-+ LASSERT(io->ci_obj != NULL);
-+
-+ fio = &io->u.ci_fault;
-+ fio->ft_index = vmf->pgoff + vma->vm_pgoff;
-+ fio->ft_writable = (vma->vm_flags&writable) == writable;
-+ fio->ft_executable = vma->vm_flags&VM_EXEC;
-+
-+ /*
-+ * disable VM_SEQ_READ and use VM_RAND_READ to make sure that
-+ * the kernel will not read other pages not covered by ldlm in
-+ * filemap_nopage. we do our readahead in ll_readpage.
-+ */
-+ ra_flags = vma->vm_flags & (VM_RAND_READ|VM_SEQ_READ);
-+ vma->vm_flags &= ~VM_SEQ_READ;
-+ vma->vm_flags |= VM_RAND_READ;
-+
-+ CDEBUG(D_INFO, "vm_flags: %lx (%lu %i %i)\n", vma->vm_flags,
-+ fio->ft_index, fio->ft_writable, fio->ft_executable);
-+
-+ if (cl_io_init(env, io, CIT_FAULT, io->ci_obj) == 0) {
-+ struct vvp_io *vio = vvp_env_io(env);
-+ struct ccc_io *cio = ccc_env_io(env);
-+
-+ LASSERT(cio->cui_cl.cis_io == io);
-+
-+ vio->u.fault.ft_vma = vma;
-+ vio->u.fault.ft_vmf = vmf;
-+ cio->cui_fd = LUSTRE_FPRIVATE(file);
-+
-+ result = cl_io_loop(env, io);
-+ fault_ret = vio->u.fault.ft_flags;
-+ if (result != 0)
-+ fault_ret |= VM_FAULT_ERROR;
-+ } else {
-+ if(io->ci_result)
-+ fault_ret = VM_FAULT_ERROR;
-+ }
-+
-+ vma->vm_flags |= ra_flags;
-+
-+ cl_io_fini(env, io);
-+ cl_env_nested_put(&nest, env);
-+
-+ RETURN(fault_ret);
-+}
-+
-+
-+
-+#endif
-
- /**
- * To avoid cancel the locks covering mmapped region for lock cache pressure,
-@@ -234,6 +338,7 @@
- EXIT;
- }
-
-+#ifndef HAVE_VM_OP_FAULT
- #ifndef HAVE_FILEMAP_POPULATE
- static int (*filemap_populate)(struct vm_area_struct * area, unsigned long address, unsigned long len, pgprot_t prot, unsigned long pgoff, int nonblock);
- #endif
-@@ -248,6 +353,7 @@
- rc = filemap_populate(area, address, len, prot, pgoff, 1);
- RETURN(rc);
- }
-+#endif
-
- /* return the user space pointer that maps to a file offset via a vma */
- static inline unsigned long file_to_user(struct vm_area_struct *vma, __u64 byte)
-@@ -274,10 +380,15 @@
- }
-
- static struct vm_operations_struct ll_file_vm_ops = {
-+#ifndef HAVE_VM_OP_FAULT
- .nopage = ll_nopage,
-+ .populate = ll_populate,
-+
-+#else
-+ .fault = ll_fault,
-+#endif
- .open = ll_vm_open,
- .close = ll_vm_close,
-- .populate = ll_populate,
- };
-
- int ll_file_mmap(struct file * file, struct vm_area_struct * vma)
-@@ -288,7 +399,7 @@
- ll_stats_ops_tally(ll_i2sbi(file->f_dentry->d_inode), LPROC_LL_MAP, 1);
- rc = generic_file_mmap(file, vma);
- if (rc == 0) {
--#if !defined(HAVE_FILEMAP_POPULATE)
-+#if !defined(HAVE_FILEMAP_POPULATE) && !defined(HAVE_VM_OP_FAULT)
- if (!filemap_populate)
- filemap_populate = vma->vm_ops->populate;
- #endif
-Index: HEAD/lustre/llite/llite_internal.h
-===================================================================
---- HEAD.orig/lustre/llite/llite_internal.h 2008-12-17 17:15:38.000000000 +0200
-+++ HEAD/lustre/llite/llite_internal.h 2008-12-17 17:35:56.000000000 +0200
-@@ -819,6 +819,11 @@
- time_t ft_mtime;
- struct vm_area_struct *ft_vma;
- /**
-+ * locked page returned from vvp_io
-+ */
-+ cfs_page_t *ft_page;
-+#ifndef HAVE_VM_OP_FAULT
-+ /**
- * Virtual address at which fault occurred.
- */
- unsigned long ft_address;
-@@ -826,6 +831,16 @@
- * Fault type, as to be supplied to filemap_nopage().
- */
- int *ft_type;
-+#else
-+ /**
-+ * kernel fault info
-+ */
-+ struct vm_fault *ft_vmf;
-+ /**
-+ * fault API used bitflags for return code save it.
-+ */
-+ unsigned int ft_flags;
-+#endif
- } fault;
- } u;
- /**
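The heart of this deleted patch (carried forward into the 2.6.26 patch below)
is the migration from vm_operations_struct::nopage(), which returns a struct
page or the NOPAGE_SIGBUS/NOPAGE_OOM sentinels, to the ->fault() callback
introduced in 2.6.23, which reports through VM_FAULT_* flags in a struct
vm_fault. Reduced to its skeleton, with the Lustre handlers stubbed out purely
for illustration:

    #include <linux/mm.h>

    #ifndef HAVE_VM_OP_FAULT
    /* pre-2.6.23: hand back the page, or a NOPAGE_* sentinel */
    static struct page *ll_nopage(struct vm_area_struct *vma,
                                  unsigned long address, int *type)
    {
            return NOPAGE_SIGBUS;            /* stub */
    }
    #else
    /* 2.6.23+: fill vmf->page and answer with VM_FAULT_* bits */
    static int ll_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
    {
            return VM_FAULT_SIGBUS;          /* stub */
    }
    #endif

    static struct vm_operations_struct ll_file_vm_ops = {
    #ifndef HAVE_VM_OP_FAULT
            .nopage = ll_nopage,   /* the real patch also wires .populate */
    #else
            .fault  = ll_fault,
    #endif
            /* .open and .close as in the patch */
    };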
diff --git a/debian/patches/patchless_support/nfs_changes_new_API.dpatch b/debian/patches/patchless_support/nfs_changes_new_API.dpatch
deleted file mode 100755
index 15b2602..0000000
--- a/debian/patches/patchless_support/nfs_changes_new_API.dpatch
+++ /dev/null
@@ -1,277 +0,0 @@
-#! /bin/sh /usr/share/dpatch/dpatch-run
-## posix_acl.patch by Patrick Winnertz <winnie@debian.org>
-##
-## All lines beginning with `## DP:' are a description of the patch.
-## DP: Patch which will enable 2.6.24 patchless support for lustre, taken from #14250
-
-@DPATCH@
-Index: HEAD/lustre/llite/llite_internal.h
-===================================================================
---- HEAD.orig/lustre/llite/llite_internal.h 2008-11-20 14:34:31.000000000 +0200
-+++ HEAD/lustre/llite/llite_internal.h 2008-12-17 17:15:38.000000000 +0200
-@@ -744,9 +744,6 @@
- /* llite/llite_nfs.c */
- extern struct export_operations lustre_export_operations;
- __u32 get_uuid2int(const char *name, int len);
--struct dentry *ll_fh_to_dentry(struct super_block *sb, __u32 *data, int len,
-- int fhtype, int parent);
--int ll_dentry_to_fh(struct dentry *, __u32 *datap, int *lenp, int need_parent);
-
- /* llite/special.c */
- extern struct inode_operations ll_special_inode_operations;
-Index: HEAD/lustre/llite/llite_nfs.c
-===================================================================
---- HEAD.orig/lustre/llite/llite_nfs.c 2008-11-17 11:36:34.000000000 +0200
-+++ HEAD/lustre/llite/llite_nfs.c 2008-12-17 17:40:22.000000000 +0200
-@@ -67,14 +67,13 @@
- }
-
- static struct inode *search_inode_for_lustre(struct super_block *sb,
-- struct lu_fid *fid,
-- int mode)
-+ struct lu_fid *fid)
- {
- struct ll_sb_info *sbi = ll_s2sbi(sb);
- struct ptlrpc_request *req = NULL;
- struct inode *inode = NULL;
- unsigned long valid = 0;
-- int eadatalen = 0;
-+ int eadatalen;
- ino_t ino = ll_fid_build_ino(sbi, fid);
- int rc;
- ENTRY;
-@@ -85,13 +84,13 @@
- if (inode)
- RETURN(inode);
-
-- if (S_ISREG(mode)) {
-- rc = ll_get_max_mdsize(sbi, &eadatalen);
-- if (rc)
-- RETURN(ERR_PTR(rc));
-- valid |= OBD_MD_FLEASIZE;
-- }
-+ rc = ll_get_max_mdsize(sbi, &eadatalen);
-+ if (rc)
-+ RETURN(ERR_PTR(rc));
-+
-+ valid |= OBD_MD_FLEASIZE;
-
-+ /* mds_fid2dentry ignore f_type */
- rc = md_getattr(sbi->ll_md_exp, fid, NULL, valid, eadatalen, &req);
- if (rc) {
- CERROR("can't get object attrs, fid "DFID", rc %d\n",
-@@ -107,9 +106,7 @@
- RETURN(inode);
- }
-
--static struct dentry *ll_iget_for_nfs(struct super_block *sb,
-- struct lu_fid *fid,
-- umode_t mode)
-+static struct dentry *ll_iget_for_nfs(struct super_block *sb, struct lu_fid *fid, __u32 mode)
- {
- struct inode *inode;
- struct dentry *result;
-@@ -119,7 +116,7 @@
- if (!fid_is_sane(fid))
- RETURN(ERR_PTR(-ESTALE));
-
-- inode = search_inode_for_lustre(sb, fid, mode);
-+ inode = search_inode_for_lustre(sb, fid);
- if (IS_ERR(inode))
- RETURN(ERR_PTR(PTR_ERR(inode)));
-
-@@ -142,85 +139,103 @@
- RETURN(result);
- }
-
-+#define LUSTRE_NFS_FID 0x97
-+
-+struct lustre_nfs_fid {
-+ struct lu_fid child;
-+ struct lu_fid parent;
-+ umode_t mode;
-+};
-+
-+/* The return value is file handle type:
-+ * 1 -- contains child file handle;
-+ * 2 -- contains child file handle and parent file handle;
-+ * 255 -- error.
-+ */
-+static int ll_encode_fh(struct dentry *de, __u32 *fh, int *plen,
-+ int connectable)
-+{
-+ struct inode *inode = de->d_inode;
-+ struct inode *parent = de->d_parent->d_inode;
-+ struct lustre_nfs_fid *nfs_fid = (void *)fh;
-+ ENTRY;
-+
-+ CDEBUG(D_INFO, "encoding for (%lu,"DFID") maxlen=%d minlen=%u\n",
-+ inode->i_ino, PFID(ll_inode2fid(inode)), *plen,
-+ sizeof(struct lustre_nfs_fid));
-+
-+ if (*plen < sizeof(struct lustre_nfs_fid))
-+ RETURN(255);
-+
-+ nfs_fid->child = *ll_inode2fid(inode);
-+ nfs_fid->parent = *ll_inode2fid(parent);
-+ nfs_fid->mode = (S_IFMT & inode->i_mode);
-+ *plen = sizeof(struct lustre_nfs_fid);
-+
-+ RETURN(LUSTRE_NFS_FID);
-+}
-+
-+#ifdef HAVE_FH_TO_DENTRY
-+static struct dentry *ll_fh_to_dentry(struct super_block *sb, struct fid *fid,
-+ int fh_len, int fh_type)
-+{
-+ struct lustre_nfs_fid *nfs_fid = (struct lustre_nfs_fid *)fid;
-+
-+ if (fh_type != LUSTRE_NFS_FID)
-+ RETURN(ERR_PTR(-EINVAL));
-+
-+ RETURN(ll_iget_for_nfs(sb, &nfs_fid->child, nfs_fid->mode));
-+}
-+
-+static struct dentry *ll_fh_to_parent(struct super_block *sb, struct fid *fid,
-+ int fh_len, int fh_type)
-+{
-+ struct lustre_nfs_fid *nfs_fid = (struct lustre_nfs_fid *)fid;
-+
-+ if (fh_type != LUSTRE_NFS_FID)
-+ RETURN(ERR_PTR(-EINVAL));
-+
-+
-+ RETURN(ll_iget_for_nfs(sb, &nfs_fid->parent, S_IFDIR));
-+}
-+
-+#else
-+
- /*
- * This length is counted as amount of __u32,
- * It is composed of a fid and a mode
- */
--#define ONE_FH_LEN (sizeof(struct lu_fid)/4 + 1)
--
- static struct dentry *ll_decode_fh(struct super_block *sb, __u32 *fh, int fh_len,
- int fh_type,
- int (*acceptable)(void *, struct dentry *),
- void *context)
- {
-- struct lu_fid *parent = NULL;
-- struct lu_fid *child;
-+ struct lustre_nfs_fid *nfs_fid = (void *)fh;
- struct dentry *entry;
- ENTRY;
-
-- CDEBUG(D_INFO, "decoding for "DFID" fh_len=%d fh_type=%d\n",
-- PFID((struct lu_fid*)fh), fh_len, fh_type);
-+ CDEBUG(D_INFO, "decoding for "DFID" fh_len=%d fh_type=%x\n",
-+ PFID(&nfs_fid->child), fh_len, fh_type);
-
-- if (fh_type != 1 && fh_type != 2)
-- RETURN(ERR_PTR(-ESTALE));
-- if (fh_len < ONE_FH_LEN * fh_type)
-+ if (fh_type != LUSTRE_NFS_FID)
- RETURN(ERR_PTR(-ESTALE));
-
-- child = (struct lu_fid*)fh;
-- if (fh_type == 2)
-- parent = (struct lu_fid*)(fh + ONE_FH_LEN);
--
-- entry = sb->s_export_op->find_exported_dentry(sb, child, parent,
-+ entry = sb->s_export_op->find_exported_dentry(sb, &nfs_fid->child,
-+ &nfs_fid->parent,
- acceptable, context);
- RETURN(entry);
- }
-
--/* The return value is file handle type:
-- * 1 -- contains child file handle;
-- * 2 -- contains child file handle and parent file handle;
-- * 255 -- error.
-- */
--static int ll_encode_fh(struct dentry *de, __u32 *fh, int *plen, int connectable)
--{
-- struct inode *inode = de->d_inode;
-- struct lu_fid *fid = ll_inode2fid(inode);
-- ENTRY;
--
-- CDEBUG(D_INFO, "encoding for (%lu,"DFID") maxlen=%d minlen=%d\n",
-- inode->i_ino, PFID(fid), *plen, (int)ONE_FH_LEN);
--
-- if (*plen < ONE_FH_LEN)
-- RETURN(255);
--
-- memcpy((char*)fh, fid, sizeof(*fid));
-- *(fh + ONE_FH_LEN - 1) = (__u32)(S_IFMT & inode->i_mode);
--
-- if (de->d_parent && *plen >= ONE_FH_LEN * 2) {
-- struct inode *parent = de->d_parent->d_inode;
-- fh += ONE_FH_LEN;
-- memcpy((char*)fh, &ll_i2info(parent)->lli_fid, sizeof(*fid));
-- *(fh + ONE_FH_LEN - 1) = (__u32)(S_IFMT & parent->i_mode);
-- *plen = ONE_FH_LEN * 2;
-- RETURN(2);
-- } else {
-- *plen = ONE_FH_LEN;
-- RETURN(1);
-- }
--}
--
- static struct dentry *ll_get_dentry(struct super_block *sb, void *data)
- {
-- struct lu_fid *fid;
-+ struct lustre_nfs_fid *fid = data;
- struct dentry *entry;
-- __u32 mode;
- ENTRY;
-
-- fid = (struct lu_fid *)data;
-- mode = *((__u32*)data + ONE_FH_LEN - 1);
--
-- entry = ll_iget_for_nfs(sb, fid, mode);
-+ entry = ll_iget_for_nfs(sb, &fid->child, fid->mode);
- RETURN(entry);
- }
-+#endif
-
- static struct dentry *ll_get_parent(struct dentry *dchild)
- {
-@@ -232,11 +247,11 @@
- static char dotdot[] = "..";
- int rc;
- ENTRY;
--
-+
- LASSERT(dir && S_ISDIR(dir->i_mode));
--
-+
- sbi = ll_s2sbi(dir->i_sb);
--
-+
- CDEBUG(D_INFO, "getting parent for (%lu,"DFID")\n",
- dir->i_ino, PFID(ll_inode2fid(dir)));
-
-@@ -249,7 +264,7 @@
- }
- body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
- LASSERT(body->valid & OBD_MD_FLID);
--
-+
- CDEBUG(D_INFO, "parent for "DFID" is "DFID"\n",
- PFID(ll_inode2fid(dir)), PFID(&body->fid1));
-
-@@ -261,7 +276,12 @@
-
- struct export_operations lustre_export_operations = {
- .get_parent = ll_get_parent,
-- .get_dentry = ll_get_dentry,
- .encode_fh = ll_encode_fh,
-+#ifdef HAVE_FH_TO_DENTRY
-+ .fh_to_dentry = ll_fh_to_dentry,
-+ .fh_to_parent = ll_fh_to_parent,
-+#else
-+ .get_dentry = ll_get_dentry,
- .decode_fh = ll_decode_fh,
-+#endif
- };
diff --git a/debian/patches/patchless_support/splice_read_support.dpatch b/debian/patches/patchless_support/splice_read_support.dpatch
deleted file mode 100755
index 9384817..0000000
--- a/debian/patches/patchless_support/splice_read_support.dpatch
+++ /dev/null
@@ -1,423 +0,0 @@
-#! /bin/sh /usr/share/dpatch/dpatch-run
-## posix_acl.patch by Patrick Winnertz <winnie@debian.org>
-##
-## All lines beginning with `## DP:' are a description of the patch.
-## DP: Patch which will enable 2.6.24 patchless support for lustre, taken from #14250
-
-@DPATCH@
-Index: HEAD/lustre/autoconf/lustre-core.m4
-===================================================================
---- HEAD.orig/lustre/autoconf/lustre-core.m4 2008-12-17 16:21:23.000000000 +0200
-+++ HEAD/lustre/autoconf/lustre-core.m4 2008-12-17 16:21:40.000000000 +0200
-@@ -1388,6 +1388,25 @@
- ])
-
- # 2.6.23 change .sendfile to .splice_read
-+# RHEL4 (-92 kernel) have both sendfile and .splice_read API
-+AC_DEFUN([LC_KERNEL_SENDFILE],
-+[AC_MSG_CHECKING([if kernel has .sendfile])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/fs.h>
-+],[
-+ struct file_operations file;
-+
-+ file.sendfile = NULL;
-+], [
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_KERNEL_SENDFILE, 1,
-+ [kernel has .sendfile])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+# 2.6.23 change .sendfile to .splice_read
- AC_DEFUN([LC_KERNEL_SPLICE_READ],
- [AC_MSG_CHECKING([if kernel has .splice_read])
- LB_LINUX_TRY_COMPILE([
-@@ -1846,6 +1865,7 @@
- # 2.6.23
- LC_UNREGISTER_BLKDEV_RETURN_INT
- LC_KERNEL_SPLICE_READ
-+ LC_KERNEL_SENDFILE
- LC_HAVE_EXPORTFS_H
- LC_VM_OP_FAULT
- LC_REGISTER_SHRINKER
-Index: HEAD/lustre/obdclass/cl_io.c
-===================================================================
---- HEAD.orig/lustre/obdclass/cl_io.c 2008-11-12 22:58:06.000000000 +0200
-+++ HEAD/lustre/obdclass/cl_io.c 2008-12-17 16:21:40.000000000 +0200
-@@ -75,15 +75,6 @@
- }
-
- /**
-- * True, iff \a io is a sendfile().
-- */
--int cl_io_is_sendfile(const struct cl_io *io)
--{
-- return io->ci_type == CIT_READ && io->u.ci_rd.rd_is_sendfile;
--}
--EXPORT_SYMBOL(cl_io_is_sendfile);
--
--/**
- * Returns true iff there is an IO ongoing in the given environment.
- */
- int cl_io_is_going(const struct lu_env *env)
-Index: HEAD/lustre/include/cl_object.h
-===================================================================
---- HEAD.orig/lustre/include/cl_object.h 2008-11-08 01:52:38.000000000 +0200
-+++ HEAD/lustre/include/cl_object.h 2008-12-17 16:21:40.000000000 +0200
-@@ -2177,6 +2177,16 @@
- int crw_nonblock;
- };
-
-+/* IO subtypes */
-+enum cl_io_subtype {
-+ /** normal IO */
-+ IO_NORMAL,
-+ /** io called from .sendfile */
-+ IO_SENDFILE,
-+ /** io started from splice_{read|write} */
-+ IO_SPLICE
-+};
-+
- /**
- * State for io.
- *
-@@ -2207,7 +2217,6 @@
- union {
- struct cl_rd_io {
- struct cl_io_rw_common rd;
-- int rd_is_sendfile;
- } ci_rd;
- struct cl_wr_io {
- struct cl_io_rw_common wr;
-@@ -2860,8 +2869,6 @@
- return io->ci_type == CIT_WRITE && io->u.ci_wr.wr_append;
- }
-
--int cl_io_is_sendfile(const struct cl_io *io);
--
- struct cl_io *cl_io_top(struct cl_io *io);
-
- void cl_io_print(const struct lu_env *env, void *cookie,
-Index: HEAD/lustre/include/lclient.h
-===================================================================
---- HEAD.orig/lustre/include/lclient.h 2008-12-11 06:02:39.000000000 +0200
-+++ HEAD/lustre/include/lclient.h 2008-12-17 16:21:40.000000000 +0200
-@@ -51,14 +51,19 @@
- * Common IO arguments for various VFS I/O interfaces.
- */
- struct ccc_io_args {
-- int cia_is_sendfile;
-+ /** normal/sendfile/splice */
-+ enum cl_io_subtype cia_io_subtype;
- #ifndef HAVE_FILE_WRITEV
-- struct kiocb *cia_iocb;
-+ struct kiocb *cia_iocb;
- #endif
-- struct iovec *cia_iov;
-- unsigned long cia_nrsegs;
-- read_actor_t cia_actor;
-- void *cia_target;
-+ struct iovec *cia_iov;
-+ unsigned long cia_nrsegs;
-+ /* sendfile */
-+ read_actor_t cia_actor;
-+ void *cia_target;
-+ /* splice */
-+ struct pipe_inode_info *cia_pipe;
-+ unsigned int cia_flags;
- };
-
- /**
-Index: HEAD/lustre/llite/vvp_io.c
-===================================================================
---- HEAD.orig/lustre/llite/vvp_io.c 2008-11-12 23:00:37.000000000 +0200
-+++ HEAD/lustre/llite/vvp_io.c 2008-12-17 16:21:40.000000000 +0200
-@@ -52,6 +52,16 @@
- static struct vvp_io *cl2vvp_io(const struct lu_env *env,
- const struct cl_io_slice *slice);
-
-+/**
-+ * True, if \a io is a normal io, False for sendfile() / splice_{read|write}
-+ */
-+static int vvp_io_is_normalio(const struct lu_env *env, const struct cl_io *io)
-+{
-+ struct vvp_io *vio = vvp_env_io(env);
-+
-+ return vio->ci_io_subtype == IO_NORMAL;
-+}
-+
- /*****************************************************************************
- *
- * io operations.
-@@ -132,7 +142,7 @@
-
- LASSERT(io->ci_type == CIT_READ || io->ci_type == CIT_WRITE);
-
-- if (cl_io_is_sendfile(io))
-+ if (!vvp_io_is_normalio(env, io))
- RETURN(0);
-
- for (seg = 0; seg < vio->cui_nrsegs; seg++) {
-@@ -180,7 +190,7 @@
- size_t size = io->u.ci_rw.crw_count;
-
- vio->cui_iov_olen = 0;
-- if (cl_io_is_sendfile(io) || size == vio->cui_tot_count)
-+ if (!vvp_io_is_normalio(env, io) || size == vio->cui_tot_count)
- return;
-
- if (vio->cui_tot_nrsegs == 0)
-@@ -476,11 +486,27 @@
-
- /* BUG: 5972 */
- file_accessed(file);
-- if (cl_io_is_sendfile(io)) {
-+ switch(vio->ci_io_subtype) {
-+ case IO_NORMAL:
-+ result = lustre_generic_file_read(file, cio, &pos);
-+ break;
-+#ifdef HAVE_KERNEL_SENDFILE
-+ case IO_SENDFILE:
- result = generic_file_sendfile(file, &pos, cnt,
-- vio->u.read.cui_actor, vio->u.read.cui_target);
-- } else {
-- result = lustre_generic_file_read(file, cio, &pos);
-+ vio->u.sendfile.cui_actor,
-+ vio->u.sendfile.cui_target);
-+ break;
-+#endif
-+#ifdef HAVE_KERNEL_SPLICE_READ
-+ case IO_SPLICE:
-+ result = generic_file_splice_read(file, &pos,
-+ vio->u.splice.pipe, cnt,
-+ vio->u.splice.flags);
-+ break;
-+#endif
-+ default:
-+ CERROR("Wrong IO type %u\n", vio->ci_io_subtype);
-+ LBUG();
- }
-
- if (result >= 0) {
-@@ -622,7 +648,7 @@
-
- CLOBINVRNT(env, obj, ccc_object_invariant(obj));
-
-- if (!cl_io_is_sendfile(io) && io->ci_continue) {
-+ if (vvp_io_is_normalio(env, io) && io->ci_continue) {
- /* update the iov */
- LASSERT(vio->cui_tot_nrsegs >= vio->cui_nrsegs);
- LASSERT(vio->cui_tot_count >= nob);
-Index: HEAD/lustre/llite/llite_internal.h
-===================================================================
---- HEAD.orig/lustre/llite/llite_internal.h 2008-12-17 16:21:24.000000000 +0200
-+++ HEAD/lustre/llite/llite_internal.h 2008-12-17 16:21:40.000000000 +0200
-@@ -791,11 +791,22 @@
- void vvp_write_complete(struct ccc_object *club, struct ccc_page *page);
-
- struct vvp_io {
-+ /** io subtype */
-+ enum cl_io_subtype ci_io_subtype;
-+
- union {
-+#ifdef HAVE_KERNEL_SENDFILE
- struct {
- read_actor_t cui_actor;
- void *cui_target;
-- } read;
-+ } sendfile;
-+#endif
-+#ifdef HAVE_KERNEL_SPLICE_READ
-+ struct {
-+ struct pipe_inode_info *pipe;
-+ unsigned int flags;
-+ } splice;
-+#endif
- struct vvp_fault_io {
- /**
- * Inode modification time that is checked across DLM
-Index: HEAD/lustre/llite/file.c
-===================================================================
---- HEAD.orig/lustre/llite/file.c 2008-11-20 14:34:31.000000000 +0200
-+++ HEAD/lustre/llite/file.c 2008-12-17 16:26:49.000000000 +0200
-@@ -807,27 +807,43 @@
- io = &ccc_env_info(env)->cti_io;
- ll_io_init(io, file, iot == CIT_WRITE);
-
-- if (iot == CIT_READ)
-- io->u.ci_rd.rd_is_sendfile = args->cia_is_sendfile;
--
- if (cl_io_rw_init(env, io, iot, *ppos, count) == 0) {
- struct vvp_io *vio = vvp_env_io(env);
- struct ccc_io *cio = ccc_env_io(env);
-- if (cl_io_is_sendfile(io)) {
-- vio->u.read.cui_actor = args->cia_actor;
-- vio->u.read.cui_target = args->cia_target;
-- } else {
-+
-+ vio->ci_io_subtype = args->cia_io_subtype;
-+
-+ switch(vio->ci_io_subtype) {
-+ case IO_NORMAL:
- cio->cui_iov = args->cia_iov;
- cio->cui_nrsegs = args->cia_nrsegs;
- #ifndef HAVE_FILE_WRITEV
- cio->cui_iocb = args->cia_iocb;
- #endif
-+ break;
-+#ifdef HAVE_KERNEL_SENDFILE
-+ case IO_SENDFILE:
-+ vio->u.sendfile.cui_actor = args->cia_actor;
-+ vio->u.sendfile.cui_target = args->cia_target;
-+ break;
-+#endif
-+#ifdef HAVE_KERNEL_SPLICE_READ
-+ case IO_SPLICE:
-+ vio->u.splice.pipe = args->cia_pipe;
-+ vio->u.splice.flags = args->cia_flags;
-+ break;
-+#endif
-+ default:
-+ CERROR("Unknow IO type - %u\n", vio->ci_io_subtype);
-+ LBUG();
- }
- cio->cui_fd = LUSTRE_FPRIVATE(file);
- result = cl_io_loop(env, io);
-- } else
-+ } else {
- /* cl_io_rw_init() handled IO */
- result = io->ci_result;
-+ }
-+
- if (io->ci_nob > 0) {
- result = io->ci_nob;
- *ppos = io->u.ci_wr.wr.crw_pos;
-@@ -888,7 +904,7 @@
- RETURN(PTR_ERR(env));
-
- args = &vvp_env_info(env)->vti_args;
-- args->cia_is_sendfile = 0;
-+ args->cia_io_subtype = IO_NORMAL;
- args->cia_iov = (struct iovec *)iov;
- args->cia_nrsegs = nr_segs;
- result = ll_file_io_generic(env, args, file, CIT_READ, ppos, count);
-@@ -937,7 +953,7 @@
- RETURN(PTR_ERR(env));
-
- args = &vvp_env_info(env)->vti_args;
-- args->cia_is_sendfile = 0;
-+ args->cia_io_subtype = IO_NORMAL;
- args->cia_iov = (struct iovec *)iov;
- args->cia_nrsegs = nr_segs;
- args->cia_iocb = iocb;
-@@ -1002,6 +1018,7 @@
- args = &vvp_env_info(env)->vti_args;
- args->cia_iov = (struct iovec *)iov;
- args->cia_nrsegs = nr_segs;
-+ args->cia_io_subtype = IO_NORMAL;
- result = ll_file_io_generic(env, args, file, CIT_WRITE, ppos, count);
- cl_env_put(env, &refcheck);
- RETURN(result);
-@@ -1052,6 +1069,7 @@
- args->cia_iov = (struct iovec *)iov;
- args->cia_nrsegs = nr_segs;
- args->cia_iocb = iocb;
-+ args->cia_io_subtype = IO_NORMAL;
- result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_WRITE,
- &iocb->ki_pos, count);
- cl_env_put(env, &refcheck);
-@@ -1089,6 +1107,7 @@
- #endif
-
-
-+#ifdef HAVE_KERNEL_SENDFILE
- /*
- * Send file content (through pagecache) somewhere with helper
- */
-@@ -1106,13 +1125,43 @@
- RETURN(PTR_ERR(env));
-
- args = &vvp_env_info(env)->vti_args;
-- args->cia_is_sendfile = 1;
-+ args->cia_io_subtype = IO_SENDFILE;
- args->cia_target = target;
- args->cia_actor = actor;
- result = ll_file_io_generic(env, args, in_file, CIT_READ, ppos, count);
- cl_env_put(env, &refcheck);
- RETURN(result);
- }
-+#endif
-+
-+#ifdef HAVE_KERNEL_SPLICE_READ
-+/*
-+ * Send file content (through pagecache) somewhere with helper
-+ */
-+static ssize_t ll_file_splice_read(struct file *in_file, loff_t *ppos,
-+ struct pipe_inode_info *pipe, size_t count,
-+ unsigned int flags)
-+{
-+ struct lu_env *env;
-+ struct ccc_io_args *args;
-+ ssize_t result;
-+ int refcheck;
-+ ENTRY;
-+
-+ env = cl_env_get(&refcheck);
-+ if (IS_ERR(env))
-+ RETURN(PTR_ERR(env));
-+
-+ args = &vvp_env_info(env)->vti_args;
-+ args->cia_io_subtype = IO_SPLICE;
-+ args->cia_pipe = pipe;
-+ args->cia_flags = flags;
-+
-+ result = ll_file_io_generic(env, args, in_file, CIT_READ, ppos, count);
-+ cl_env_put(env, &refcheck);
-+ RETURN(result);
-+}
-+#endif
-
- static int ll_lov_recreate_obj(struct inode *inode, struct file *file,
- unsigned long arg)
-@@ -2390,7 +2439,12 @@
- .release = ll_file_release,
- .mmap = ll_file_mmap,
- .llseek = ll_file_seek,
-+#ifdef HAVE_KERNEL_SENDFILE
- .sendfile = ll_file_sendfile,
-+#endif
-+#ifdef HAVE_KERNEL_SPLICE_READ
-+ .splice_read = ll_file_splice_read,
-+#endif
- .fsync = ll_fsync,
- };
-
-@@ -2404,7 +2458,12 @@
- .release = ll_file_release,
- .mmap = ll_file_mmap,
- .llseek = ll_file_seek,
-+#ifdef HAVE_KERNEL_SENDFILE
- .sendfile = ll_file_sendfile,
-+#endif
-+#ifdef HAVE_KERNEL_SPLICE_READ
-+ .splice_read = ll_file_splice_read,
-+#endif
- .fsync = ll_fsync,
- #ifdef HAVE_F_OP_FLOCK
- .flock = ll_file_flock,
-@@ -2423,7 +2482,12 @@
- .release = ll_file_release,
- .mmap = ll_file_mmap,
- .llseek = ll_file_seek,
-+#ifdef HAVE_KERNEL_SENDFILE
- .sendfile = ll_file_sendfile,
-+#endif
-+#ifdef HAVE_KERNEL_SPLICE_READ
-+ .splice_read = ll_file_splice_read,
-+#endif
- .fsync = ll_fsync,
- #ifdef HAVE_F_OP_FLOCK
- .flock = ll_file_noflock,
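Two points are worth pulling out of the splice patch above. First, 2.6.23
replaced file_operations::sendfile with ->splice_read(), but RHEL4's late
2.6.9 kernels ship both entry points, which is why HAVE_KERNEL_SENDFILE and
HAVE_KERNEL_SPLICE_READ are probed independently instead of being keyed off a
single version. Second, the old rd_is_sendfile boolean no longer scales to
three I/O flavours, hence the cl_io_subtype enum. The resulting ops table, cut
down to the conditional members (handler prototypes as in the patch):

    #include <linux/fs.h>

    static ssize_t ll_file_sendfile(struct file *in, loff_t *ppos,
                                    size_t count, read_actor_t actor,
                                    void *target);
    static ssize_t ll_file_splice_read(struct file *in, loff_t *ppos,
                                       struct pipe_inode_info *pipe,
                                       size_t count, unsigned int flags);

    static struct file_operations ll_file_ops = {
    #ifdef HAVE_KERNEL_SENDFILE
            .sendfile    = ll_file_sendfile,     /* kept for RHEL4 */
    #endif
    #ifdef HAVE_KERNEL_SPLICE_READ
            .splice_read = ll_file_splice_read,  /* 2.6.23+ */
    #endif
            /* remaining members as in the patch */
    };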
diff --git a/debian/patches/patchless_support/sysctl_update.dpatch b/debian/patches/patchless_support/sysctl_update.dpatch
deleted file mode 100755
index 23078a7..0000000
--- a/debian/patches/patchless_support/sysctl_update.dpatch
+++ /dev/null
@@ -1,278 +0,0 @@
-#! /bin/sh /usr/share/dpatch/dpatch-run
-## posix_acl.patch by Patrick Winnertz <winnie@debian.org>
-##
-## All lines beginning with `## DP:' are a description of the patch.
-## DP: Patch which will enable 2.6.24 patchless support for lustre, taken from #14250
-
-@DPATCH@
-Index: HEAD/lustre/obdclass/linux/linux-sysctl.c
-===================================================================
---- HEAD.orig/lustre/obdclass/linux/linux-sysctl.c 2008-08-12 11:40:23.000000000 +0400
-+++ HEAD/lustre/obdclass/linux/linux-sysctl.c 2008-12-05 21:13:18.000000000 +0300
-@@ -56,7 +56,9 @@
-
- cfs_sysctl_table_header_t *obd_table_header = NULL;
-
--#define OBD_SYSCTL 300
-+#ifndef HAVE_SYSCTL_UNNUMBERED
-+
-+#define CTL_LUSTRE 300
-
- enum {
- OBD_FAIL_LOC = 1, /* control test failures instrumentation */
-@@ -74,6 +76,23 @@
- OBD_ALLOC_FAIL_RATE, /* memory allocation random failure rate */
- OBD_MAX_DIRTY_PAGES, /* maximum dirty pages */
- };
-+#else
-+#define CTL_LUSTRE CTL_UNNUMBERED
-+#define OBD_FAIL_LOC CTL_UNNUMBERED
-+#define OBD_FAIL_VAL CTL_UNNUMBERED
-+#define OBD_TIMEOUT CTL_UNNUMBERED
-+#define OBD_DUMP_ON_TIMEOUT CTL_UNNUMBERED
-+#define OBD_MEMUSED CTL_UNNUMBERED
-+#define OBD_PAGESUSED CTL_UNNUMBERED
-+#define OBD_MAXMEMUSED CTL_UNNUMBERED
-+#define OBD_MAXPAGESUSED CTL_UNNUMBERED
-+#define OBD_SYNCFILTER CTL_UNNUMBERED
-+#define OBD_LDLM_TIMEOUT CTL_UNNUMBERED
-+#define OBD_DUMP_ON_EVICTION CTL_UNNUMBERED
-+#define OBD_DEBUG_PEER_ON_TIMEOUT CTL_UNNUMBERED
-+#define OBD_ALLOC_FAIL_RATE CTL_UNNUMBERED
-+#define OBD_MAX_DIRTY_PAGES CTL_UNNUMBERED
-+#endif
-
- int LL_PROC_PROTO(proc_fail_loc)
- {
-@@ -100,6 +119,7 @@
- {
- char buf[22];
- int len;
-+ struct ctl_table dummy;
- DECLARE_LL_PROC_PPOS_DECL;
-
- if (!*lenp || (*ppos && !write)) {
-@@ -113,17 +133,19 @@
- if (len > *lenp)
- len = *lenp;
- buf[len] = '\0';
-- if (copy_to_user(buffer, buf, len))
-- return -EFAULT;
-- *lenp = len;
-- *ppos += *lenp;
-- return 0;
-+
-+ dummy = *table;
-+ dummy.data = buf;
-+ dummy.maxlen = sizeof(buf);
-+
-+ return ll_proc_dostring(&dummy,write,filp,buffer,lenp, ppos);
- }
-
- int LL_PROC_PROTO(proc_pages_alloc)
- {
- char buf[22];
- int len;
-+ struct ctl_table dummy;
- DECLARE_LL_PROC_PPOS_DECL;
-
- if (!*lenp || (*ppos && !write)) {
-@@ -137,17 +159,19 @@
- if (len > *lenp)
- len = *lenp;
- buf[len] = '\0';
-- if (copy_to_user(buffer, buf, len))
-- return -EFAULT;
-- *lenp = len;
-- *ppos += *lenp;
-- return 0;
-+
-+ dummy = *table;
-+ dummy.data = buf;
-+ dummy.maxlen = sizeof(buf);
-+
-+ return ll_proc_dostring(&dummy,write,filp,buffer,lenp, ppos);
- }
-
- int LL_PROC_PROTO(proc_mem_max)
- {
- char buf[22];
- int len;
-+ struct ctl_table dummy;
- DECLARE_LL_PROC_PPOS_DECL;
-
- if (!*lenp || (*ppos && !write)) {
-@@ -161,17 +185,19 @@
- if (len > *lenp)
- len = *lenp;
- buf[len] = '\0';
-- if (copy_to_user(buffer, buf, len))
-- return -EFAULT;
-- *lenp = len;
-- *ppos += *lenp;
-- return 0;
-+
-+ dummy = *table;
-+ dummy.data = buf;
-+ dummy.maxlen = sizeof(buf);
-+
-+ return ll_proc_dostring(&dummy,write,filp,buffer,lenp, ppos);
- }
-
- int LL_PROC_PROTO(proc_pages_max)
- {
- char buf[22];
- int len;
-+ struct ctl_table dummy;
- DECLARE_LL_PROC_PPOS_DECL;
-
- if (!*lenp || (*ppos && !write)) {
-@@ -185,11 +211,12 @@
- if (len > *lenp)
- len = *lenp;
- buf[len] = '\0';
-- if (copy_to_user(buffer, buf, len))
-- return -EFAULT;
-- *lenp = len;
-- *ppos += *lenp;
-- return 0;
-+
-+ dummy = *table;
-+ dummy.data = buf;
-+ dummy.maxlen = sizeof(buf);
-+
-+ return ll_proc_dostring(&dummy,write,filp,buffer,lenp, ppos);
- }
-
- int LL_PROC_PROTO(proc_max_dirty_pages_in_mb)
-@@ -216,7 +243,8 @@
- obd_max_dirty_pages = 4 << (20 - CFS_PAGE_SHIFT);
- }
- } else {
-- char buf[21];
-+ char buf[22];
-+ struct ctl_table dummy;
- int len;
-
- len = lprocfs_read_frac_helper(buf, sizeof(buf),
-@@ -225,7 +253,13 @@
- if (len > *lenp)
- len = *lenp;
- buf[len] = '\0';
-- if (copy_to_user(buffer, buf, len))
-+
-+ dummy = *table;
-+ dummy.data = buf;
-+ dummy.maxlen = sizeof(buf);
-+
-+ rc = ll_proc_dostring(&dummy,write,filp,buffer,lenp, ppos);
-+ if (rc)
- return -EFAULT;
- *lenp = len;
- }
-@@ -248,7 +282,8 @@
- (unsigned int*)table->data,
- OBD_ALLOC_FAIL_MULT);
- } else {
-- char buf[21];
-+ char buf[22];
-+ struct ctl_table dummy;
- int len;
-
- len = lprocfs_read_frac_helper(buf, 21,
-@@ -257,7 +292,12 @@
- if (len > *lenp)
- len = *lenp;
- buf[len] = '\0';
-- if (copy_to_user(buffer, buf, len))
-+ dummy = *table;
-+ dummy.data = buf;
-+ dummy.maxlen = sizeof(buf);
-+
-+ rc = ll_proc_dostring(&dummy,write,filp,buffer,lenp, ppos);
-+ if(rc)
- return -EFAULT;
- *lenp = len;
- }
-@@ -281,7 +321,8 @@
- .data = &obd_fail_val,
- .maxlen = sizeof(int),
- .mode = 0644,
-- .proc_handler = &proc_dointvec
-+ .proc_handler = &proc_dointvec,
-+ .strategy = &sysctl_intvec,
- },
- {
- .ctl_name = OBD_TIMEOUT,
-@@ -297,7 +338,7 @@
- .data = &obd_debug_peer_on_timeout,
- .maxlen = sizeof(int),
- .mode = 0644,
-- .proc_handler = &proc_dointvec
-+ .proc_handler = &proc_dointvec,
- },
- {
- .ctl_name = OBD_DUMP_ON_TIMEOUT,
-@@ -305,7 +346,7 @@
- .data = &obd_dump_on_timeout,
- .maxlen = sizeof(int),
- .mode = 0644,
-- .proc_handler = &proc_dointvec
-+ .proc_handler = &proc_dointvec,
- },
- {
- .ctl_name = OBD_DUMP_ON_EVICTION,
-@@ -313,7 +354,7 @@
- .data = &obd_dump_on_eviction,
- .maxlen = sizeof(int),
- .mode = 0644,
-- .proc_handler = &proc_dointvec
-+ .proc_handler = &proc_dointvec,
- },
- {
- .ctl_name = OBD_MEMUSED,
-@@ -321,7 +362,7 @@
- .data = NULL,
- .maxlen = 0,
- .mode = 0444,
-- .proc_handler = &proc_memory_alloc
-+ .proc_handler = &proc_memory_alloc,
- },
- {
- .ctl_name = OBD_PAGESUSED,
-@@ -329,7 +370,7 @@
- .data = NULL,
- .maxlen = 0,
- .mode = 0444,
-- .proc_handler = &proc_pages_alloc
-+ .proc_handler = &proc_pages_alloc,
- },
- {
- .ctl_name = OBD_MAXMEMUSED,
-@@ -337,7 +378,7 @@
- .data = NULL,
- .maxlen = 0,
- .mode = 0444,
-- .proc_handler = &proc_mem_max
-+ .proc_handler = &proc_mem_max,
- },
- {
- .ctl_name = OBD_MAXPAGESUSED,
-@@ -345,7 +386,7 @@
- .data = NULL,
- .maxlen = 0,
- .mode = 0444,
-- .proc_handler = &proc_pages_max
-+ .proc_handler = &proc_pages_max,
- },
- {
- .ctl_name = OBD_LDLM_TIMEOUT,
-@@ -378,7 +419,7 @@
-
- static cfs_sysctl_table_t parent_table[] = {
- {
-- .ctl_name = OBD_SYSCTL,
-+ .ctl_name = CTL_LUSTRE,
- .procname = "lustre",
- .data = NULL,
- .maxlen = 0,
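The driver for the sysctl churn above: 2.6.24 deprecated static binary sysctl
IDs, so constants like OBD_SYSCTL 300 collapse to CTL_UNNUMBERED wherever the
kernel provides it, leaving the entries reachable by procname only. The other
recurring change, copying into a stack ctl_table and calling
ll_proc_dostring(), funnels the old hand-rolled copy_to_user() sequences
through one handler. A condensed illustration of the numbering side (table
trimmed to one entry, default value illustrative):

    #include <linux/sysctl.h>

    #ifdef HAVE_SYSCTL_UNNUMBERED
    #define CTL_LUSTRE   CTL_UNNUMBERED   /* name-only registration */
    #define OBD_TIMEOUT  CTL_UNNUMBERED
    #else
    #define CTL_LUSTRE   300              /* historical binary IDs */
    #define OBD_TIMEOUT  3
    #endif

    static int obd_timeout = 100;

    static struct ctl_table obd_table[] = {
            {
                    .ctl_name     = OBD_TIMEOUT,
                    .procname     = "timeout",
                    .data         = &obd_timeout,
                    .maxlen       = sizeof(int),
                    .mode         = 0644,
                    .proc_handler = &proc_dointvec,
            },
            { .ctl_name = 0 }
    };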
diff --git a/debian/patches/patchless_support_2.6.26.dpatch b/debian/patches/patchless_support_2.6.26.dpatch
new file mode 100755
index 0000000..2173bde
--- /dev/null
+++ b/debian/patches/patchless_support_2.6.26.dpatch
@@ -0,0 +1,16673 @@
+#! /bin/sh /usr/share/dpatch/dpatch-run
+## posix_acl.patch by Patrick Winnertz <winnie@debian.org>
+##
+## All lines beginning with `## DP:' are a description of the patch.
+## DP: Patch which will enable 2.6.26 patchless support for lustre, taken from #14250
+
+@DPATCH@
+diff -urNad lustre~/lnet/autoconf/lustre-lnet.m4 lustre/lnet/autoconf/lustre-lnet.m4
+--- lustre~/lnet/autoconf/lustre-lnet.m4 2009-03-12 10:32:27.000000000 +0100
++++ lustre/lnet/autoconf/lustre-lnet.m4 2009-03-12 11:02:51.000000000 +0100
+@@ -1362,6 +1362,22 @@
+ ])
+ ])
+
++# 2.6.27 has a second argument to sock_map_fd
++AC_DEFUN([LN_SOCK_MAP_FD_2ARG],
++[AC_MSG_CHECKING([sock_map_fd have second argument])
++LB_LINUX_TRY_COMPILE([
++ #include <linux/net.h>
++],[
++ sock_map_fd(NULL, 0);
++],[
++ AC_MSG_RESULT(yes)
++ AC_DEFINE(HAVE_SOCK_MAP_FD_2ARG, 1,
++ [sock_map_fd have second argument])
++],[
++ AC_MSG_RESULT(no)
++])
++])
++
+ #
+ # LN_PROG_LINUX
+ #
+@@ -1410,6 +1426,8 @@
+ LN_SCATTERLIST_SETPAGE
+ # 2.6.26
+ LN_SEM_COUNT
++# 2.6.27
++LN_SOCK_MAP_FD_2ARG
+ ])
+
+ #
+diff -urNad lustre~/lnet/libcfs/linux/linux-prim.c lustre/lnet/libcfs/linux/linux-prim.c
+--- lustre~/lnet/libcfs/linux/linux-prim.c 2008-08-07 11:51:06.000000000 +0200
++++ lustre/lnet/libcfs/linux/linux-prim.c 2009-03-12 11:02:51.000000000 +0100
+@@ -49,7 +49,7 @@
+ void cfs_enter_debugger(void)
+ {
+ #if defined(CONFIG_KGDB)
+- BREAKPOINT();
++// BREAKPOINT();
+ #elif defined(__arch_um__)
+ asm("int $3");
+ #else
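The KGDB hunk above simply comments the trap out, since newer kernels dropped
the BREAKPOINT() macro and the build breaks with it. Because BREAKPOINT is a
preprocessor macro, a guard that keeps the trap where it still exists would
also have worked; a sketch of that alternative (not what the patch does):

    void cfs_enter_debugger(void)
    {
    #if defined(CONFIG_KGDB) && defined(BREAKPOINT)
            BREAKPOINT();           /* only where the old macro survives */
    #elif defined(__arch_um__)
            asm("int $3");
    #else
            /* no debugger trap available in this configuration */
    #endif
    }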
+diff -urNad lustre~/lnet/libcfs/linux/linux-tcpip.c lustre/lnet/libcfs/linux/linux-tcpip.c
+--- lustre~/lnet/libcfs/linux/linux-tcpip.c 2008-08-07 11:51:07.000000000 +0200
++++ lustre/lnet/libcfs/linux/linux-tcpip.c 2009-03-12 11:02:51.000000000 +0100
+@@ -63,7 +63,11 @@
+ return rc;
+ }
+
++#ifdef HAVE_SOCK_MAP_FD_2ARG
++ fd = sock_map_fd(sock,0);
++#else
+ fd = sock_map_fd(sock);
++#endif
+ if (fd < 0) {
+ rc = fd;
+ sock_release(sock);
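Rather than repeating this #ifdef at every call site, the two-argument
sock_map_fd() could be hidden behind a single inline built on the
HAVE_SOCK_MAP_FD_2ARG symbol the patch defines; a sketch (the wrapper name is
made up):

    #include <linux/net.h>

    static inline int cfs_sock_map_fd(struct socket *sock)
    {
    #ifdef HAVE_SOCK_MAP_FD_2ARG
            return sock_map_fd(sock, 0);  /* 2.6.27+ takes O_* flags */
    #else
            return sock_map_fd(sock);
    #endif
    }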
+diff -urNad lustre~/lnet/lnet/api-ni.c lustre/lnet/lnet/api-ni.c
+--- lustre~/lnet/lnet/api-ni.c 2009-03-12 10:21:27.000000000 +0100
++++ lustre/lnet/lnet/api-ni.c 2009-03-12 11:02:51.000000000 +0100
+@@ -1032,7 +1032,7 @@
+ #ifdef __KERNEL__
+ if (lnd == NULL) {
+ LNET_MUTEX_UP(&the_lnet.ln_lnd_mutex);
+- rc = request_module(libcfs_lnd2modname(lnd_type));
++ rc = request_module("%s", libcfs_lnd2modname(lnd_type));
+ LNET_MUTEX_DOWN(&the_lnet.ln_lnd_mutex);
+
+ lnd = lnet_find_lnd_by_type(lnd_type);
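The api-ni.c change fixes a classic format-string bug: request_module() takes
a printf-style format, so passing the computed module name directly lets any
'%' in it be interpreted, and newer kernels' __printf annotations turn the
pattern into a compile-time warning or error. In miniature:

    #include <linux/kmod.h>

    static int load_lnd_module(const char *modname)
    {
            /* WRONG: modname itself becomes the format string */
            /* return request_module(modname); */

            /* RIGHT: constant format, name passed as an argument */
            return request_module("%s", modname);
    }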
+diff -urNad lustre~/lustre/autoconf/lustre-core.m4 lustre/lustre/autoconf/lustre-core.m4
+--- lustre~/lustre/autoconf/lustre-core.m4 2009-03-12 10:32:27.000000000 +0100
++++ lustre/lustre/autoconf/lustre-core.m4 2009-03-12 11:07:59.000000000 +0100
+@@ -1106,15 +1106,20 @@
+ AC_DEFUN([LC_PAGE_CHECKED],
+ [AC_MSG_CHECKING([kernel has PageChecked and SetPageChecked])
+ LB_LINUX_TRY_COMPILE([
+- #include <linux/mm.h>
+- #include <linux/page-flags.h>
++ #include <linux/autoconf.h>
++#ifdef HAVE_LINUX_MMTYPES_H
++ #include <linux/mm_types.h>
++#endif
++ #include <linux/page-flags.h>
+ ],[
+- #ifndef PageChecked
+- #error PageChecked not defined in kernel
+- #endif
+- #ifndef SetPageChecked
+- #error SetPageChecked not defined in kernel
+- #endif
++ struct page *p;
++
++ /* before 2.6.26 PageChecked was a define */
++ #ifndef PageChecked
++ /* 2.6.26 provides a function instead of the define */
++ SetPageChecked(p);
++ PageChecked(p);
++ #endif
+ ],[
+ AC_MSG_RESULT(yes)
+ AC_DEFINE(HAVE_PAGE_CHECKED, 1,
+@@ -1232,6 +1237,9 @@
+ ])
+ ])
+
++# 2.6.18
++
++
+ # 2.6.23 have return type 'void' for unregister_blkdev
+ AC_DEFUN([LC_UNREGISTER_BLKDEV_RETURN_INT],
+ [AC_MSG_CHECKING([if unregister_blkdev return int])
+@@ -1249,6 +1257,25 @@
+ ])
+
+ # 2.6.23 change .sendfile to .splice_read
++# RHEL4 (-92 kernel) have both sendfile and .splice_read API
++AC_DEFUN([LC_KERNEL_SENDFILE],
++[AC_MSG_CHECKING([if kernel has .sendfile])
++LB_LINUX_TRY_COMPILE([
++ #include <linux/fs.h>
++],[
++ struct file_operations file;
++
++ file.sendfile = NULL;
++], [
++ AC_MSG_RESULT([yes])
++ AC_DEFINE(HAVE_KERNEL_SENDFILE, 1,
++ [kernel has .sendfile])
++],[
++ AC_MSG_RESULT([no])
++])
++])
++
++# 2.6.23 change .sendfile to .splice_read
+ AC_DEFUN([LC_KERNEL_SPLICE_READ],
+ [AC_MSG_CHECKING([if kernel has .splice_read])
+ LB_LINUX_TRY_COMPILE([
+@@ -1268,11 +1295,219 @@
+
+ # 2.6.23 extract nfs export related data into exportfs.h
+ AC_DEFUN([LC_HAVE_EXPORTFS_H],
+-[
+-tmpfl="$CFLAGS"
+-CFLAGS="$CFLAGS -I$LINUX_OBJ/include"
+-AC_CHECK_HEADERS([linux/exportfs.h])
+-CFLAGS="$tmpfl"
++[LB_CHECK_FILE([$LINUX/include/linux/exportfs.h], [
++ AC_DEFINE(HAVE_LINUX_EXPORTFS_H, 1,
++ [kernel has include/exportfs.h])
++],[
++ AC_MSG_RESULT([no])
++])
++])
++
++# 2.6.23 have new page fault handling API
++AC_DEFUN([LC_VM_OP_FAULT],
++[AC_MSG_CHECKING([if kernel has .fault in vm_operation_struct])
++LB_LINUX_TRY_COMPILE([
++ #include <linux/mm.h>
++],[
++ struct vm_operations_struct op;
++
++ op.fault = NULL;
++], [
++ AC_MSG_RESULT([yes])
++ AC_DEFINE(HAVE_VM_OP_FAULT, 1,
++ [if kernel has .fault in vm_operation_struct])
++],[
++ AC_MSG_RESULT([no])
++])
++])
++
++#2.6.23 has new shrinker API
++AC_DEFUN([LC_REGISTER_SHRINKER],
++[AC_MSG_CHECKING([if kernel has register_shrinker])
++LB_LINUX_TRY_COMPILE([
++ #include <linux/mm.h>
++],[
++ register_shrinker(NULL);
++], [
++ AC_MSG_RESULT([yes])
++ AC_DEFINE(HAVE_REGISTER_SHRINKER, 1,
++ [if kernel has register_shrinker])
++],[
++ AC_MSG_RESULT([no])
++])
++])
++
++# 2.6.24 has bio_endio with 2 args
++AC_DEFUN([LC_BIO_ENDIO_2ARG],
++[AC_MSG_CHECKING([if kernel has bio_endio with 2 args])
++LB_LINUX_TRY_COMPILE([
++ #include <linux/bio.h>
++],[
++ bio_endio(NULL, 0);
++], [
++ AC_MSG_RESULT([yes])
++ AC_DEFINE(HAVE_BIO_ENDIO_2ARG, 1,
++ [if kernel has bio_endio with 2 args])
++],[
++ AC_MSG_RESULT([no])
++])
++])
++
++# 2.6.24 has new members in exports struct.
++AC_DEFUN([LC_FH_TO_DENTRY],
++[AC_MSG_CHECKING([if kernel has .fh_to_dentry member in export_operations struct])
++LB_LINUX_TRY_COMPILE([
++#ifdef HAVE_LINUX_EXPORTFS_H
++ #include <linux/exportfs.h>
++#else
++ #include <linux/fs.h>
++#endif
++],[
++ struct export_operations exp;
++
++ exp.fh_to_dentry = NULL;
++], [
++ AC_MSG_RESULT([yes])
++ AC_DEFINE(HAVE_FH_TO_DENTRY, 1,
++ [kernel has .fh_to_dentry member in export_operations struct])
++],[
++ AC_MSG_RESULT([no])
++])
++])
++
++# 2.6.24 need linux/mm_types.h included
++AC_DEFUN([LC_HAVE_MMTYPES_H],
++[LB_CHECK_FILE([$LINUX/include/linux/mm_types.h], [
++ AC_DEFINE(HAVE_LINUX_MMTYPES_H, 1,
++ [kernel has include/mm_types.h])
++],[
++ AC_MSG_RESULT([no])
++])
++])
++
++# 2.6.24 remove long aged procfs entry -> deleted member
++AC_DEFUN([LC_PROCFS_DELETED],
++[AC_MSG_CHECKING([if kernel has deleted member in procfs entry struct])
++LB_LINUX_TRY_COMPILE([
++ #include <linux/proc_fs.h>
++],[
++ struct proc_dir_entry pde;
++
++ pde.deleted = NULL;
++], [
++ AC_MSG_RESULT([yes])
++ AC_DEFINE(HAVE_PROCFS_DELETED, 1,
++ [kernel has deleted member in procfs entry struct])
++],[
++ AC_MSG_RESULT([no])
++])
++])
++
++# 2.6.25 change define to inline
++AC_DEFUN([LC_MAPPING_CAP_WRITEBACK_DIRTY],
++[AC_MSG_CHECKING([if kernel have mapping_cap_writeback_dirty])
++LB_LINUX_TRY_COMPILE([
++ #include <linux/backing-dev.h>
++],[
++ #ifndef mapping_cap_writeback_dirty
++ mapping_cap_writeback_dirty(NULL);
++ #endif
++],[
++ AC_MSG_RESULT([yes])
++ AC_DEFINE(HAVE_MAPPING_CAP_WRITEBACK_DIRTY, 1,
++ [kernel have mapping_cap_writeback_dirty])
++],[
++ AC_MSG_RESULT([no])
++])
++])
++
++
++
++# 2.6.26 does not export set_fs_pwd and changes fs_struct to use struct path
++AC_DEFUN([LC_FS_STRUCT_USE_PATH],
++[AC_MSG_CHECKING([fs_struct use path structure])
++LB_LINUX_TRY_COMPILE([
++ #include <asm/atomic.h>
++ #include <linux/spinlock.h>
++ #include <linux/fs_struct.h>
++],[
++ struct path path;
++ struct fs_struct fs;
++
++ fs.pwd = path;
++], [
++ AC_MSG_RESULT([yes])
++ AC_DEFINE(HAVE_FS_STRUCT_USE_PATH, 1,
++ [fs_struct use path structure])
++],[
++ AC_MSG_RESULT([no])
++])
++])
++
++# 2.6.26 remove path_release and use path_put instead
++AC_DEFUN([LC_PATH_RELEASE],
++[AC_MSG_CHECKING([if path_release exist])
++LB_LINUX_TRY_COMPILE([
++ #include <linux/dcache.h>
++ #include <linux/namei.h>
++],[
++ path_release(NULL);
++],[
++ AC_DEFINE(HAVE_PATH_RELEASE, 1, [path_release exist])
++ AC_MSG_RESULT([yes])
++],[
++ AC_MSG_RESULT([no])
++])
++])
++
++#2.6.27
++AC_DEFUN([LC_INODE_PERMISION_2ARGS],
++[AC_MSG_CHECKING([inode_operations->permission have two args])
++LB_LINUX_TRY_COMPILE([
++ #include <linux/fs.h>
++],[
++ struct inode *inode;
++
++ inode->i_op->permission(NULL,0);
++],[
++ AC_DEFINE(HAVE_INODE_PERMISION_2ARGS, 1,
++ [inode_operations->permission have two args])
++ AC_MSG_RESULT([yes])
++],[
++ AC_MSG_RESULT([no])
++])
++])
++
++# 2.6.27 have file_remove_suid instead of remove_suid
++AC_DEFUN([LC_FILE_REMOVE_SUID],
++[AC_MSG_CHECKING([kernel have file_remove_suid])
++LB_LINUX_TRY_COMPILE([
++ #include <linux/fs.h>
++],[
++ file_remove_suid(NULL);
++],[
++ AC_DEFINE(HAVE_FILE_REMOVE_SUID, 1,
++ [kernel have file_remove_suid])
++ AC_MSG_RESULT([yes])
++],[
++ AC_MSG_RESULT([no])
++])
++])
++
++# 2.6.27 have new page locking API
++AC_DEFUN([LC_TRYLOCKPAGE],
++[AC_MSG_CHECKING([kernel use trylock_page for page lock])
++LB_LINUX_TRY_COMPILE([
++ #include <linux/pagemap.h>
++],[
++ trylock_page(NULL);
++],[
++ AC_DEFINE(HAVE_TRYLOCK_PAGE, 1,
++ [kernel use trylock_page for page lock])
++ AC_MSG_RESULT([yes])
++],[
++ AC_MSG_RESULT([no])
++])
+ ])
+
+ #
+@@ -1372,8 +1607,30 @@
+ LC_FS_RENAME_DOES_D_MOVE
+ # 2.6.23
+ LC_UNREGISTER_BLKDEV_RETURN_INT
++ LC_KERNEL_SENDFILE
+ LC_KERNEL_SPLICE_READ
+ LC_HAVE_EXPORTFS_H
++ LC_VM_OP_FAULT
++ LC_REGISTER_SHRINKER
++
++ #2.6.25
++ LC_MAPPING_CAP_WRITEBACK_DIRTY
++
++ # 2.6.24
++ LC_HAVE_MMTYPES_H
++ LC_BIO_ENDIO_2ARG
++ LC_FH_TO_DENTRY
++ LC_PROCFS_DELETED
++
++ # 2.6.26
++ LC_FS_STRUCT_USE_PATH
++ LC_RCU_LIST_SAFE
++ LC_PATH_RELEASE
++
++ # 2.6.27
++ LC_INODE_PERMISION_2ARGS
++ LC_FILE_REMOVE_SUID
++ LC_TRYLOCKPAGE
+ ])
+
+ #
+@@ -1606,6 +1863,7 @@
+ ],[
+ AC_MSG_RESULT([no])
+ ])
++
+ ],[
+ AC_MSG_RESULT([no])
+ ])
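The HAVE_* symbols probed above are consumed by small shims elsewhere in the
patch. HAVE_TRYLOCK_PAGE, for instance, pairs naturally with the usual
compatibility define (a sketch of the common idiom: 2.6.27 renamed the
non-blocking page-lock primitive, and the older TestSetPageLocked() returns
the previous bit value, so its negation matches trylock_page()'s
acquired/not-acquired sense):

    #include <linux/pagemap.h>

    /* provide the 2.6.27 spelling on older kernels */
    #ifndef HAVE_TRYLOCK_PAGE
    #define trylock_page(page)  (!TestSetPageLocked(page))
    #endif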
+diff -urNad lustre~/lustre/autoconf/lustre-core.m4.orig lustre/lustre/autoconf/lustre-core.m4.orig
+--- lustre~/lustre/autoconf/lustre-core.m4.orig 1970-01-01 00:00:00.000000000 +0000
++++ lustre/lustre/autoconf/lustre-core.m4.orig 2009-03-12 10:32:27.000000000 +0100
+@@ -0,0 +1,1817 @@
++#* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
++#* vim:expandtab:shiftwidth=8:tabstop=8:
++#
++# LC_CONFIG_SRCDIR
++#
++# Wrapper for AC_CONFIG_SUBDIR
++#
++AC_DEFUN([LC_CONFIG_SRCDIR],
++[AC_CONFIG_SRCDIR([lustre/obdclass/obdo.c])
++])
++
++#
++# LC_PATH_DEFAULTS
++#
++# lustre specific paths
++#
++AC_DEFUN([LC_PATH_DEFAULTS],
++[# ptlrpc kernel build requires this
++LUSTRE="$PWD/lustre"
++AC_SUBST(LUSTRE)
++
++# mount.lustre
++rootsbindir='/sbin'
++AC_SUBST(rootsbindir)
++
++demodir='$(docdir)/demo'
++AC_SUBST(demodir)
++
++pkgexampledir='${pkgdatadir}/examples'
++AC_SUBST(pkgexampledir)
++])
++
++#
++# LC_TARGET_SUPPORTED
++#
++# is the target os supported?
++#
++AC_DEFUN([LC_TARGET_SUPPORTED],
++[case $target_os in
++ linux* | darwin*)
++$1
++ ;;
++ *)
++$2
++ ;;
++esac
++])
++
++#
++# LC_CONFIG_EXT3
++#
++# that ext3 is enabled in the kernel
++#
++AC_DEFUN([LC_CONFIG_EXT3],
++[LB_LINUX_CONFIG([EXT3_FS],[],[
++ LB_LINUX_CONFIG([EXT3_FS_MODULE],[],[$2])
++])
++LB_LINUX_CONFIG([EXT3_FS_XATTR],[$1],[$3])
++])
++
++#
++# LC_FSHOOKS
++#
++# If we have (and can build) fshooks.h
++#
++AC_DEFUN([LC_FSHOOKS],
++[LB_CHECK_FILE([$LINUX/include/linux/fshooks.h],[
++ AC_MSG_CHECKING([if fshooks.h can be compiled])
++ LB_LINUX_TRY_COMPILE([
++ #include <linux/fshooks.h>
++ ],[],[
++ AC_MSG_RESULT([yes])
++ ],[
++ AC_MSG_RESULT([no])
++ AC_MSG_WARN([You might have better luck with gcc 3.3.x.])
++ AC_MSG_WARN([You can set CC=gcc33 before running configure.])
++ AC_MSG_ERROR([Your compiler cannot build fshooks.h.])
++ ])
++$1
++],[
++$2
++])
++])
++
++#
++# LC_STRUCT_KIOBUF
++#
++# rh 2.4.18 has iobuf->dovary, but other kernels do not
++#
++AC_DEFUN([LC_STRUCT_KIOBUF],
++[AC_MSG_CHECKING([if struct kiobuf has a dovary field])
++LB_LINUX_TRY_COMPILE([
++ #include <linux/iobuf.h>
++],[
++ struct kiobuf iobuf;
++ iobuf.dovary = 1;
++],[
++ AC_MSG_RESULT([yes])
++ AC_DEFINE(HAVE_KIOBUF_DOVARY, 1, [struct kiobuf has a dovary field])
++],[
++ AC_MSG_RESULT([no])
++])
++])
++
++#
++# LC_FUNC_COND_RESCHED
++#
++# cond_resched() was introduced in 2.4.20
++#
++AC_DEFUN([LC_FUNC_COND_RESCHED],
++[AC_MSG_CHECKING([if kernel offers cond_resched])
++LB_LINUX_TRY_COMPILE([
++ #include <linux/sched.h>
++],[
++ cond_resched();
++],[
++ AC_MSG_RESULT([yes])
++ AC_DEFINE(HAVE_COND_RESCHED, 1, [cond_resched found])
++],[
++ AC_MSG_RESULT([no])
++])
++])
++
++#
++# LC_FUNC_ZAP_PAGE_RANGE
++#
++# if zap_page_range() takes a vma arg
++#
++AC_DEFUN([LC_FUNC_ZAP_PAGE_RANGE],
++[AC_MSG_CHECKING([if zap_page_range with vma parameter])
++ZAP_PAGE_RANGE_VMA="`grep -c 'zap_page_range.*struct vm_area_struct' $LINUX/include/linux/mm.h`"
++if test "$ZAP_PAGE_RANGE_VMA" != 0 ; then
++ AC_DEFINE(ZAP_PAGE_RANGE_VMA, 1, [zap_page_range with vma parameter])
++ AC_MSG_RESULT([yes])
++else
++ AC_MSG_RESULT([no])
++fi
++])
++
++#
++# LC_FUNC_PDE
++#
++# if proc_fs.h defines PDE()
++#
++AC_DEFUN([LC_FUNC_PDE],
++[AC_MSG_CHECKING([if kernel defines PDE])
++HAVE_PDE="`grep -c 'proc_dir_entry..PDE' $LINUX/include/linux/proc_fs.h`"
++if test "$HAVE_PDE" != 0 ; then
++ AC_DEFINE(HAVE_PDE, 1, [the kernel defines PDE])
++ AC_MSG_RESULT([yes])
++else
++ AC_MSG_RESULT([no])
++fi
++])
++
++#
++# LC_FUNC_FILEMAP_FDATASYNC
++#
++# if filemap_fdatasync() exists
++#
++AC_DEFUN([LC_FUNC_FILEMAP_FDATAWRITE],
++[AC_MSG_CHECKING([whether filemap_fdatawrite() is defined])
++LB_LINUX_TRY_COMPILE([
++ #include <linux/fs.h>
++],[
++ int (*foo)(struct address_space *)= filemap_fdatawrite;
++],[
++ AC_MSG_RESULT([yes])
++ AC_DEFINE(HAVE_FILEMAP_FDATAWRITE, 1, [filemap_fdatawrite() found])
++],[
++ AC_MSG_RESULT([no])
++])
++])
++
++#
++# LC_FUNC_DIRECT_IO
++#
++# if direct_IO takes a struct file argument
++#
++AC_DEFUN([LC_FUNC_DIRECT_IO],
++[AC_MSG_CHECKING([if kernel passes struct file to direct_IO])
++HAVE_DIO_FILE="`grep -c 'direct_IO.*struct file' $LINUX/include/linux/fs.h`"
++if test "$HAVE_DIO_FILE" != 0 ; then
++ AC_DEFINE(HAVE_DIO_FILE, 1, [the kernel passes struct file to direct_IO])
++ AC_MSG_RESULT(yes)
++else
++ AC_MSG_RESULT(no)
++fi
++])
++
++#
++# LC_HEADER_MM_INLINE
++#
++# RHEL kernels define page_count in mm_inline.h
++#
++AC_DEFUN([LC_HEADER_MM_INLINE],
++[AC_MSG_CHECKING([if kernel has mm_inline.h header])
++LB_LINUX_TRY_COMPILE([
++ #include <linux/mm_inline.h>
++],[
++ #ifndef page_count
++ #error mm_inline.h does not define page_count
++ #endif
++],[
++ AC_MSG_RESULT([yes])
++ AC_DEFINE(HAVE_MM_INLINE, 1, [mm_inline found])
++],[
++ AC_MSG_RESULT([no])
++])
++])
++
++#
++# LC_STRUCT_INODE
++#
++# if inode->i_alloc_sem exists
++#
++AC_DEFUN([LC_STRUCT_INODE],
++[AC_MSG_CHECKING([if struct inode has i_alloc_sem])
++LB_LINUX_TRY_COMPILE([
++ #include <linux/fs.h>
++ #include <linux/version.h>
++],[
++ struct inode i;
++ return (char *)&i.i_alloc_sem - (char *)&i;
++],[
++ AC_MSG_RESULT([yes])
++ AC_DEFINE(HAVE_I_ALLOC_SEM, 1, [struct inode has i_alloc_sem])
++],[
++ AC_MSG_RESULT([no])
++])
++])
++
++#
++# LC_FUNC_REGISTER_CACHE
++#
++# if register_cache() is defined by kernel
++#
++AC_DEFUN([LC_FUNC_REGISTER_CACHE],
++[AC_MSG_CHECKING([if kernel defines register_cache()])
++LB_LINUX_TRY_COMPILE([
++ #include <linux/list.h>
++ #include <linux/cache_def.h>
++],[
++ struct cache_definition cache;
++],[
++ AC_MSG_RESULT([yes])
++ AC_DEFINE(HAVE_REGISTER_CACHE, 1, [register_cache found])
++ AC_MSG_CHECKING([if kernel expects return from cache shrink function])
++ HAVE_CACHE_RETURN_INT="`grep -c 'int.*shrink' $LINUX/include/linux/cache_def.h`"
++ if test "$HAVE_CACHE_RETURN_INT" != 0 ; then
++ AC_DEFINE(HAVE_CACHE_RETURN_INT, 1, [kernel expects return from shrink_cache])
++ AC_MSG_RESULT(yes)
++ else
++ AC_MSG_RESULT(no)
++ fi
++],[
++ AC_MSG_RESULT([no])
++])
++])
++
++#
++# LC_FUNC_GRAB_CACHE_PAGE_NOWAIT_GFP
++#
++# check for our patched grab_cache_page_nowait_gfp() function
++#
++AC_DEFUN([LC_FUNC_GRAB_CACHE_PAGE_NOWAIT_GFP],
++[AC_MSG_CHECKING([if kernel defines grab_cache_page_nowait_gfp()])
++HAVE_GCPN_GFP="`grep -c 'grab_cache_page_nowait_gfp' $LINUX/include/linux/pagemap.h`"
++if test "$HAVE_GCPN_GFP" != 0 ; then
++ AC_DEFINE(HAVE_GRAB_CACHE_PAGE_NOWAIT_GFP, 1,
++ [kernel has grab_cache_page_nowait_gfp()])
++ AC_MSG_RESULT(yes)
++else
++ AC_MSG_RESULT(no)
++fi
++])
++
++#
++# LC_FUNC_DEV_SET_RDONLY
++#
++# check for the new-style dev_set_rdonly (the old interface took an extra
++# "devno" param and could only set one device to discard writes at a time)
++#
++AC_DEFUN([LC_FUNC_DEV_SET_RDONLY],
++[AC_MSG_CHECKING([if kernel has new dev_set_rdonly])
++LB_LINUX_TRY_COMPILE([
++ #include <linux/fs.h>
++],[
++ #ifndef HAVE_CLEAR_RDONLY_ON_PUT
++ #error needs to be patched by lustre kernel patches from Lustre version 1.4.3 or above.
++ #endif
++],[
++ AC_MSG_RESULT([yes])
++ AC_DEFINE(HAVE_DEV_SET_RDONLY, 1, [kernel has new dev_set_rdonly])
++],[
++ AC_MSG_RESULT([no, Linux kernel source needs to be patched by lustre
++kernel patches from Lustre version 1.4.3 or above.])
++])
++])
++
++#
++# LC_CONFIG_BACKINGFS
++#
++# setup, check the backing filesystem
++#
++AC_DEFUN([LC_CONFIG_BACKINGFS],
++[
++BACKINGFS="ldiskfs"
++
++if test x$with_ldiskfs = xno ; then
++ BACKINGFS="ext3"
++
++ if test x$linux25$enable_server = xyesyes ; then
++ AC_MSG_ERROR([ldiskfs is required for 2.6-based servers.])
++ fi
++
++ # --- Check that ext3 and ext3 xattr are enabled in the kernel
++ LC_CONFIG_EXT3([],[
++ AC_MSG_ERROR([Lustre requires that ext3 is enabled in the kernel])
++ ],[
++ AC_MSG_WARN([Lustre requires that extended attributes for ext3 are enabled in the kernel])
++ AC_MSG_WARN([This build may fail.])
++ ])
++else
++ # ldiskfs is enabled
++ LB_DEFINE_LDISKFS_OPTIONS
++fi #ldiskfs
++
++AC_MSG_CHECKING([which backing filesystem to use])
++AC_MSG_RESULT([$BACKINGFS])
++AC_SUBST(BACKINGFS)
++])
++
++#
++# LC_CONFIG_PINGER
++#
++# the pinger is temporary, until we have the recovery node in place
++#
++AC_DEFUN([LC_CONFIG_PINGER],
++[AC_MSG_CHECKING([whether to enable pinger support])
++AC_ARG_ENABLE([pinger],
++ AC_HELP_STRING([--disable-pinger],
++ [disable recovery pinger support]),
++ [],[enable_pinger='yes'])
++AC_MSG_RESULT([$enable_pinger])
++if test x$enable_pinger != xno ; then
++ AC_DEFINE(ENABLE_PINGER, 1, Use the Pinger)
++fi
++])
++
++#
++# LC_CONFIG_CHECKSUM
++#
++# do checksum of bulk data between client and OST
++#
++AC_DEFUN([LC_CONFIG_CHECKSUM],
++[AC_MSG_CHECKING([whether to enable data checksum support])
++AC_ARG_ENABLE([checksum],
++ AC_HELP_STRING([--disable-checksum],
++ [disable data checksum support]),
++ [],[enable_checksum='yes'])
++AC_MSG_RESULT([$enable_checksum])
++if test x$enable_checksum != xno ; then
++ AC_DEFINE(ENABLE_CHECKSUM, 1, do data checksums)
++fi
++])
++
++#
++# LC_CONFIG_HEALTH_CHECK_WRITE
++#
++# Turn on the actual write to the disk
++#
++AC_DEFUN([LC_CONFIG_HEALTH_CHECK_WRITE],
++[AC_MSG_CHECKING([whether to enable a write with the health check])
++AC_ARG_ENABLE([health-write],
++ AC_HELP_STRING([--enable-health-write],
++ [enable disk writes when doing health check]),
++ [],[enable_health_write='no'])
++AC_MSG_RESULT([$enable_health_write])
++if test x$enable_health_write = xyes ; then
++ AC_DEFINE(USE_HEALTH_CHECK_WRITE, 1, Write when Checking Health)
++fi
++])
++
++#
++# LC_CONFIG_LIBLUSTRE_RECOVERY
++#
++AC_DEFUN([LC_CONFIG_LIBLUSTRE_RECOVERY],
++[AC_MSG_CHECKING([whether to enable liblustre recovery support])
++AC_ARG_ENABLE([liblustre-recovery],
++ AC_HELP_STRING([--disable-liblustre-recovery],
++ [disable liblustre recovery support]),
++ [],[enable_liblustre_recovery='yes'])
++AC_MSG_RESULT([$enable_liblustre_recovery])
++if test x$enable_liblustre_recovery != xno ; then
++ AC_DEFINE(ENABLE_LIBLUSTRE_RECOVERY, 1, Liblustre Can Recover)
++fi
++])
++
++#
++# LC_CONFIG_OBD_BUFFER_SIZE
++#
++# the maximum buffer size of lctl ioctls
++#
++AC_DEFUN([LC_CONFIG_OBD_BUFFER_SIZE],
++[AC_MSG_CHECKING([maximum OBD ioctl size])
++AC_ARG_WITH([obd-buffer-size],
++ AC_HELP_STRING([--with-obd-buffer-size=[size]],
++ [set lctl ioctl maximum bytes (default=8192)]),
++ [
++ OBD_BUFFER_SIZE=$with_obd_buffer_size
++ ],[
++ OBD_BUFFER_SIZE=8192
++ ])
++AC_MSG_RESULT([$OBD_BUFFER_SIZE bytes])
++AC_DEFINE_UNQUOTED(OBD_MAX_IOCTL_BUFFER, $OBD_BUFFER_SIZE, [IOCTL Buffer Size])
++])
++
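The option macros above all follow the same AC_ARG_ENABLE/AC_ARG_WITH pattern;
an illustrative invocation (output abbreviated):

    $ ./configure --disable-pinger --enable-health-write --with-obd-buffer-size=16384
    checking whether to enable pinger support... no
    checking whether to enable a write with the health check... yes
    checking maximum OBD ioctl size... 16384 bytes
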
++#
++# LC_STRUCT_STATFS
++#
++# AIX does not have statfs.f_namelen
++#
++AC_DEFUN([LC_STRUCT_STATFS],
++[AC_MSG_CHECKING([if struct statfs has an f_namelen field])
++LB_LINUX_TRY_COMPILE([
++ #include <linux/vfs.h>
++],[
++ struct statfs sfs;
++ sfs.f_namelen = 1;
++],[
++ AC_MSG_RESULT([yes])
++ AC_DEFINE(HAVE_STATFS_NAMELEN, 1, [struct statfs has a namelen field])
++],[
++ AC_MSG_RESULT([no])
++])
++])
++
++#
++# LC_READLINK_SSIZE_T
++#
++AC_DEFUN([LC_READLINK_SSIZE_T],
++[AC_MSG_CHECKING([if readlink returns ssize_t])
++AC_TRY_COMPILE([
++ #include <unistd.h>
++],[
++ ssize_t readlink(const char *, char *, size_t);
++],[
++ AC_MSG_RESULT([yes])
++ AC_DEFINE(HAVE_POSIX_1003_READLINK, 1, [readlink returns ssize_t])
++],[
++ AC_MSG_RESULT([no])
++])
++])
++
++AC_DEFUN([LC_FUNC_PAGE_MAPPED],
++[AC_MSG_CHECKING([if kernel offers page_mapped])
++LB_LINUX_TRY_COMPILE([
++ #include <linux/mm.h>
++],[
++ page_mapped(NULL);
++],[
++ AC_MSG_RESULT([yes])
++ AC_DEFINE(HAVE_PAGE_MAPPED, 1, [page_mapped found])
++],[
++ AC_MSG_RESULT([no])
++])
++])
++
++AC_DEFUN([LC_STRUCT_FILE_OPS_UNLOCKED_IOCTL],
++[AC_MSG_CHECKING([if struct file_operations has an unlocked_ioctl field])
++LB_LINUX_TRY_COMPILE([
++ #include <linux/fs.h>
++],[
++ struct file_operations fops;
++ &fops.unlocked_ioctl;
++],[
++ AC_MSG_RESULT([yes])
++ AC_DEFINE(HAVE_UNLOCKED_IOCTL, 1, [struct file_operations has an unlocked_ioctl field])
++],[
++ AC_MSG_RESULT([no])
++])
++])
++
++AC_DEFUN([LC_FILEMAP_POPULATE],
++[AC_MSG_CHECKING([for exported filemap_populate])
++LB_LINUX_TRY_COMPILE([
++ #include <asm/page.h>
++ #include <linux/mm.h>
++],[
++ filemap_populate(NULL, 0, 0, __pgprot(0), 0, 0);
++],[
++ AC_MSG_RESULT([yes])
++ AC_DEFINE(HAVE_FILEMAP_POPULATE, 1, [Kernel exports filemap_populate])
++],[
++ AC_MSG_RESULT([no])
++])
++])
++
++AC_DEFUN([LC_D_ADD_UNIQUE],
++[AC_MSG_CHECKING([for d_add_unique])
++LB_LINUX_TRY_COMPILE([
++ #include <linux/dcache.h>
++],[
++ d_add_unique(NULL, NULL);
++],[
++ AC_MSG_RESULT([yes])
++ AC_DEFINE(HAVE_D_ADD_UNIQUE, 1, [Kernel has d_add_unique])
++],[
++ AC_MSG_RESULT([no])
++])
++])
++
++AC_DEFUN([LC_BIT_SPINLOCK_H],
++[LB_CHECK_FILE([$LINUX/include/linux/bit_spinlock.h],[
++ AC_MSG_CHECKING([if bit_spinlock.h can be compiled])
++ LB_LINUX_TRY_COMPILE([
++ #include <asm/processor.h>
++ #include <linux/spinlock.h>
++ #include <linux/bit_spinlock.h>
++ ],[],[
++ AC_MSG_RESULT([yes])
++ AC_DEFINE(HAVE_BIT_SPINLOCK_H, 1, [Kernel has bit_spinlock.h])
++ ],[
++ AC_MSG_RESULT([no])
++ ])
++],
++[])
++])
++
++#
++# LC_XATTR_ACL
++#
++# If we have xattr_acl.h
++#
++AC_DEFUN([LC_XATTR_ACL],
++[LB_CHECK_FILE([$LINUX/include/linux/xattr_acl.h],[
++ AC_MSG_CHECKING([if xattr_acl.h can be compiled])
++ LB_LINUX_TRY_COMPILE([
++ #include <linux/xattr_acl.h>
++ ],[],[
++ AC_MSG_RESULT([yes])
++ AC_DEFINE(HAVE_XATTR_ACL, 1, [Kernel has xattr_acl])
++ ],[
++ AC_MSG_RESULT([no])
++ ])
++],
++[])
++])
++
++#
++# LC_LINUX_FIEMAP_H
++#
++# If we have fiemap.h
++# after 2.6.27 use fiemap.h in include/linux
++#
++AC_DEFUN([LC_LINUX_FIEMAP_H],
++[LB_CHECK_FILE([$LINUX/include/linux/fiemap.h],[
++ AC_MSG_CHECKING([if fiemap.h can be compiled])
++ LB_LINUX_TRY_COMPILE([
++ #include <linux/fiemap.h>
++ ],[],[
++ AC_MSG_RESULT([yes])
++ AC_DEFINE(HAVE_LINUX_FIEMAP_H, 1, [Kernel has fiemap.h])
++ ],[
++ AC_MSG_RESULT([no])
++ ])
++],
++[])
++])
++
++
++AC_DEFUN([LC_STRUCT_INTENT_FILE],
++[AC_MSG_CHECKING([if struct open_intent has a file field])
++LB_LINUX_TRY_COMPILE([
++ #include <linux/fs.h>
++ #include <linux/namei.h>
++],[
++ struct open_intent intent;
++ &intent.file;
++],[
++ AC_MSG_RESULT([yes])
++ AC_DEFINE(HAVE_FILE_IN_STRUCT_INTENT, 1, [struct open_intent has a file field])
++],[
++ AC_MSG_RESULT([no])
++])
++])
++
++
++AC_DEFUN([LC_POSIX_ACL_XATTR_H],
++[LB_CHECK_FILE([$LINUX/include/linux/posix_acl_xattr.h],[
++ AC_MSG_CHECKING([if linux/posix_acl_xattr.h can be compiled])
++ LB_LINUX_TRY_COMPILE([
++ #include <linux/posix_acl_xattr.h>
++ ],[],[
++ AC_MSG_RESULT([yes])
++ AC_DEFINE(HAVE_LINUX_POSIX_ACL_XATTR_H, 1, [linux/posix_acl_xattr.h found])
++
++ ],[
++ AC_MSG_RESULT([no])
++ ])
++$1
++],[
++AC_MSG_RESULT([no])
++])
++])
++
++#
++# LC_EXPORT___IGET
++# starting from 2.6.19 the Linux kernel exports __iget()
++#
++AC_DEFUN([LC_EXPORT___IGET],
++[LB_CHECK_SYMBOL_EXPORT([__iget],
++[fs/inode.c],[
++ AC_DEFINE(HAVE_EXPORT___IGET, 1, [kernel exports __iget])
++],[
++])
++])
++
++
++AC_DEFUN([LC_LUSTRE_VERSION_H],
++[LB_CHECK_FILE([$LINUX/include/linux/lustre_version.h],[
++ rm -f "$LUSTRE/include/linux/lustre_version.h"
++],[
++ touch "$LUSTRE/include/linux/lustre_version.h"
++ if test x$enable_server = xyes ; then
++ AC_MSG_WARN([Unpatched kernel detected.])
++ AC_MSG_WARN([Lustre servers cannot be built with an unpatched kernel;])
++ AC_MSG_WARN([disabling server build])
++ enable_server='no'
++ fi
++])
++])
++
++AC_DEFUN([LC_FUNC_SET_FS_PWD],
++[LB_CHECK_SYMBOL_EXPORT([set_fs_pwd],
++[fs/namespace.c],[
++ AC_DEFINE(HAVE_SET_FS_PWD, 1, [set_fs_pwd is exported])
++],[
++])
++])
++
++#
++# check for FS_RENAME_DOES_D_MOVE flag
++#
++AC_DEFUN([LC_FS_RENAME_DOES_D_MOVE],
++[AC_MSG_CHECKING([if kernel has FS_RENAME_DOES_D_MOVE flag])
++LB_LINUX_TRY_COMPILE([
++ #include <linux/fs.h>
++],[
++ int v = FS_RENAME_DOES_D_MOVE;
++],[
++ AC_MSG_RESULT([yes])
++ AC_DEFINE(HAVE_FS_RENAME_DOES_D_MOVE, 1, [kernel has FS_RENAME_DOES_D_MOVE flag])
++],[
++ AC_MSG_RESULT([no])
++])
++])
++
++#
++# LC_FUNC_MS_FLOCK_LOCK
++#
++# SLES9 kernel has MS_FLOCK_LOCK sb flag
++#
++AC_DEFUN([LC_FUNC_MS_FLOCK_LOCK],
++[AC_MSG_CHECKING([if kernel has MS_FLOCK_LOCK sb flag])
++LB_LINUX_TRY_COMPILE([
++ #include <linux/fs.h>
++],[
++ int flags = MS_FLOCK_LOCK;
++],[
++ AC_DEFINE(HAVE_MS_FLOCK_LOCK, 1,
++ [kernel has MS_FLOCK_LOCK flag])
++ AC_MSG_RESULT([yes])
++],[
++ AC_MSG_RESULT([no])
++])
++])
++
++#
++# LC_FUNC_HAVE_CAN_SLEEP_ARG
++#
++# SLES9 kernel has third arg can_sleep
++# in fs/locks.c: flock_lock_file_wait()
++#
++AC_DEFUN([LC_FUNC_HAVE_CAN_SLEEP_ARG],
++[AC_MSG_CHECKING([if kernel has third arg can_sleep in fs/locks.c: flock_lock_file_wait()])
++LB_LINUX_TRY_COMPILE([
++ #include <linux/fs.h>
++],[
++ int cansleep;
++ struct file *file;
++ struct file_lock *file_lock;
++ flock_lock_file_wait(file, file_lock, cansleep);
++],[
++ AC_DEFINE(HAVE_CAN_SLEEP_ARG, 1,
++ [kernel has third arg can_sleep in fs/locks.c: flock_lock_file_wait()])
++ AC_MSG_RESULT([yes])
++],[
++ AC_MSG_RESULT([no])
++])
++])
++
++#
++# LC_FUNC_F_OP_FLOCK
++#
++# rhel4.2 kernel has f_op->flock field
++#
++AC_DEFUN([LC_FUNC_F_OP_FLOCK],
++[AC_MSG_CHECKING([if struct file_operations has flock field])
++LB_LINUX_TRY_COMPILE([
++ #include <linux/fs.h>
++],[
++ struct file_operations ll_file_operations_flock;
++ ll_file_operations_flock.flock = NULL;
++],[
++ AC_DEFINE(HAVE_F_OP_FLOCK, 1,
++ [struct file_operations has flock field])
++ AC_MSG_RESULT([yes])
++],[
++ AC_MSG_RESULT([no])
++])
++])
++
++#
++# LC_TASK_PPTR
++#
++# task struct has p_pptr instead of parent
++#
++AC_DEFUN([LC_TASK_PPTR],
++[AC_MSG_CHECKING([if task_struct has p_pptr])
++LB_LINUX_TRY_COMPILE([
++ #include <linux/sched.h>
++],[
++ struct task_struct *p;
++
++ p = p->p_pptr;
++],[
++ AC_MSG_RESULT([yes])
++ AC_DEFINE(HAVE_TASK_PPTR, 1, [task p_pptr found])
++],[
++ AC_MSG_RESULT([no])
++])
++])
++
++# LC_INODE_I_MUTEX
++# after 2.6.15 the inode has i_mutex instead of i_sem
++AC_DEFUN([LC_INODE_I_MUTEX],
++[AC_MSG_CHECKING([if inode has i_mutex])
++LB_LINUX_TRY_COMPILE([
++ #include <linux/mutex.h>
++ #include <linux/fs.h>
++ #undef i_mutex
++],[
++ struct inode i;
++
++ mutex_unlock(&i.i_mutex);
++],[
++ AC_MSG_RESULT(yes)
++ AC_DEFINE(HAVE_INODE_I_MUTEX, 1,
++ [after 2.6.15 the inode has i_mutex instead of i_sem])
++],[
++ AC_MSG_RESULT(no)
++])
++])
++
++# LC_DQUOTOFF_MUTEX
++# after 2.6.17 quota uses a mutex instead of a semaphore
++AC_DEFUN([LC_DQUOTOFF_MUTEX],
++[AC_MSG_CHECKING([if quota uses dqonoff_mutex])
++LB_LINUX_TRY_COMPILE([
++ #include <linux/mutex.h>
++ #include <linux/fs.h>
++ #include <linux/quota.h>
++],[
++ struct quota_info dq;
++
++ mutex_unlock(&dq.dqonoff_mutex);
++],[
++ AC_MSG_RESULT(yes)
++ AC_DEFINE(HAVE_DQUOTOFF_MUTEX, 1,
++ [after 2.6.17 quota uses a mutex instead of a semaphore])
++],[
++ AC_MSG_RESULT(no)
++])
++])
++
++#
++# LC_STATFS_DENTRY_PARAM
++# starting from 2.6.18 the Linux kernel uses a dentry instead of
++# a super_block as the first vfs_statfs argument
++#
++AC_DEFUN([LC_STATFS_DENTRY_PARAM],
++[AC_MSG_CHECKING([if first vfs_statfs parameter is dentry])
++LB_LINUX_TRY_COMPILE([
++ #include <linux/fs.h>
++],[
++ int vfs_statfs(struct dentry *, struct kstatfs *);
++],[
++ AC_DEFINE(HAVE_STATFS_DENTRY_PARAM, 1,
++ [first parameter of vfs_statfs is dentry])
++ AC_MSG_RESULT([yes])
++],[
++ AC_MSG_RESULT([no])
++])
++])
++
++#
++# LC_VFS_KERN_MOUNT
++# starting from 2.6.18 the kernel doesn't export do_kern_mount
++# and vfs_kern_mount must be used instead.
++#
++AC_DEFUN([LC_VFS_KERN_MOUNT],
++[AC_MSG_CHECKING([if vfs_kern_mount exists in kernel])
++LB_LINUX_TRY_COMPILE([
++ #include <linux/mount.h>
++],[
++ vfs_kern_mount(NULL, 0, NULL, NULL);
++],[
++ AC_DEFINE(HAVE_VFS_KERN_MOUNT, 1,
++ [vfs_kern_mount exists in kernel])
++ AC_MSG_RESULT([yes])
++],[
++ AC_MSG_RESULT([no])
++])
++])
++
++#
++# LC_INVALIDATEPAGE_RETURN_INT
++# more 2.6 API changes. the return type of the invalidatepage
++# address_space_operation is 'void' in new kernels but 'int' in old
++#
++AC_DEFUN([LC_INVALIDATEPAGE_RETURN_INT],
++[AC_MSG_CHECKING([if invalidatepage returns int])
++LB_LINUX_TRY_COMPILE([
++ #include <linux/buffer_head.h>
++],[
++ int rc = block_invalidatepage(NULL, 0);
++],[
++ AC_MSG_RESULT(yes)
++ AC_DEFINE(HAVE_INVALIDATEPAGE_RETURN_INT, 1,
++ [Define if return type of invalidatepage should be int])
++],[
++ AC_MSG_RESULT(no)
++])
++])
++
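Call sites can then hide the return-type difference behind a trivial wrapper;
one possible shape (the ll_invalidatepage name is ours, for illustration only):

    #ifdef HAVE_INVALIDATEPAGE_RETURN_INT
    /* old kernels: discard the int result so both variants look alike */
    # define ll_invalidatepage(page, off) ((void)block_invalidatepage((page), (off)))
    #else
    # define ll_invalidatepage(page, off) block_invalidatepage((page), (off))
    #endif
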
++# LC_UMOUNTBEGIN_HAS_VFSMOUNT
++# more 2.6 API changes. 2.6.18 umount_begin has different parameters
++AC_DEFUN([LC_UMOUNTBEGIN_HAS_VFSMOUNT],
++[AC_MSG_CHECKING([if umount_begin needs vfsmount parameter instead of super_block])
++tmp_flags="$EXTRA_KCFLAGS"
++EXTRA_KCFLAGS="-Werror"
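++# -Werror promotes the pointer-type mismatch below from a warning to an
++# error, so the probe fails cleanly on kernels with the old prototype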
++LB_LINUX_TRY_COMPILE([
++ #include <linux/fs.h>
++
++ struct vfsmount;
++ static void cfg_umount_begin (struct vfsmount *v, int flags)
++ {
++ ;
++ }
++
++ static struct super_operations cfg_super_operations = {
++ .umount_begin = cfg_umount_begin,
++ };
++],[
++ cfg_super_operations.umount_begin(NULL,0);
++],[
++ AC_MSG_RESULT(yes)
++ AC_DEFINE(HAVE_UMOUNTBEGIN_VFSMOUNT, 1,
++ [Define umount_begin need second argument])
++],[
++ AC_MSG_RESULT(no)
++])
++EXTRA_KCFLAGS="$tmp_flags"
++])
++
++# 2.6.19 API changes
++# the inode no longer has an i_blksize field
++AC_DEFUN([LC_INODE_BLKSIZE],
++[AC_MSG_CHECKING([if inode has i_blksize field])
++LB_LINUX_TRY_COMPILE([
++#include <linux/fs.h>
++],[
++ struct inode i;
++ i.i_blksize = 0;
++],[
++ AC_MSG_RESULT(yes)
++ AC_DEFINE(HAVE_INODE_BLKSIZE, 1,
++ [struct inode has i_blksize field])
++],[
++ AC_MSG_RESULT(no)
++])
++])
++
++# LC_VFS_READDIR_U64_INO
++# 2.6.19 uses u64 for the inode number instead of ino_t
++AC_DEFUN([LC_VFS_READDIR_U64_INO],
++[AC_MSG_CHECKING([if vfs_readdir needs a 64-bit inode number])
++tmp_flags="$EXTRA_KCFLAGS"
++EXTRA_KCFLAGS="-Werror"
++LB_LINUX_TRY_COMPILE([
++#include <linux/fs.h>
++ int fillonedir(void * __buf, const char * name, int namlen, loff_t offset,
++ u64 ino, unsigned int d_type)
++ {
++ return 0;
++ }
++],[
++ filldir_t filter;
++
++ filter = fillonedir;
++ return 1;
++],[
++ AC_MSG_RESULT(yes)
++ AC_DEFINE(HAVE_VFS_READDIR_U64_INO, 1,
++ [vfs_readdir needs a 64-bit inode number])
++],[
++ AC_MSG_RESULT(no)
++])
++EXTRA_KCFLAGS="$tmp_flags"
++])
++
++# LC_FILE_WRITEV
++# 2.6.19 replaced writev with aio_write
++AC_DEFUN([LC_FILE_WRITEV],
++[AC_MSG_CHECKING([writev in fops])
++LB_LINUX_TRY_COMPILE([
++ #include <linux/fs.h>
++],[
++ struct file_operations *fops = NULL;
++ fops->writev = NULL;
++],[
++ AC_MSG_RESULT(yes)
++ AC_DEFINE(HAVE_FILE_WRITEV, 1,
++ [use fops->writev])
++],[
++ AC_MSG_RESULT(no)
++])
++])
++
++# LC_FILE_READV
++# 2.6.19 replaced readv with aio_read
++AC_DEFUN([LC_FILE_READV],
++[AC_MSG_CHECKING([readv in fops])
++LB_LINUX_TRY_COMPILE([
++ #include <linux/fs.h>
++],[
++ struct file_operations *fops = NULL;
++ fops->readv = NULL;
++],[
++ AC_MSG_RESULT(yes)
++ AC_DEFINE(HAVE_FILE_READV, 1,
++ [use fops->readv])
++],[
++ AC_MSG_RESULT(no)
++])
++])
++
++# LC_NR_PAGECACHE
++# 2.6.18 doesn't export nr_pagecache
++AC_DEFUN([LC_NR_PAGECACHE],
++[AC_MSG_CHECKING([if kernel exports nr_pagecache])
++LB_LINUX_TRY_COMPILE([
++ #include <linux/pagemap.h>
++],[
++ return atomic_read(&nr_pagecache);
++],[
++ AC_MSG_RESULT(yes)
++ AC_DEFINE(HAVE_NR_PAGECACHE, 1,
++ [kernel exports nr_pagecache])
++],[
++ AC_MSG_RESULT(no)
++])
++])
++
++# LC_CANCEL_DIRTY_PAGE
++# 2.6.20 introduces cancel_dirty_page instead of
++# clear_page_dirty.
++AC_DEFUN([LC_CANCEL_DIRTY_PAGE],
++[AC_MSG_CHECKING([if kernel has cancel_dirty_page])
++LB_LINUX_TRY_COMPILE([
++ #include <linux/mm.h>
++ #include <linux/page-flags.h>
++],[
++ cancel_dirty_page(NULL, 0);
++],[
++ AC_MSG_RESULT(yes)
++ AC_DEFINE(HAVE_CANCEL_DIRTY_PAGE, 1,
++ [kernel has cancel_dirty_page instead of clear_page_dirty])
++],[
++ AC_MSG_RESULT(no)
++])
++])
++
++#
++# LC_PAGE_CONSTANT
++#
++# In order to support the raid5 zerocopy patch, we have to patch the kernel
++# to support constant pages, meaning the page won't be modified during the
++# IO.
++#
++AC_DEFUN([LC_PAGE_CONSTANT],
++[AC_MSG_CHECKING([if kernel has PageConstant defined])
++LB_LINUX_TRY_COMPILE([
++ #include <linux/mm.h>
++ #include <linux/page-flags.h>
++],[
++ #ifndef PG_constant
++ #error "Have no raid5 zcopy patch"
++ #endif
++],[
++ AC_MSG_RESULT(yes)
++ AC_DEFINE(HAVE_PAGE_CONSTANT, 1, [kernel has PageConstant support])
++],[
++ AC_MSG_RESULT(no)
++])
++])
++
++# the RHEL5 FS-cache patch renames the PG_checked flag
++# to PG_fs_misc
++AC_DEFUN([LC_PG_FS_MISC],
++[AC_MSG_CHECKING([if kernel has PG_fs_misc])
++LB_LINUX_TRY_COMPILE([
++ #include <linux/mm.h>
++ #include <linux/page-flags.h>
++],[
++ #ifndef PG_fs_misc
++ #error PG_fs_misc not defined in kernel
++ #endif
++],[
++ AC_MSG_RESULT(yes)
++ AC_DEFINE(HAVE_PG_FS_MISC, 1,
++ [kernel has PG_fs_misc])
++],[
++ AC_MSG_RESULT(no)
++])
++])
++
++# RHEL5 has PageChecked and SetPageChecked defined
++AC_DEFUN([LC_PAGE_CHECKED],
++[AC_MSG_CHECKING([if kernel has PageChecked and SetPageChecked])
++LB_LINUX_TRY_COMPILE([
++ #include <linux/mm.h>
++ #include <linux/page-flags.h>
++],[
++ #ifndef PageChecked
++ #error PageChecked not defined in kernel
++ #endif
++ #ifndef SetPageChecked
++ #error SetPageChecked not defined in kernel
++ #endif
++],[
++ AC_MSG_RESULT(yes)
++ AC_DEFINE(HAVE_PAGE_CHECKED, 1,
++ [kernel has PageChecked and SetPageChecked])
++],[
++ AC_MSG_RESULT(no)
++])
++])
++
++AC_DEFUN([LC_EXPORT_TRUNCATE_COMPLETE],
++[LB_CHECK_SYMBOL_EXPORT([truncate_complete_page],
++[mm/truncate.c],[
++AC_DEFINE(HAVE_TRUNCATE_COMPLETE_PAGE, 1,
++ [kernel export truncate_complete_page])
++],[
++])
++])
++
++AC_DEFUN([LC_EXPORT_D_REHASH_COND],
++[LB_CHECK_SYMBOL_EXPORT([d_rehash_cond],
++[fs/dcache.c],[
++AC_DEFINE(HAVE_D_REHASH_COND, 1,
++ [d_rehash_cond is exported by the kernel])
++],[
++])
++])
++
++AC_DEFUN([LC_EXPORT___D_REHASH],
++[LB_CHECK_SYMBOL_EXPORT([__d_rehash],
++[fs/dcache.c],[
++AC_DEFINE(HAVE___D_REHASH, 1,
++ [__d_rehash is exported by the kernel])
++],[
++])
++])
++
++AC_DEFUN([LC_EXPORT_D_MOVE_LOCKED],
++[LB_CHECK_SYMBOL_EXPORT([d_move_locked],
++[fs/dcache.c],[
++AC_DEFINE(HAVE_D_MOVE_LOCKED, 1,
++ [d_move_locked is exported by the kernel])
++],[
++])
++])
++
++AC_DEFUN([LC_EXPORT___D_MOVE],
++[LB_CHECK_SYMBOL_EXPORT([__d_move],
++[fs/dcache.c],[
++AC_DEFINE(HAVE___D_MOVE, 1,
++ [__d_move is exported by the kernel])
++],[
++])
++])
++
++# The actual symbol exported varies among architectures, so we need
++# to check many symbols (but only in the current architecture.) No
++# matter what symbol is exported, the kernel #defines node_to_cpumask
++# to the appropriate function and that's what we use.
++AC_DEFUN([LC_EXPORT_NODE_TO_CPUMASK],
++ [LB_CHECK_SYMBOL_EXPORT([node_to_cpumask],
++ [arch/$LINUX_ARCH/mm/numa.c],
++ [AC_DEFINE(HAVE_NODE_TO_CPUMASK, 1,
++ [node_to_cpumask is exported by
++ the kernel])]) # x86_64
++ LB_CHECK_SYMBOL_EXPORT([node_to_cpu_mask],
++ [arch/$LINUX_ARCH/kernel/smpboot.c],
++ [AC_DEFINE(HAVE_NODE_TO_CPUMASK, 1,
++ [node_to_cpumask is exported by
++ the kernel])]) # ia64
++ LB_CHECK_SYMBOL_EXPORT([node_2_cpu_mask],
++ [arch/$LINUX_ARCH/kernel/smpboot.c],
++ [AC_DEFINE(HAVE_NODE_TO_CPUMASK, 1,
++ [node_to_cpumask is exported by
++ the kernel])]) # i386
++ ])
++
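For reference, LB_CHECK_SYMBOL_EXPORT answers these questions without
compiling anything; roughly (a sketch of the mechanism, not the verbatim
macro):

    # exported from the listed source file, or listed in Module.symvers?
    grep -q "EXPORT_SYMBOL.*(node_to_cpumask)" \
        $LINUX/arch/$LINUX_ARCH/mm/numa.c ||
    grep -qw "node_to_cpumask" $LINUX/Module.symvers
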
++#
++# LC_VFS_INTENT_PATCHES
++#
++# check if the kernel has the VFS intent patches
++AC_DEFUN([LC_VFS_INTENT_PATCHES],
++[AC_MSG_CHECKING([if the kernel has the VFS intent patches])
++LB_LINUX_TRY_COMPILE([
++ #include <linux/fs.h>
++ #include <linux/namei.h>
++],[
++ struct nameidata nd;
++ struct lookup_intent *it;
++
++ it = &nd.intent;
++ intent_init(it, IT_OPEN);
++ it->d.lustre.it_disposition = 0;
++ it->d.lustre.it_data = NULL;
++],[
++ AC_MSG_RESULT([yes])
++ AC_DEFINE(HAVE_VFS_INTENT_PATCHES, 1, [VFS intent patches are applied])
++],[
++ AC_MSG_RESULT([no])
++])
++])
++
++# 2.6.22 dropped the second parameter of invalidate_bdev
++AC_DEFUN([LC_INVALIDATE_BDEV_2ARG],
++[AC_MSG_CHECKING([if invalidate_bdev has second argument])
++LB_LINUX_TRY_COMPILE([
++ #include <linux/buffer_head.h>
++],[
++ invalidate_bdev(NULL,0);
++],[
++ AC_MSG_RESULT([yes])
++ AC_DEFINE(HAVE_INVALIDATE_BDEV_2ARG, 1,
++ [invalidate_bdev has second argument])
++],[
++ AC_MSG_RESULT([no])
++])
++])
++
++# 2.6.23 changed the return type of unregister_blkdev to 'void'
++AC_DEFUN([LC_UNREGISTER_BLKDEV_RETURN_INT],
++[AC_MSG_CHECKING([if unregister_blkdev returns int])
++LB_LINUX_TRY_COMPILE([
++ #include <linux/fs.h>
++],[
++ int i = unregister_blkdev(0,NULL);
++],[
++ AC_MSG_RESULT([yes])
++ AC_DEFINE(HAVE_UNREGISTER_BLKDEV_RETURN_INT, 1,
++ [unregister_blkdev returns int])
++],[
++ AC_MSG_RESULT([no])
++])
++])
++
++# 2.6.23 changed .sendfile to .splice_read
++AC_DEFUN([LC_KERNEL_SPLICE_READ],
++[AC_MSG_CHECKING([if kernel has .splice_read])
++LB_LINUX_TRY_COMPILE([
++ #include <linux/fs.h>
++],[
++ struct file_operations file;
++
++ file.splice_read = NULL;
++], [
++ AC_MSG_RESULT([yes])
++ AC_DEFINE(HAVE_KERNEL_SPLICE_READ, 1,
++ [kernel has .splice_read])
++],[
++ AC_MSG_RESULT([no])
++])
++])
++
++# 2.6.23 extracted NFS export-related data into exportfs.h
++AC_DEFUN([LC_HAVE_EXPORTFS_H],
++[
++tmpfl="$CFLAGS"
++CFLAGS="$CFLAGS -I$LINUX_OBJ/include"
++AC_CHECK_HEADERS([linux/exportfs.h])
++CFLAGS="$tmpfl"
++])
++
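AC_CHECK_HEADERS defines HAVE_LINUX_EXPORTFS_H on success, so C consumers can
pick the right header; a minimal sketch:

    /* export_operations moved to its own header in 2.6.23 */
    #ifdef HAVE_LINUX_EXPORTFS_H
    # include <linux/exportfs.h>
    #else
    # include <linux/fs.h>   /* older kernels keep export_operations here */
    #endif
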
++#
++# LC_PROG_LINUX
++#
++# Lustre linux kernel checks
++#
++AC_DEFUN([LC_PROG_LINUX],
++ [LC_LUSTRE_VERSION_H
++ if test x$enable_server = xyes ; then
++ LC_CONFIG_BACKINGFS
++ fi
++ LC_CONFIG_PINGER
++ LC_CONFIG_CHECKSUM
++ LC_CONFIG_LIBLUSTRE_RECOVERY
++ LC_CONFIG_HEALTH_CHECK_WRITE
++ LC_CONFIG_LRU_RESIZE
++ LC_CONFIG_ADAPTIVE_TIMEOUTS
++ LC_QUOTA_MODULE
++
++ LC_TASK_PPTR
++ # RHEL4 patches
++ LC_EXPORT_TRUNCATE_COMPLETE
++ LC_EXPORT_D_REHASH_COND
++ LC_EXPORT___D_REHASH
++ LC_EXPORT_D_MOVE_LOCKED
++ LC_EXPORT___D_MOVE
++ LC_EXPORT_NODE_TO_CPUMASK
++
++ LC_STRUCT_KIOBUF
++ LC_FUNC_COND_RESCHED
++ LC_FUNC_ZAP_PAGE_RANGE
++ LC_FUNC_PDE
++ LC_FUNC_DIRECT_IO
++ LC_HEADER_MM_INLINE
++ LC_STRUCT_INODE
++ LC_FUNC_REGISTER_CACHE
++ LC_FUNC_GRAB_CACHE_PAGE_NOWAIT_GFP
++ LC_FUNC_DEV_SET_RDONLY
++ LC_FUNC_FILEMAP_FDATAWRITE
++ LC_STRUCT_STATFS
++ LC_FUNC_PAGE_MAPPED
++ LC_STRUCT_FILE_OPS_UNLOCKED_IOCTL
++ LC_FILEMAP_POPULATE
++ LC_D_ADD_UNIQUE
++ LC_BIT_SPINLOCK_H
++ LC_XATTR_ACL
++ LC_STRUCT_INTENT_FILE
++ LC_POSIX_ACL_XATTR_H
++ LC_EXPORT___IGET
++ LC_FUNC_SET_FS_PWD
++ LC_FUNC_MS_FLOCK_LOCK
++ LC_FUNC_HAVE_CAN_SLEEP_ARG
++ LC_FUNC_F_OP_FLOCK
++ LC_QUOTA_READ
++ LC_COOKIE_FOLLOW_LINK
++ LC_FUNC_RCU
++ LC_QUOTA64
++
++ # does the kernel have VFS intent patches?
++ LC_VFS_INTENT_PATCHES
++
++ # 2.6.15
++ LC_INODE_I_MUTEX
++
++ # 2.6.16
++ LC_SECURITY_PLUG # for SLES10 SP2
++
++ # 2.6.17
++ LC_DQUOTOFF_MUTEX
++
++ # 2.6.18
++ LC_NR_PAGECACHE
++ LC_STATFS_DENTRY_PARAM
++ LC_VFS_KERN_MOUNT
++ LC_INVALIDATEPAGE_RETURN_INT
++ LC_UMOUNTBEGIN_HAS_VFSMOUNT
++
++ #2.6.18 + RHEL5 (fc6)
++ LC_PG_FS_MISC
++ LC_PAGE_CHECKED
++
++ # 2.6.19
++ LC_INODE_BLKSIZE
++ LC_VFS_READDIR_U64_INO
++ LC_FILE_WRITEV
++ LC_FILE_READV
++
++ # 2.6.20
++ LC_CANCEL_DIRTY_PAGE
++
++ # raid5-zerocopy patch
++ LC_PAGE_CONSTANT
++
++ # 2.6.22
++ LC_INVALIDATE_BDEV_2ARG
++ LC_FS_RENAME_DOES_D_MOVE
++ # 2.6.23
++ LC_UNREGISTER_BLKDEV_RETURN_INT
++ LC_KERNEL_SPLICE_READ
++ LC_HAVE_EXPORTFS_H
++])
++
++#
++# LC_CONFIG_CLIENT_SERVER
++#
++# Build client/server sides of Lustre
++#
++AC_DEFUN([LC_CONFIG_CLIENT_SERVER],
++[AC_MSG_CHECKING([whether to build Lustre server support])
++AC_ARG_ENABLE([server],
++ AC_HELP_STRING([--disable-server],
++ [disable Lustre server support]),
++ [],[enable_server='yes'])
++AC_MSG_RESULT([$enable_server])
++
++AC_MSG_CHECKING([whether to build Lustre client support])
++AC_ARG_ENABLE([client],
++ AC_HELP_STRING([--disable-client],
++ [disable Lustre client support]),
++ [],[enable_client='yes'])
++AC_MSG_RESULT([$enable_client])])
++
++#
++# LC_CONFIG_LIBLUSTRE
++#
++# whether to build liblustre
++#
++AC_DEFUN([LC_CONFIG_LIBLUSTRE],
++[AC_MSG_CHECKING([whether to build Lustre library])
++AC_ARG_ENABLE([liblustre],
++ AC_HELP_STRING([--disable-liblustre],
++ [disable building of Lustre library]),
++ [],[enable_liblustre=$with_sysio])
++AC_MSG_RESULT([$enable_liblustre])
++# only build sysio if liblustre is built
++with_sysio="$enable_liblustre"
++
++AC_MSG_CHECKING([whether to build liblustre tests])
++AC_ARG_ENABLE([liblustre-tests],
++ AC_HELP_STRING([--enable-liblustre-tests],
++ [enable liblustre tests, if --disable-tests is used]),
++ [],[enable_liblustre_tests=$enable_tests])
++if test x$enable_liblustre != xyes ; then
++ enable_liblustre_tests='no'
++fi
++AC_MSG_RESULT([$enable_liblustre_tests])
++
++AC_MSG_CHECKING([whether to enable liblustre acl])
++AC_ARG_ENABLE([liblustre-acl],
++ AC_HELP_STRING([--disable-liblustre-acl],
++ [disable ACL support for liblustre]),
++ [],[enable_liblustre_acl=yes])
++AC_MSG_RESULT([$enable_liblustre_acl])
++if test x$enable_liblustre_acl = xyes ; then
++ AC_DEFINE(LIBLUSTRE_POSIX_ACL, 1, Liblustre Support ACL-enabled MDS)
++fi
++
++#
++# --enable-mpitests
++#
++AC_ARG_ENABLE(mpitests,
++ AC_HELP_STRING([--enable-mpitests=yes|no|mpich directory],
++ [include mpi tests]),
++ [
++ enable_mpitests=yes
++ case $enableval in
++ yes)
++ MPI_ROOT=/opt/mpich
++ LDFLAGS="$LDFLAGS -L$MPI_ROOT/ch-p4/lib -L$MPI_ROOT/ch-p4/lib64"
++ CFLAGS="$CFLAGS -I$MPI_ROOT/include"
++ ;;
++ no)
++ enable_mpitests=no
++ ;;
++ [[\\/$]]* | ?:[[\\/]]* )
++ MPI_ROOT=$enableval
++ LDFLAGS="$LDFLAGS -L$with_mpi/lib"
++ CFLAGS="$CFLAGS -I$MPI_ROOT/include"
++ ;;
++ *)
++ AC_MSG_ERROR([expected absolute directory name for --enable-mpitests or yes or no])
++ ;;
++ esac
++ ],
++ [
++ MPI_ROOT=/opt/mpich
++ LDFLAGS="$LDFLAGS -L$MPI_ROOT/ch-p4/lib -L$MPI_ROOT/ch-p4/lib64"
++ CFLAGS="$CFLAGS -I$MPI_ROOT/include"
++ enable_mpitests=yes
++ ]
++)
++AC_SUBST(MPI_ROOT)
++
++if test x$enable_mpitests != xno; then
++ AC_MSG_CHECKING([whether mpitests can be built])
++ AC_CHECK_FILE([$MPI_ROOT/include/mpi.h],
++ [AC_CHECK_LIB([mpich],[MPI_Start],[enable_mpitests=yes],[enable_mpitests=no])],
++ [enable_mpitests=no])
++fi
++AC_MSG_RESULT([$enable_mpitests])
++
++
++AC_MSG_NOTICE([Enabling Lustre configure options for libsysio])
++ac_configure_args="$ac_configure_args --with-lustre-hack --with-sockets"
++
++LC_CONFIG_PINGER
++LC_CONFIG_LIBLUSTRE_RECOVERY
++])
++
++AC_DEFUN([LC_CONFIG_LRU_RESIZE],
++[AC_MSG_CHECKING([whether to enable LRU self-adjustment])
++AC_ARG_ENABLE([lru_resize],
++ AC_HELP_STRING([--enable-lru-resize],
++ [enable lru resize support]),
++ [],[enable_lru_resize='yes'])
++AC_MSG_RESULT([$enable_lru_resize])
++if test x$enable_lru_resize != xno; then
++ AC_DEFINE(HAVE_LRU_RESIZE_SUPPORT, 1, [Enable lru resize support])
++fi
++])
++
++AC_DEFUN([LC_CONFIG_ADAPTIVE_TIMEOUTS],
++[AC_MSG_CHECKING([whether to enable ptlrpc adaptive timeouts support])
++AC_ARG_ENABLE([adaptive_timeouts],
++ AC_HELP_STRING([--enable-adaptive-timeouts],
++ [enable ptlrpc adaptive timeouts support]),
++ [],[enable_adaptive_timeouts='no'])
++AC_MSG_RESULT([$enable_adaptive_timeouts])
++if test x$enable_adaptive_timeouts = xyes; then
++ AC_DEFINE(HAVE_AT_SUPPORT, 1, [Enable adaptive timeouts support])
++fi
++])
++
++#
++# LC_CONFIG_QUOTA
++#
++# whether to enable quota support global control
++#
++AC_DEFUN([LC_CONFIG_QUOTA],
++[AC_ARG_ENABLE([quota],
++ AC_HELP_STRING([--enable-quota],
++ [enable quota support]),
++ [],[enable_quota='yes'])
++])
++
++# whether to enable quota support(kernel modules)
++AC_DEFUN([LC_QUOTA_MODULE],
++[if test x$enable_quota != xno; then
++ LB_LINUX_CONFIG([QUOTA],[
++ enable_quota_module='yes'
++ AC_DEFINE(HAVE_QUOTA_SUPPORT, 1, [Enable quota support])
++ ],[
++ enable_quota_module='no'
++ AC_MSG_WARN([quota is not enabled because the kernel lacks quota support])
++ ])
++fi
++])
++
++AC_DEFUN([LC_QUOTA],
++[#check global
++LC_CONFIG_QUOTA
++#check for utils
++AC_CHECK_HEADER(sys/quota.h,
++ [AC_DEFINE(HAVE_SYS_QUOTA_H, 1, [Define to 1 if you have <sys/quota.h>.])],
++ [AC_MSG_ERROR([<sys/quota.h> was not found on your system])])
++])
++
++AC_DEFUN([LC_QUOTA_READ],
++[AC_MSG_CHECKING([if kernel supports quota_read])
++LB_LINUX_TRY_COMPILE([
++ #include <linux/fs.h>
++],[
++ struct super_operations sp;
++ void *i = (void *)sp.quota_read;
++],[
++ AC_MSG_RESULT([yes])
++ AC_DEFINE(KERNEL_SUPPORTS_QUOTA_READ, 1, [quota_read found])
++],[
++ AC_MSG_RESULT([no])
++])
++])
++
++#
++# LC_COOKIE_FOLLOW_LINK
++#
++# kernel 2.6.13+ ->follow_link returns a cookie
++#
++
++AC_DEFUN([LC_COOKIE_FOLLOW_LINK],
++[AC_MSG_CHECKING([if inode_operations->follow_link returns a cookie])
++LB_LINUX_TRY_COMPILE([
++ #include <linux/fs.h>
++ #include <linux/namei.h>
++],[
++ struct dentry dentry;
++ struct nameidata nd;
++
++ dentry.d_inode->i_op->put_link(&dentry, &nd, NULL);
++],[
++ AC_DEFINE(HAVE_COOKIE_FOLLOW_LINK, 1, [inode_operations->follow_link returns a cookie])
++ AC_MSG_RESULT([yes])
++],[
++ AC_MSG_RESULT([no])
++])
++])
++
++#
++# LC_FUNC_RCU
++#
++# kernels prior to 2.6.0(?) have no RCU support; in kernel 2.6.5 (SUSE),
++# call_rcu takes three parameters.
++#
++AC_DEFUN([LC_FUNC_RCU],
++[AC_MSG_CHECKING([if kernel has RCU support])
++LB_LINUX_TRY_COMPILE([
++ #include <linux/rcupdate.h>
++],[],[
++ AC_DEFINE(HAVE_RCU, 1, [have RCU defined])
++ AC_MSG_RESULT([yes])
++
++ AC_MSG_CHECKING([if call_rcu takes three parameters])
++ LB_LINUX_TRY_COMPILE([
++ #include <linux/rcupdate.h>
++ ],[
++ struct rcu_head rh;
++ call_rcu(&rh, (void (*)(struct rcu_head *))1, NULL);
++ ],[
++ AC_DEFINE(HAVE_CALL_RCU_PARAM, 1, [call_rcu takes three parameters])
++ AC_MSG_RESULT([yes])
++ ],[
++ AC_MSG_RESULT([no])
++ ])
++],[
++ AC_MSG_RESULT([no])
++])
++])
++
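A consumer would paper over the three-parameter SUSE variant with a wrapper;
one possible shape (the cfs_call_rcu name is illustrative):

    #ifdef HAVE_CALL_RCU_PARAM
    /* SUSE 2.6.5: call_rcu(head, func, arg) -- pass a NULL arg */
    # define cfs_call_rcu(head, func) call_rcu((head), (func), NULL)
    #else
    # define cfs_call_rcu(head, func) call_rcu((head), (func))
    #endif
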
++#
++# LC_QUOTA64
++# the Linux kernel may have 64-bit quota limit support
++#
++AC_DEFUN([LC_QUOTA64],
++[AC_MSG_CHECKING([if kernel has 64-bit quota limits support])
++LB_LINUX_TRY_COMPILE([
++ #include <linux/kernel.h>
++ #include <linux/fs.h>
++ #include <linux/quotaio_v2.h>
++ int versions[] = V2_INITQVERSIONS_R1;
++ struct v2_disk_dqblk_r1 dqblk_r1;
++],[],[
++ AC_DEFINE(HAVE_QUOTA64, 1, [have quota64])
++ AC_MSG_RESULT([yes])
++
++],[
++ AC_MSG_WARN([4 TB (or larger) block quota limits can only be used with OSTs not larger than 4 TB.])
++ AC_MSG_WARN([Continuing with limited quota support.])
++ AC_MSG_WARN([quotacheck is needed for filesystems with recent quota versions.])
++ AC_MSG_RESULT([no])
++])
++])
++
++# LC_SECURITY_PLUG # for SLES10 SP2
++# check for security plug support in the SLES10 SP2 kernel
++AC_DEFUN([LC_SECURITY_PLUG],
++[AC_MSG_CHECKING([if kernel has security plug support])
++LB_LINUX_TRY_COMPILE([
++ #include <linux/fs.h>
++],[
++ struct dentry *dentry;
++ struct vfsmount *mnt;
++ struct iattr *iattr;
++
++ notify_change(dentry, mnt, iattr);
++],[
++ AC_MSG_RESULT(yes)
++ AC_DEFINE(HAVE_SECURITY_PLUG, 1,
++ [SLES10 SP2 uses an extra parameter in vfs calls])
++],[
++ AC_MSG_RESULT(no)
++])
++])
++
++#
++# LC_CONFIGURE
++#
++# other configure checks
++#
++AC_DEFUN([LC_CONFIGURE],
++[LC_CONFIG_OBD_BUFFER_SIZE
++
++# include/liblustre.h
++AC_CHECK_HEADERS([asm/page.h sys/user.h sys/vfs.h stdint.h blkid/blkid.h])
++
++# liblustre/llite_lib.h
++AC_CHECK_HEADERS([xtio.h file.h])
++
++# liblustre/dir.c
++AC_CHECK_HEADERS([linux/types.h sys/types.h linux/unistd.h unistd.h])
++
++# liblustre/lutil.c
++AC_CHECK_HEADERS([netinet/in.h arpa/inet.h catamount/data.h])
++AC_CHECK_FUNCS([inet_ntoa])
++
++# libsysio/src/readlink.c
++LC_READLINK_SSIZE_T
++
++# lvfs/prng.c - depends on linux/types.h from liblustre/dir.c
++AC_CHECK_HEADERS([linux/random.h], [], [],
++ [#ifdef HAVE_LINUX_TYPES_H
++ # include <linux/types.h>
++ #endif
++ ])
++
++# utils/llverfs.c
++AC_CHECK_HEADERS([ext2fs/ext2fs.h])
++
++# check for -lz support
++ZLIB=""
++AC_CHECK_LIB([z],
++ [adler32],
++ [AC_CHECK_HEADERS([zlib.h],
++ [ZLIB="-lz"
++ AC_DEFINE([HAVE_ADLER], 1,
++ [support adler32 checksum type])],
++ [AC_MSG_WARN([No zlib-devel package found,
++ unable to use adler32 checksum])])],
++ [AC_MSG_WARN([No zlib package found, unable to use adler32 checksum])]
++)
++AC_SUBST(ZLIB)
++
++# Super safe df
++AC_ARG_ENABLE([mindf],
++ AC_HELP_STRING([--enable-mindf],
++ [Make statfs report the minimum available space on any single OST instead of the sum of free space on all OSTs]),
++ [],[])
++if test "$enable_mindf" = "yes" ; then
++ AC_DEFINE([MIN_DF], 1, [Report minimum OST free space])
++fi
++
++AC_ARG_ENABLE([fail_alloc],
++ AC_HELP_STRING([--disable-fail-alloc],
++ [disable random allocation failure]),
++ [],[enable_fail_alloc=yes])
++AC_MSG_CHECKING([whether to enable random memory allocation failure])
++AC_MSG_RESULT([$enable_fail_alloc])
++if test x$enable_fail_alloc != xno ; then
++ AC_DEFINE([RANDOM_FAIL_ALLOC], 1, [enable random allocation failure])
++fi
++
++])
++
++#
++# LC_CONDITIONALS
++#
++# AM_CONDITIONALS for lustre
++#
++AC_DEFUN([LC_CONDITIONALS],
++[AM_CONDITIONAL(LIBLUSTRE, test x$enable_liblustre = xyes)
++AM_CONDITIONAL(USE_QUILT, test x$QUILT != xno)
++AM_CONDITIONAL(LIBLUSTRE_TESTS, test x$enable_liblustre_tests = xyes)
++AM_CONDITIONAL(MPITESTS, test x$enable_mpitests = xyes, Build MPI Tests)
++AM_CONDITIONAL(CLIENT, test x$enable_client = xyes)
++AM_CONDITIONAL(SERVER, test x$enable_server = xyes)
++AM_CONDITIONAL(QUOTA, test x$enable_quota_module = xyes)
++AM_CONDITIONAL(BLKID, test x$ac_cv_header_blkid_blkid_h = xyes)
++AM_CONDITIONAL(EXT2FS_DEVEL, test x$ac_cv_header_ext2fs_ext2fs_h = xyes)
++AM_CONDITIONAL(LIBPTHREAD, test x$enable_libpthread = xyes)
++])
++
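These conditionals then gate whole subtrees in the automake input; an
illustrative Makefile.am fragment (not from the tree):

    if LIBLUSTRE
    SUBDIRS += liblustre
    endif
    if MPITESTS
    SUBDIRS += tests/mpi
    endif
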
++#
++# LC_CONFIG_FILES
++#
++# files that should be generated with AC_OUTPUT
++#
++AC_DEFUN([LC_CONFIG_FILES],
++[AC_CONFIG_FILES([
++lustre/Makefile
++lustre/autoMakefile
++lustre/autoconf/Makefile
++lustre/contrib/Makefile
++lustre/doc/Makefile
++lustre/include/Makefile
++lustre/include/lustre_ver.h
++lustre/include/linux/Makefile
++lustre/include/lustre/Makefile
++lustre/kernel_patches/targets/2.6-suse.target
++lustre/kernel_patches/targets/2.6-vanilla.target
++lustre/kernel_patches/targets/2.6-rhel4.target
++lustre/kernel_patches/targets/2.6-rhel5.target
++lustre/kernel_patches/targets/2.6-fc5.target
++lustre/kernel_patches/targets/2.6-patchless.target
++lustre/kernel_patches/targets/2.6-sles10.target
++lustre/kernel_patches/targets/hp_pnnl-2.4.target
++lustre/kernel_patches/targets/rh-2.4.target
++lustre/kernel_patches/targets/rhel-2.4.target
++lustre/kernel_patches/targets/suse-2.4.21-2.target
++lustre/kernel_patches/targets/sles-2.4.target
++lustre/ldlm/Makefile
++lustre/liblustre/Makefile
++lustre/liblustre/tests/Makefile
++lustre/llite/Makefile
++lustre/llite/autoMakefile
++lustre/lov/Makefile
++lustre/lov/autoMakefile
++lustre/lvfs/Makefile
++lustre/lvfs/autoMakefile
++lustre/mdc/Makefile
++lustre/mdc/autoMakefile
++lustre/mds/Makefile
++lustre/mds/autoMakefile
++lustre/obdclass/Makefile
++lustre/obdclass/autoMakefile
++lustre/obdclass/linux/Makefile
++lustre/obdecho/Makefile
++lustre/obdecho/autoMakefile
++lustre/obdfilter/Makefile
++lustre/obdfilter/autoMakefile
++lustre/osc/Makefile
++lustre/osc/autoMakefile
++lustre/ost/Makefile
++lustre/ost/autoMakefile
++lustre/mgc/Makefile
++lustre/mgc/autoMakefile
++lustre/mgs/Makefile
++lustre/mgs/autoMakefile
++lustre/ptlrpc/Makefile
++lustre/ptlrpc/autoMakefile
++lustre/quota/Makefile
++lustre/quota/autoMakefile
++lustre/scripts/Makefile
++lustre/scripts/version_tag.pl
++lustre/tests/Makefile
++lustre/utils/Makefile
++])
++case $lb_target_os in
++ darwin)
++ AC_CONFIG_FILES([ lustre/obdclass/darwin/Makefile ])
++ ;;
++esac
++
++])
+diff -urNad lustre~/lustre/include/linux/lustre_compat25.h lustre/lustre/include/linux/lustre_compat25.h
+--- lustre~/lustre/include/linux/lustre_compat25.h 2009-03-12 10:33:45.000000000 +0100
++++ lustre/lustre/include/linux/lustre_compat25.h 2009-03-12 11:02:51.000000000 +0100
+@@ -57,6 +57,28 @@
+ #endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,14) */
+
+ #ifndef HAVE_SET_FS_PWD
++
++#ifdef HAVE_FS_STRUCT_USE_PATH
++static inline void ll_set_fs_pwd(struct fs_struct *fs, struct vfsmount *mnt,
++ struct dentry *dentry)
++{
++ struct path path;
++ struct path old_pwd;
++
++ path.mnt = mnt;
++ path.dentry = dentry;
++ write_lock(&fs->lock);
++ old_pwd = fs->pwd;
++ path_get(&path);
++ fs->pwd = path;
++ write_unlock(&fs->lock);
++
++ if (old_pwd.dentry)
++ path_put(&old_pwd);
++}
++
++#else
++
+ static inline void ll_set_fs_pwd(struct fs_struct *fs, struct vfsmount *mnt,
+ struct dentry *dentry)
+ {
+@@ -75,6 +97,7 @@
+ mntput(old_pwdmnt);
+ }
+ }
++#endif
+ #else
+ #define ll_set_fs_pwd set_fs_pwd
+ #endif /* HAVE_SET_FS_PWD */
+@@ -151,7 +174,12 @@
+ #endif
+
+ /* XXX our code should be using the 2.6 calls, not the other way around */
++#ifndef HAVE_TRYLOCK_PAGE
+ #define TryLockPage(page) TestSetPageLocked(page)
++#else
++#define TryLockPage(page) (!trylock_page(page))
++#endif
++
+ #define Page_Uptodate(page) PageUptodate(page)
+ #define ll_redirty_page(page) set_page_dirty(page)
+
+@@ -364,8 +392,17 @@
+ #define LL_RENAME_DOES_D_MOVE FS_ODD_RENAME
+ #endif
+
++#ifdef HAVE_FILE_REMOVE_SUID
++#define ll_remove_suid(file, mnt) file_remove_suid(file)
++#else
++ #ifdef HAVE_SECURITY_PLUG
++ #define ll_remove_suid(file,mnt) remove_suid(file->f_dentry,mnt)
++ #else
++ #define ll_remove_suid(file,mnt) remove_suid(file->f_dentry)
++ #endif
++#endif
++
+ #ifdef HAVE_SECURITY_PLUG
+-#define ll_remove_suid(inode,mnt) remove_suid(inode,mnt)
+ #define ll_vfs_rmdir(dir,entry,mnt) vfs_rmdir(dir,entry,mnt)
+ #define ll_vfs_mkdir(inode,dir,mnt,mode) vfs_mkdir(inode,dir,mnt,mode)
+ #define ll_vfs_link(old,mnt,dir,new,mnt1) vfs_link(old,mnt,dir,new,mnt1)
+@@ -377,7 +414,6 @@
+ #define ll_vfs_rename(old,old_dir,mnt,new,new_dir,mnt1) \
+ vfs_rename(old,old_dir,mnt,new,new_dir,mnt1)
+ #else
+-#define ll_remove_suid(inode,mnt) remove_suid(inode)
+ #define ll_vfs_rmdir(dir,entry,mnt) vfs_rmdir(dir,entry)
+ #define ll_vfs_mkdir(inode,dir,mnt,mode) vfs_mkdir(inode,dir,mode)
+ #define ll_vfs_link(old,mnt,dir,new,mnt1) vfs_link(old,dir,new)
+@@ -388,6 +424,57 @@
+ vfs_rename(old,old_dir,new,new_dir)
+ #endif
+
++#ifdef HAVE_REGISTER_SHRINKER
++typedef int (*shrinker_t)(int nr_to_scan, gfp_t gfp_mask);
++
++static inline
++struct shrinker *set_shrinker(int seek, shrinker_t func)
++{
++ struct shrinker *s;
++
++ s = kmalloc(sizeof(*s), GFP_KERNEL);
++ if (s == NULL)
++ return (NULL);
++
++ s->shrink = func;
++ s->seeks = seek;
++
++ register_shrinker(s);
++
++ return s;
++}
++
++static inline
++void remove_shrinker(struct shrinker *shrinker)
++{
++ if (shrinker == NULL)
++ return;
++
++ unregister_shrinker(shrinker);
++ kfree(shrinker);
++}
++#endif
++
++#ifdef HAVE_BIO_ENDIO_2ARG
++#define cfs_bio_io_error(a,b) bio_io_error((a))
++#define cfs_bio_endio(a,b,c) bio_endio((a),(c))
++#else
++#define cfs_bio_io_error(a,b) bio_io_error((a),(b))
++#define cfs_bio_endio(a,b,c) bio_endio((a),(b),(c))
++#endif
++
++#ifdef HAVE_FS_STRUCT_USE_PATH
++#define cfs_fs_pwd(fs) ((fs)->pwd.dentry)
++#define cfs_fs_mnt(fs) ((fs)->pwd.mnt)
++#else
++#define cfs_fs_pwd(fs) ((fs)->pwd)
++#define cfs_fs_mnt(fs) ((fs)->pwdmnt)
++#endif
++
++#ifndef list_for_each_safe_rcu
++#define list_for_each_safe_rcu(a,b,c) list_for_each_rcu(a, c)
++#endif
++
+ #ifndef abs
+ static inline int abs(int x)
+ {
+diff -urNad lustre~/lustre/include/linux/lustre_compat25.h.orig lustre/lustre/include/linux/lustre_compat25.h.orig
+--- lustre~/lustre/include/linux/lustre_compat25.h.orig 1970-01-01 00:00:00.000000000 +0000
++++ lustre/lustre/include/linux/lustre_compat25.h.orig 2009-03-12 10:33:45.000000000 +0100
+@@ -0,0 +1,411 @@
++/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
++ * vim:expandtab:shiftwidth=8:tabstop=8:
++ *
++ * GPL HEADER START
++ *
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License version 2 only,
++ * as published by the Free Software Foundation.
++ *
++ * This program is distributed in the hope that it will be useful, but
++ * WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ * General Public License version 2 for more details (a copy is included
++ * in the LICENSE file that accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License
++ * version 2 along with this program; If not, see
++ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
++ *
++ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
++ * CA 95054 USA or visit www.sun.com if you need additional information or
++ * have any questions.
++ *
++ * GPL HEADER END
++ */
++/*
++ * Copyright 2008 Sun Microsystems, Inc. All rights reserved
++ * Use is subject to license terms.
++ */
++/*
++ * This file is part of Lustre, http://www.lustre.org/
++ * Lustre is a trademark of Sun Microsystems, Inc.
++ */
++
++#ifndef _LINUX_COMPAT25_H
++#define _LINUX_COMPAT25_H
++
++#ifdef __KERNEL__
++
++#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,5)
++#error sorry, lustre requires at least 2.6.5
++#endif
++
++#include <libcfs/linux/portals_compat25.h>
++
++#include <linux/lustre_patchless_compat.h>
++
++#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,14)
++struct ll_iattr_struct {
++ struct iattr iattr;
++ unsigned int ia_attr_flags;
++};
++#else
++#define ll_iattr_struct iattr
++#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,14) */
++
++#ifndef HAVE_SET_FS_PWD
++static inline void ll_set_fs_pwd(struct fs_struct *fs, struct vfsmount *mnt,
++ struct dentry *dentry)
++{
++ struct dentry *old_pwd;
++ struct vfsmount *old_pwdmnt;
++
++ write_lock(&fs->lock);
++ old_pwd = fs->pwd;
++ old_pwdmnt = fs->pwdmnt;
++ fs->pwdmnt = mntget(mnt);
++ fs->pwd = dget(dentry);
++ write_unlock(&fs->lock);
++
++ if (old_pwd) {
++ dput(old_pwd);
++ mntput(old_pwdmnt);
++ }
++}
++#else
++#define ll_set_fs_pwd set_fs_pwd
++#endif /* HAVE_SET_FS_PWD */
++
++#ifdef HAVE_INODE_I_MUTEX
++#define UNLOCK_INODE_MUTEX(inode) do {mutex_unlock(&(inode)->i_mutex); } while(0)
++#define LOCK_INODE_MUTEX(inode) do {mutex_lock(&(inode)->i_mutex); } while(0)
++#define TRYLOCK_INODE_MUTEX(inode) mutex_trylock(&(inode)->i_mutex)
++#else
++#define UNLOCK_INODE_MUTEX(inode) do {up(&(inode)->i_sem); } while(0)
++#define LOCK_INODE_MUTEX(inode) do {down(&(inode)->i_sem); } while(0)
++#define TRYLOCK_INODE_MUTEX(inode) (!down_trylock(&(inode)->i_sem))
++#endif /* HAVE_INODE_I_MUTEX */
++
++#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,15)
++#define d_child d_u.d_child
++#define d_rcu d_u.d_rcu
++#endif
++
++#ifdef HAVE_DQUOTOFF_MUTEX
++#define UNLOCK_DQONOFF_MUTEX(dqopt) do {mutex_unlock(&(dqopt)->dqonoff_mutex); } while(0)
++#define LOCK_DQONOFF_MUTEX(dqopt) do {mutex_lock(&(dqopt)->dqonoff_mutex); } while(0)
++#else
++#define UNLOCK_DQONOFF_MUTEX(dqopt) do {up(&(dqopt)->dqonoff_sem); } while(0)
++#define LOCK_DQONOFF_MUTEX(dqopt) do {down(&(dqopt)->dqonoff_sem); } while(0)
++#endif /* HAVE_DQUOTOFF_MUTEX */
++
++
++#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,4)
++#define NGROUPS_SMALL NGROUPS
++#define NGROUPS_PER_BLOCK ((int)(EXEC_PAGESIZE / sizeof(gid_t)))
++
++struct group_info {
++ int ngroups;
++ atomic_t usage;
++ gid_t small_block[NGROUPS_SMALL];
++ int nblocks;
++ gid_t *blocks[0];
++};
++#define current_ngroups current->ngroups
++#define current_groups current->groups
++
++struct group_info *groups_alloc(int gidsetsize);
++void groups_free(struct group_info *ginfo);
++#else /* >= 2.6.4 */
++
++#define current_ngroups current->group_info->ngroups
++#define current_groups current->group_info->small_block
++
++#endif /* LINUX_VERSION_CODE < KERNEL_VERSION(2,6,4) */
++
++#ifndef page_private
++#define page_private(page) ((page)->private)
++#define set_page_private(page, v) ((page)->private = (v))
++#endif
++
++#ifndef HAVE_GFP_T
++#define gfp_t int
++#endif
++
++#define lock_dentry(___dentry) spin_lock(&(___dentry)->d_lock)
++#define unlock_dentry(___dentry) spin_unlock(&(___dentry)->d_lock)
++
++#define ll_kernel_locked() kernel_locked()
++
++/*
++ * OBD need working random driver, thus all our
++ * initialization routines must be called after device
++ * driver initialization
++ */
++#ifndef MODULE
++#undef module_init
++#define module_init(a) late_initcall(a)
++#endif
++
++/* XXX our code should be using the 2.6 calls, not the other way around */
++#define TryLockPage(page) TestSetPageLocked(page)
++#define Page_Uptodate(page) PageUptodate(page)
++#define ll_redirty_page(page) set_page_dirty(page)
++
++#define KDEVT_INIT(val) (val)
++
++#define LTIME_S(time) (time.tv_sec)
++#define ll_path_lookup path_lookup
++#define ll_permission(inode,mask,nd) permission(inode,mask,nd)
++
++#define ll_pgcache_lock(mapping) spin_lock(&mapping->page_lock)
++#define ll_pgcache_unlock(mapping) spin_unlock(&mapping->page_lock)
++#define ll_call_writepage(inode, page) \
++ (inode)->i_mapping->a_ops->writepage(page, NULL)
++#define ll_invalidate_inode_pages(inode) \
++ invalidate_inode_pages((inode)->i_mapping)
++#define ll_truncate_complete_page(page) \
++ truncate_complete_page(page->mapping, page)
++
++#define ll_vfs_create(a,b,c,d) vfs_create(a,b,c,d)
++#define ll_dev_t dev_t
++#define kdev_t dev_t
++#define to_kdev_t(dev) (dev)
++#define kdev_t_to_nr(dev) (dev)
++#define val_to_kdev(dev) (dev)
++#define ILOOKUP(sb, ino, test, data) ilookup5(sb, ino, test, data);
++
++#include <linux/writeback.h>
++
++static inline int cleanup_group_info(void)
++{
++ struct group_info *ginfo;
++
++ ginfo = groups_alloc(0);
++ if (!ginfo)
++ return -ENOMEM;
++
++ set_current_groups(ginfo);
++ put_group_info(ginfo);
++
++ return 0;
++}
++
++#define __set_page_ll_data(page, llap) \
++ do { \
++ page_cache_get(page); \
++ SetPagePrivate(page); \
++ set_page_private(page, (unsigned long)llap); \
++ } while (0)
++#define __clear_page_ll_data(page) \
++ do { \
++ ClearPagePrivate(page); \
++ set_page_private(page, 0); \
++ page_cache_release(page); \
++ } while(0)
++
++#define kiobuf bio
++
++#include <linux/proc_fs.h>
++
++#if !defined(HAVE_D_REHASH_COND) && defined(HAVE___D_REHASH)
++#define d_rehash_cond(dentry, lock) __d_rehash(dentry, lock)
++extern void __d_rehash(struct dentry *dentry, int lock);
++#endif
++
++#if !defined(HAVE_D_MOVE_LOCKED) && defined(HAVE___D_MOVE)
++#define d_move_locked(dentry, target) __d_move(dentry, target)
++extern void __d_move(struct dentry *dentry, struct dentry *target);
++#endif
++
++#ifdef HAVE_CAN_SLEEP_ARG
++#define ll_flock_lock_file_wait(file, lock, can_sleep) \
++ flock_lock_file_wait(file, lock, can_sleep)
++#else
++#define ll_flock_lock_file_wait(file, lock, can_sleep) \
++ flock_lock_file_wait(file, lock)
++#endif
++
++#define CheckWriteback(page, cmd) \
++ ((!PageWriteback(page) && (cmd & OBD_BRW_READ)) || \
++ (PageWriteback(page) && (cmd & OBD_BRW_WRITE)))
++
++
++#ifdef HAVE_PAGE_LIST
++static inline int mapping_has_pages(struct address_space *mapping)
++{
++ int rc = 1;
++
++ ll_pgcache_lock(mapping);
++ if (list_empty(&mapping->dirty_pages) &&
++ list_empty(&mapping->clean_pages) &&
++ list_empty(&mapping->locked_pages)) {
++ rc = 0;
++ }
++ ll_pgcache_unlock(mapping);
++
++ return rc;
++}
++#else
++static inline int mapping_has_pages(struct address_space *mapping)
++{
++ return mapping->nrpages > 0;
++}
++#endif
++
++#ifdef HAVE_KIOBUF_KIO_BLOCKS
++#define KIOBUF_GET_BLOCKS(k) ((k)->kio_blocks)
++#else
++#define KIOBUF_GET_BLOCKS(k) ((k)->blocks)
++#endif
++
++#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,7))
++#define ll_set_dflags(dentry, flags) do { dentry->d_vfs_flags |= flags; } while(0)
++#define ll_vfs_symlink(dir, dentry, mnt, path, mode) \
++ vfs_symlink(dir, dentry, path)
++#else
++#define ll_set_dflags(dentry, flags) do { \
++ spin_lock(&dentry->d_lock); \
++ dentry->d_flags |= flags; \
++ spin_unlock(&dentry->d_lock); \
++ } while(0)
++#ifdef HAVE_SECURITY_PLUG
++#define ll_vfs_symlink(dir, dentry, mnt, path, mode) \
++ vfs_symlink(dir, dentry, mnt, path, mode)
++#else
++#define ll_vfs_symlink(dir, dentry, mnt, path, mode) \
++ vfs_symlink(dir, dentry, path, mode)
++#endif
++#endif
++
++#ifndef container_of
++#define container_of(ptr, type, member) ({ \
++ const typeof( ((type *)0)->member ) *__mptr = (ptr); \
++ (type *)( (char *)__mptr - offsetof(type,member) );})
++#endif
++
++#ifdef HAVE_I_ALLOC_SEM
++#define UP_WRITE_I_ALLOC_SEM(i) do { up_write(&(i)->i_alloc_sem); } while (0)
++#define DOWN_WRITE_I_ALLOC_SEM(i) do { down_write(&(i)->i_alloc_sem); } while(0)
++#define LASSERT_I_ALLOC_SEM_WRITE_LOCKED(i) LASSERT(down_read_trylock(&(i)->i_alloc_sem) == 0)
++
++#define UP_READ_I_ALLOC_SEM(i) do { up_read(&(i)->i_alloc_sem); } while (0)
++#define DOWN_READ_I_ALLOC_SEM(i) do { down_read(&(i)->i_alloc_sem); } while (0)
++#define LASSERT_I_ALLOC_SEM_READ_LOCKED(i) LASSERT(down_write_trylock(&(i)->i_alloc_sem) == 0)
++#else
++#define UP_READ_I_ALLOC_SEM(i) do { } while (0)
++#define DOWN_READ_I_ALLOC_SEM(i) do { } while (0)
++#define LASSERT_I_ALLOC_SEM_READ_LOCKED(i) do { } while (0)
++
++#define UP_WRITE_I_ALLOC_SEM(i) do { } while (0)
++#define DOWN_WRITE_I_ALLOC_SEM(i) do { } while (0)
++#define LASSERT_I_ALLOC_SEM_WRITE_LOCKED(i) do { } while (0)
++#endif
++
++#ifndef HAVE_GRAB_CACHE_PAGE_NOWAIT_GFP
++#define grab_cache_page_nowait_gfp(x, y, z) grab_cache_page_nowait((x), (y))
++#endif
++
++#ifndef HAVE_FILEMAP_FDATAWRITE
++#define filemap_fdatawrite(mapping) filemap_fdatasync(mapping)
++#endif
++
++#ifdef HAVE_VFS_KERN_MOUNT
++static inline
++struct vfsmount *
++ll_kern_mount(const char *fstype, int flags, const char *name, void *data)
++{
++ struct file_system_type *type = get_fs_type(fstype);
++ struct vfsmount *mnt;
++ if (!type)
++ return ERR_PTR(-ENODEV);
++ mnt = vfs_kern_mount(type, flags, name, data);
++ module_put(type->owner);
++ return mnt;
++}
++#else
++#define ll_kern_mount(fstype, flags, name, data) do_kern_mount((fstype), (flags), (name), (data))
++#endif
++
++#ifdef HAVE_STATFS_DENTRY_PARAM
++#define ll_do_statfs(sb, sfs) (sb)->s_op->statfs((sb)->s_root, (sfs))
++#else
++#define ll_do_statfs(sb, sfs) (sb)->s_op->statfs((sb), (sfs))
++#endif
++
++/* task_struct */
++#ifndef HAVE_TASK_PPTR
++#define p_pptr parent
++#endif
++
++#ifdef HAVE_UNREGISTER_BLKDEV_RETURN_INT
++#define ll_unregister_blkdev(a,b) unregister_blkdev((a),(b))
++#else
++static inline
++int ll_unregister_blkdev(unsigned int dev, const char *name)
++{
++ unregister_blkdev(dev, name);
++ return 0;
++}
++#endif
++
++#ifdef HAVE_INVALIDATE_BDEV_2ARG
++#define ll_invalidate_bdev(a,b) invalidate_bdev((a),(b))
++#else
++#define ll_invalidate_bdev(a,b) invalidate_bdev((a))
++#endif
++
++#ifdef HAVE_FS_RENAME_DOES_D_MOVE
++#define LL_RENAME_DOES_D_MOVE FS_RENAME_DOES_D_MOVE
++#else
++#define LL_RENAME_DOES_D_MOVE FS_ODD_RENAME
++#endif
++
++#ifdef HAVE_SECURITY_PLUG
++#define ll_remove_suid(inode,mnt) remove_suid(inode,mnt)
++#define ll_vfs_rmdir(dir,entry,mnt) vfs_rmdir(dir,entry,mnt)
++#define ll_vfs_mkdir(inode,dir,mnt,mode) vfs_mkdir(inode,dir,mnt,mode)
++#define ll_vfs_link(old,mnt,dir,new,mnt1) vfs_link(old,mnt,dir,new,mnt1)
++#define ll_vfs_unlink(inode,entry,mnt) vfs_unlink(inode,entry,mnt)
++#define ll_vfs_mknod(dir,entry,mnt,mode,dev) \
++ vfs_mknod(dir,entry,mnt,mode,dev)
++#define ll_security_inode_unlink(dir,entry,mnt) \
++ security_inode_unlink(dir,entry,mnt)
++#define ll_vfs_rename(old,old_dir,mnt,new,new_dir,mnt1) \
++ vfs_rename(old,old_dir,mnt,new,new_dir,mnt1)
++#else
++#define ll_remove_suid(inode,mnt) remove_suid(inode)
++#define ll_vfs_rmdir(dir,entry,mnt) vfs_rmdir(dir,entry)
++#define ll_vfs_mkdir(inode,dir,mnt,mode) vfs_mkdir(inode,dir,mode)
++#define ll_vfs_link(old,mnt,dir,new,mnt1) vfs_link(old,dir,new)
++#define ll_vfs_unlink(inode,entry,mnt) vfs_unlink(inode,entry)
++#define ll_vfs_mknod(dir,entry,mnt,mode,dev) vfs_mknod(dir,entry,mode,dev)
++#define ll_security_inode_unlink(dir,entry,mnt) security_inode_unlink(dir,entry)
++#define ll_vfs_rename(old,old_dir,mnt,new,new_dir,mnt1) \
++ vfs_rename(old,old_dir,new,new_dir)
++#endif
++
++#ifndef abs
++static inline int abs(int x)
++{
++ return (x < 0) ? -x : x;
++}
++#endif
++
++#ifndef labs
++static inline long labs(long x)
++{
++ return (x < 0) ? -x : x;
++}
++#endif
++
++/* Using kernel fls(). Userspace will use one defined in user-bitops.h. */
++#ifndef __fls
++#define __fls fls
++#endif
++
++#endif /* __KERNEL__ */
++#endif /* _COMPAT25_H */
+diff -urNad lustre~/lustre/include/linux/lustre_lib.h lustre/lustre/include/linux/lustre_lib.h
+--- lustre~/lustre/include/linux/lustre_lib.h 2008-08-07 11:52:06.000000000 +0200
++++ lustre/lustre/include/linux/lustre_lib.h 2009-03-12 11:02:51.000000000 +0100
+@@ -49,7 +49,6 @@
+ # include <string.h>
+ # include <sys/types.h>
+ #else
+-# include <asm/semaphore.h>
+ # include <linux/rwsem.h>
+ # include <linux/sched.h>
+ # include <linux/signal.h>
+diff -urNad lustre~/lustre/include/linux/lustre_patchless_compat.h lustre/lustre/include/linux/lustre_patchless_compat.h
+--- lustre~/lustre/include/linux/lustre_patchless_compat.h 2008-08-07 11:52:10.000000000 +0200
++++ lustre/lustre/include/linux/lustre_patchless_compat.h 2009-03-12 11:02:51.000000000 +0100
+@@ -52,7 +52,7 @@
+
+ BUG_ON(!PageLocked(page));
+
+-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,15))
++#ifdef HAVE_RW_TREE_LOCK
+ write_lock_irq(&mapping->tree_lock);
+ #else
+ spin_lock_irq(&mapping->tree_lock);
+@@ -65,7 +65,7 @@
+ #else
+ __dec_zone_page_state(page, NR_FILE_PAGES);
+ #endif
+-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,15))
++#ifdef HAVE_RW_TREE_LOCK
+ write_unlock_irq(&mapping->tree_lock);
+ #else
+ spin_unlock_irq(&mapping->tree_lock);
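The two hunks above replace LINUX_VERSION_CODE comparisons with a configure-detected HAVE_RW_TREE_LOCK, which also copes with vendor kernels that backport the rwlock_t tree_lock. A hedged sketch of how a call site reads once the symbol is in place; the helper name is invented for illustration:

/* Sketch, assuming HAVE_RW_TREE_LOCK is set by a configure test: the
 * same call site compiles against either tree_lock flavour without
 * consulting the kernel version number. */
static inline void example_lock_tree(struct address_space *mapping)
{
#ifdef HAVE_RW_TREE_LOCK
        write_lock_irq(&mapping->tree_lock);  /* tree_lock is rwlock_t */
#else
        spin_lock_irq(&mapping->tree_lock);   /* tree_lock is spinlock_t */
#endif
}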
+diff -urNad lustre~/lustre/include/lprocfs_status.h lustre/lustre/include/lprocfs_status.h
+--- lustre~/lustre/include/lprocfs_status.h 2009-03-12 10:32:27.000000000 +0100
++++ lustre/lustre/include/lprocfs_status.h 2009-03-12 11:02:51.000000000 +0100
+@@ -521,6 +521,8 @@
+ #define LPROCFS_EXIT() do { \
+ up_read(&_lprocfs_lock); \
+ } while(0)
++
++#ifdef HAVE_PROCFS_DELETED
+ #define LPROCFS_ENTRY_AND_CHECK(dp) do { \
+ typecheck(struct proc_dir_entry *, dp); \
+ LPROCFS_ENTRY(); \
+@@ -529,6 +531,14 @@
+ return -ENODEV; \
+ } \
+ } while(0)
++#define LPROCFS_CHECK_DELETED(dp) ((dp)->deleted)
++#else
++
++#define LPROCFS_ENTRY_AND_CHECK(dp) \
++ LPROCFS_ENTRY();
++#define LPROCFS_CHECK_DELETED(dp) (0)
++#endif
++
+ #define LPROCFS_WRITE_ENTRY() do { \
+ down_write(&_lprocfs_lock); \
+ } while(0)
+@@ -536,6 +546,7 @@
+ up_write(&_lprocfs_lock); \
+ } while(0)
+
++
+ /* You must use these macros when you want to refer to
+ * the import in a client obd_device for a lprocfs entry */
+ #define LPROCFS_CLIMP_CHECK(obd) do { \
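The LPROCFS_ENTRY_AND_CHECK() change above matters to every seq_file open method: on kernels whose proc_dir_entry still carries a ->deleted flag the macro returns -ENODEV for a removed entry, while on newer kernels it degrades to a bare LPROCFS_ENTRY(). A minimal open method using it, mirroring the __LPROC_SEQ_FOPS pattern found later in this header; the names are invented and example_seq_show is assumed to exist:

/* Sketch only, under the assumptions stated above. */
static int example_seq_open(struct inode *inode, struct file *file)
{
        struct proc_dir_entry *dp = PDE(inode);
        int rc;

        LPROCFS_ENTRY_AND_CHECK(dp);  /* may return -ENODEV from here */
        rc = single_open(file, example_seq_show, dp->data);
        if (rc)
                LPROCFS_EXIT();       /* on success, held until release */
        return rc;
}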
+diff -urNad lustre~/lustre/include/lprocfs_status.h.orig lustre/lustre/include/lprocfs_status.h.orig
+--- lustre~/lustre/include/lprocfs_status.h.orig 1970-01-01 00:00:00.000000000 +0000
++++ lustre/lustre/include/lprocfs_status.h.orig 2009-03-12 10:32:27.000000000 +0100
+@@ -0,0 +1,817 @@
++/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
++ * vim:expandtab:shiftwidth=8:tabstop=8:
++ *
++ * GPL HEADER START
++ *
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License version 2 only,
++ * as published by the Free Software Foundation.
++ *
++ * This program is distributed in the hope that it will be useful, but
++ * WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ * General Public License version 2 for more details (a copy is included
++ * in the LICENSE file that accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License
++ * version 2 along with this program; If not, see
++ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
++ *
++ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
++ * CA 95054 USA or visit www.sun.com if you need additional information or
++ * have any questions.
++ *
++ * GPL HEADER END
++ */
++/*
++ * Copyright 2008 Sun Microsystems, Inc. All rights reserved
++ * Use is subject to license terms.
++ */
++/*
++ * This file is part of Lustre, http://www.lustre.org/
++ * Lustre is a trademark of Sun Microsystems, Inc.
++ *
++ * lustre/include/lprocfs_status.h
++ *
++ * Top level header file for LProc SNMP
++ *
++ * Author: Hariharan Thantry thantry at users.sourceforge.net
++ */
++#ifndef _LPROCFS_SNMP_H
++#define _LPROCFS_SNMP_H
++
++#include <lustre/lustre_idl.h>
++#if defined(__linux__)
++#include <linux/lprocfs_status.h>
++#elif defined(__APPLE__)
++#include <darwin/lprocfs_status.h>
++#elif defined(__WINNT__)
++#include <winnt/lprocfs_status.h>
++#else
++#error Unsupported operating system.
++#endif
++
++#undef LPROCFS
++#if (defined(__KERNEL__) && defined(CONFIG_PROC_FS))
++# define LPROCFS
++#endif
++
++struct lprocfs_vars {
++ const char *name;
++ cfs_read_proc_t *read_fptr;
++ cfs_write_proc_t *write_fptr;
++ void *data;
++ struct file_operations *fops;
++ /**
++ * /proc file mode.
++ */
++ mode_t proc_mode;
++};
++
++struct lprocfs_static_vars {
++ struct lprocfs_vars *module_vars;
++ struct lprocfs_vars *obd_vars;
++};
++
++/* if we find more consumers this could be generalized */
++#define OBD_HIST_MAX 32
++struct obd_histogram {
++ spinlock_t oh_lock;
++ unsigned long oh_buckets[OBD_HIST_MAX];
++};
++
++enum {
++ BRW_R_PAGES = 0,
++ BRW_W_PAGES,
++ BRW_R_RPC_HIST,
++ BRW_W_RPC_HIST,
++ BRW_R_IO_TIME,
++ BRW_W_IO_TIME,
++ BRW_R_DISCONT_PAGES,
++ BRW_W_DISCONT_PAGES,
++ BRW_R_DISCONT_BLOCKS,
++ BRW_W_DISCONT_BLOCKS,
++ BRW_R_DISK_IOSIZE,
++ BRW_W_DISK_IOSIZE,
++ BRW_R_DIO_FRAGS,
++ BRW_W_DIO_FRAGS,
++ BRW_LAST,
++};
++
++struct brw_stats {
++ struct obd_histogram hist[BRW_LAST];
++};
++
++
++/* An lprocfs counter can be configured using the enum bit masks below.
++ *
++ * LPROCFS_CNTR_EXTERNALLOCK indicates that an external lock already
++ * protects this counter from concurrent updates. If not specified,
++ * lprocfs uses an internal per-counter lock variable. External locks are
++ * not used to protect counter increments, but are used to protect
++ * counter readout and resets.
++ *
++ * LPROCFS_CNTR_AVGMINMAX indicates that the counter takes multi-valued
++ * samples (i.e. the counter can be incremented by more than "1"). When
++ * specified, the counter maintains min, max and sum in addition to a
++ * simple invocation count, which allows averages to be computed.
++ * If not specified, the counter is an increment-by-1 counter;
++ * min, max, sum, etc. are not maintained.
++ *
++ * LPROCFS_CNTR_STDDEV indicates that the counter should track sum of
++ * squares (for multi-valued counter samples only). This allows
++ * external computation of standard deviation, but involves a 64-bit
++ * multiply per counter increment.
++ */
++
++enum {
++ LPROCFS_CNTR_EXTERNALLOCK = 0x0001,
++ LPROCFS_CNTR_AVGMINMAX = 0x0002,
++ LPROCFS_CNTR_STDDEV = 0x0004,
++
++ /* counter data type */
++ LPROCFS_TYPE_REGS = 0x0100,
++ LPROCFS_TYPE_BYTES = 0x0200,
++ LPROCFS_TYPE_PAGES = 0x0400,
++ LPROCFS_TYPE_CYCLE = 0x0800,
++};
++
++struct lprocfs_atomic {
++ atomic_t la_entry;
++ atomic_t la_exit;
++};
++
++#define LC_MIN_INIT ((~(__u64)0) >> 1)
++
++struct lprocfs_counter {
++ struct lprocfs_atomic lc_cntl; /* may need to move to per set */
++ unsigned int lc_config;
++ __s64 lc_count;
++ __s64 lc_sum;
++ __s64 lc_min;
++ __s64 lc_max;
++ __s64 lc_sumsquare;
++ const char *lc_name; /* must be static */
++ const char *lc_units; /* must be static */
++};
++
++struct lprocfs_percpu {
++ struct lprocfs_counter lp_cntr[0];
++};
++
++#define LPROCFS_GET_NUM_CPU 0x0001
++#define LPROCFS_GET_SMP_ID 0x0002
++
++enum lprocfs_stats_flags {
++ LPROCFS_STATS_FLAG_PERCPU = 0x0000, /* per cpu counter */
++ LPROCFS_STATS_FLAG_NOPERCPU = 0x0001, /* stats have no percpu
++ * area and need locking */
++};
++
++enum lprocfs_fields_flags {
++ LPROCFS_FIELDS_FLAGS_CONFIG = 0x0001,
++ LPROCFS_FIELDS_FLAGS_SUM = 0x0002,
++ LPROCFS_FIELDS_FLAGS_MIN = 0x0003,
++ LPROCFS_FIELDS_FLAGS_MAX = 0x0004,
++ LPROCFS_FIELDS_FLAGS_AVG = 0x0005,
++ LPROCFS_FIELDS_FLAGS_SUMSQUARE = 0x0006,
++ LPROCFS_FIELDS_FLAGS_COUNT = 0x0007,
++};
++
++struct lprocfs_stats {
++ unsigned int ls_num; /* # of counters */
++ int ls_flags; /* See LPROCFS_STATS_FLAG_* */
++ spinlock_t ls_lock; /* Lock used only when there are
++ * no percpu stats areas */
++ struct lprocfs_percpu *ls_percpu[0];
++};
++
++static inline int opcode_offset(__u32 opc) {
++ if (opc < OST_LAST_OPC) {
++ /* OST opcode */
++ return (opc - OST_FIRST_OPC);
++ } else if (opc < MDS_LAST_OPC) {
++ /* MDS opcode */
++ return (opc - MDS_FIRST_OPC +
++ (OST_LAST_OPC - OST_FIRST_OPC));
++ } else if (opc < LDLM_LAST_OPC) {
++ /* LDLM Opcode */
++ return (opc - LDLM_FIRST_OPC +
++ (MDS_LAST_OPC - MDS_FIRST_OPC) +
++ (OST_LAST_OPC - OST_FIRST_OPC));
++ } else if (opc < MGS_LAST_OPC) {
++ /* MGS Opcode */
++ return (opc - MGS_FIRST_OPC +
++ (LDLM_LAST_OPC - LDLM_FIRST_OPC) +
++ (MDS_LAST_OPC - MDS_FIRST_OPC) +
++ (OST_LAST_OPC - OST_FIRST_OPC));
++ } else if (opc < OBD_LAST_OPC) {
++ /* OBD Ping */
++ return (opc - OBD_FIRST_OPC +
++ (MGS_LAST_OPC - MGS_FIRST_OPC) +
++ (LDLM_LAST_OPC - LDLM_FIRST_OPC) +
++ (MDS_LAST_OPC - MDS_FIRST_OPC) +
++ (OST_LAST_OPC - OST_FIRST_OPC));
++ } else if (opc < LLOG_LAST_OPC) {
++ /* LLOG Opcode */
++ return (opc - LLOG_FIRST_OPC +
++ (OBD_LAST_OPC - OBD_FIRST_OPC) +
++ (MGS_LAST_OPC - MGS_FIRST_OPC) +
++ (LDLM_LAST_OPC - LDLM_FIRST_OPC) +
++ (MDS_LAST_OPC - MDS_FIRST_OPC) +
++ (OST_LAST_OPC - OST_FIRST_OPC));
++ } else if (opc < QUOTA_LAST_OPC) {
++ /* LQUOTA Opcode */
++ return (opc - QUOTA_FIRST_OPC +
++ (LLOG_LAST_OPC - LLOG_FIRST_OPC) +
++ (OBD_LAST_OPC - OBD_FIRST_OPC) +
++ (MGS_LAST_OPC - MGS_FIRST_OPC) +
++ (LDLM_LAST_OPC - LDLM_FIRST_OPC) +
++ (MDS_LAST_OPC - MDS_FIRST_OPC) +
++ (OST_LAST_OPC - OST_FIRST_OPC));
++ } else {
++ /* Unknown Opcode */
++ return -1;
++ }
++}
++
++#define LUSTRE_MAX_OPCODES ((OST_LAST_OPC - OST_FIRST_OPC) + \
++ (MDS_LAST_OPC - MDS_FIRST_OPC) + \
++ (LDLM_LAST_OPC - LDLM_FIRST_OPC) + \
++ (MGS_LAST_OPC - MGS_FIRST_OPC) + \
++ (OBD_LAST_OPC - OBD_FIRST_OPC) + \
++ (LLOG_LAST_OPC - LLOG_FIRST_OPC) + \
++ (QUOTA_LAST_OPC - QUOTA_FIRST_OPC))
++
++#define EXTRA_MAX_OPCODES ((PTLRPC_LAST_CNTR - PTLRPC_FIRST_CNTR) + \
++ (EXTRA_LAST_OPC - EXTRA_FIRST_OPC))
++
++enum {
++ PTLRPC_REQWAIT_CNTR = 0,
++ PTLRPC_REQQDEPTH_CNTR,
++ PTLRPC_REQACTIVE_CNTR,
++ PTLRPC_TIMEOUT,
++ PTLRPC_REQBUF_AVAIL_CNTR,
++ PTLRPC_LAST_CNTR
++};
++
++#define PTLRPC_FIRST_CNTR PTLRPC_REQWAIT_CNTR
++
++enum {
++ LDLM_GLIMPSE_ENQUEUE = 0,
++ LDLM_PLAIN_ENQUEUE,
++ LDLM_EXTENT_ENQUEUE,
++ LDLM_FLOCK_ENQUEUE,
++ LDLM_IBITS_ENQUEUE,
++ MDS_REINT_SETATTR,
++ MDS_REINT_CREATE,
++ MDS_REINT_LINK,
++ MDS_REINT_UNLINK,
++ MDS_REINT_RENAME,
++ MDS_REINT_OPEN,
++ BRW_READ_BYTES,
++ BRW_WRITE_BYTES,
++ EXTRA_LAST_OPC
++};
++
++#define EXTRA_FIRST_OPC LDLM_GLIMPSE_ENQUEUE
++/* class_obd.c */
++extern cfs_proc_dir_entry_t *proc_lustre_root;
++
++struct obd_device;
++struct file;
++struct obd_histogram;
++
++/* Days / hours / mins / seconds format */
++struct dhms {
++ int d,h,m,s;
++};
++static inline void s2dhms(struct dhms *ts, time_t secs)
++{
++ ts->d = secs / 86400;
++ secs = secs % 86400;
++ ts->h = secs / 3600;
++ secs = secs % 3600;
++ ts->m = secs / 60;
++ ts->s = secs % 60;
++}
++#define DHMS_FMT "%dd%dh%02dm%02ds"
++#define DHMS_VARS(x) (x)->d, (x)->h, (x)->m, (x)->s
++
++
++#ifdef LPROCFS
++
++static inline int lprocfs_stats_lock(struct lprocfs_stats *stats, int type)
++{
++ int rc = 0;
++
++ if (stats->ls_flags & LPROCFS_STATS_FLAG_NOPERCPU) {
++ if (type & LPROCFS_GET_NUM_CPU)
++ rc = 1;
++ if (type & LPROCFS_GET_SMP_ID)
++ rc = 0;
++ spin_lock(&stats->ls_lock);
++ } else {
++ if (type & LPROCFS_GET_NUM_CPU)
++ rc = num_possible_cpus();
++ if (type & LPROCFS_GET_SMP_ID)
++ rc = smp_processor_id();
++ }
++ return rc;
++}
++
++static inline void lprocfs_stats_unlock(struct lprocfs_stats *stats)
++{
++ if (stats->ls_flags & LPROCFS_STATS_FLAG_NOPERCPU)
++ spin_unlock(&stats->ls_lock);
++}
++
++/* Two optimized LPROCFS counter increment functions are provided:
++ * lprocfs_counter_incr(stats, idx) - optimized for by-one counters
++ * lprocfs_counter_add(stats, idx, amount) - use for multi-valued counters
++ * Counter data layout allows the config flag, counter lock and the
++ * count itself to reside within a single cache line.
++ */
++
++extern void lprocfs_counter_add(struct lprocfs_stats *stats, int idx,
++ long amount);
++extern void lprocfs_counter_sub(struct lprocfs_stats *stats, int idx,
++ long amount);
++
++#define lprocfs_counter_incr(stats, idx) \
++ lprocfs_counter_add(stats, idx, 1)
++#define lprocfs_counter_decr(stats, idx) \
++ lprocfs_counter_sub(stats, idx, 1)
++
++extern __s64 lprocfs_read_helper(struct lprocfs_counter *lc,
++ enum lprocfs_fields_flags field);
++
++static inline __u64 lprocfs_stats_collector(struct lprocfs_stats *stats,
++ int idx,
++ enum lprocfs_fields_flags field)
++{
++ __u64 ret = 0;
++ int i;
++
++ LASSERT(stats != NULL);
++ for (i = 0; i < num_possible_cpus(); i++)
++ ret += lprocfs_read_helper(&(stats->ls_percpu[i]->lp_cntr[idx]),
++ field);
++ return ret;
++}
++
++extern struct lprocfs_stats *lprocfs_alloc_stats(unsigned int num,
++ enum lprocfs_stats_flags flags);
++extern void lprocfs_clear_stats(struct lprocfs_stats *stats);
++extern void lprocfs_free_stats(struct lprocfs_stats **stats);
++extern void lprocfs_init_ops_stats(int num_private_stats,
++ struct lprocfs_stats *stats);
++extern void lprocfs_init_ldlm_stats(struct lprocfs_stats *ldlm_stats);
++extern int lprocfs_alloc_obd_stats(struct obd_device *obddev,
++ unsigned int num_private_stats);
++extern void lprocfs_counter_init(struct lprocfs_stats *stats, int index,
++ unsigned conf, const char *name,
++ const char *units);
++extern void lprocfs_free_obd_stats(struct obd_device *obddev);
++struct obd_export;
++extern int lprocfs_add_clear_entry(struct obd_device * obd,
++ cfs_proc_dir_entry_t *entry);
++extern int lprocfs_exp_setup(struct obd_export *exp,
++ lnet_nid_t *peer_nid, int *newnid);
++extern int lprocfs_exp_cleanup(struct obd_export *exp);
++extern int lprocfs_add_simple(struct proc_dir_entry *root,
++ char *name, read_proc_t *read_proc,
++ write_proc_t *write_proc, void *data);
++extern int lprocfs_register_stats(cfs_proc_dir_entry_t *root, const char *name,
++ struct lprocfs_stats *stats);
++
++/* lprocfs_status.c */
++extern int lprocfs_add_vars(cfs_proc_dir_entry_t *root,
++ struct lprocfs_vars *var,
++ void *data);
++
++extern cfs_proc_dir_entry_t *lprocfs_register(const char *name,
++ cfs_proc_dir_entry_t *parent,
++ struct lprocfs_vars *list,
++ void *data);
++
++extern void lprocfs_remove(cfs_proc_dir_entry_t **root);
++
++extern cfs_proc_dir_entry_t *lprocfs_srch(cfs_proc_dir_entry_t *root,
++ const char *name);
++
++extern int lprocfs_obd_setup(struct obd_device *obd, struct lprocfs_vars *list);
++extern int lprocfs_obd_cleanup(struct obd_device *obd);
++extern int lprocfs_add_simple(struct proc_dir_entry *root, char *name,
++ read_proc_t *read_proc, write_proc_t *write_proc,
++ void *data);
++struct nid_stat;
++extern void lprocfs_free_per_client_stats(struct obd_device *obd);
++extern int lprocfs_nid_stats_clear_write(struct file *file, const char *buffer,
++ unsigned long count, void *data);
++extern int lprocfs_nid_stats_clear_read(char *page, char **start, off_t off,
++ int count, int *eof, void *data);
++
++
++extern struct file_operations lprocfs_evict_client_fops;
++
++extern int lprocfs_seq_create(cfs_proc_dir_entry_t *parent, char *name,
++ mode_t mode, struct file_operations *seq_fops,
++ void *data);
++extern int lprocfs_obd_seq_create(struct obd_device *dev, char *name,
++ mode_t mode, struct file_operations *seq_fops,
++ void *data);
++
++/* Generic callbacks */
++
++extern int lprocfs_rd_u64(char *page, char **start, off_t off,
++ int count, int *eof, void *data);
++extern int lprocfs_rd_atomic(char *page, char **start, off_t off,
++ int count, int *eof, void *data);
++extern int lprocfs_wr_atomic(struct file *file, const char *buffer,
++ unsigned long count, void *data);
++extern int lprocfs_rd_uint(char *page, char **start, off_t off,
++ int count, int *eof, void *data);
++extern int lprocfs_wr_uint(struct file *file, const char *buffer,
++ unsigned long count, void *data);
++extern int lprocfs_rd_uuid(char *page, char **start, off_t off,
++ int count, int *eof, void *data);
++extern int lprocfs_rd_name(char *page, char **start, off_t off,
++ int count, int *eof, void *data);
++extern int lprocfs_rd_fstype(char *page, char **start, off_t off,
++ int count, int *eof, void *data);
++extern int lprocfs_rd_server_uuid(char *page, char **start, off_t off,
++ int count, int *eof, void *data);
++extern int lprocfs_rd_conn_uuid(char *page, char **start, off_t off,
++ int count, int *eof, void *data);
++extern int lprocfs_rd_import(char *page, char **start, off_t off, int count,
++ int *eof, void *data);
++extern int lprocfs_rd_connect_flags(char *page, char **start, off_t off,
++ int count, int *eof, void *data);
++extern int lprocfs_rd_num_exports(char *page, char **start, off_t off,
++ int count, int *eof, void *data);
++extern int lprocfs_rd_numrefs(char *page, char **start, off_t off,
++ int count, int *eof, void *data);
++struct adaptive_timeout;
++extern int lprocfs_at_hist_helper(char *page, int count, int rc,
++ struct adaptive_timeout *at);
++extern int lprocfs_rd_timeouts(char *page, char **start, off_t off,
++ int count, int *eof, void *data);
++extern int lprocfs_wr_timeouts(struct file *file, const char *buffer,
++ unsigned long count, void *data);
++extern int lprocfs_wr_evict_client(struct file *file, const char *buffer,
++ unsigned long count, void *data);
++extern int lprocfs_wr_ping(struct file *file, const char *buffer,
++ unsigned long count, void *data);
++
++/* Statfs helpers */
++extern int lprocfs_rd_blksize(char *page, char **start, off_t off,
++ int count, int *eof, void *data);
++extern int lprocfs_rd_kbytestotal(char *page, char **start, off_t off,
++ int count, int *eof, void *data);
++extern int lprocfs_rd_kbytesfree(char *page, char **start, off_t off,
++ int count, int *eof, void *data);
++extern int lprocfs_rd_kbytesavail(char *page, char **start, off_t off,
++ int count, int *eof, void *data);
++extern int lprocfs_rd_filestotal(char *page, char **start, off_t off,
++ int count, int *eof, void *data);
++extern int lprocfs_rd_filesfree(char *page, char **start, off_t off,
++ int count, int *eof, void *data);
++extern int lprocfs_rd_filegroups(char *page, char **start, off_t off,
++ int count, int *eof, void *data);
++
++extern int lprocfs_write_helper(const char *buffer, unsigned long count,
++ int *val);
++extern int lprocfs_write_frac_helper(const char *buffer, unsigned long count,
++ int *val, int mult);
++extern int lprocfs_read_frac_helper(char *buffer, unsigned long count,
++ long val, int mult);
++extern int lprocfs_write_u64_helper(const char *buffer, unsigned long count,
++ __u64 *val);
++extern int lprocfs_write_frac_u64_helper(const char *buffer, unsigned long count,
++ __u64 *val, int mult);
++void lprocfs_oh_tally(struct obd_histogram *oh, unsigned int value);
++void lprocfs_oh_tally_log2(struct obd_histogram *oh, unsigned int value);
++void lprocfs_oh_clear(struct obd_histogram *oh);
++unsigned long lprocfs_oh_sum(struct obd_histogram *oh);
++
++/* lprocfs_status.c: counter read/write functions */
++extern int lprocfs_counter_read(char *page, char **start, off_t off,
++ int count, int *eof, void *data);
++extern int lprocfs_counter_write(struct file *file, const char *buffer,
++ unsigned long count, void *data);
++
++/* lprocfs_status.c: recovery status */
++int lprocfs_obd_rd_recovery_status(char *page, char **start, off_t off,
++ int count, int *eof, void *data);
++
++/* lprocfs_status.c: hash statistics */
++int lprocfs_obd_rd_hash(char *page, char **start, off_t off,
++ int count, int *eof, void *data);
++
++extern int lprocfs_seq_release(struct inode *, struct file *);
++
++/* in lprocfs_status.c, to protect the private data for proc entries */
++extern struct rw_semaphore _lprocfs_lock;
++#define LPROCFS_ENTRY() do { \
++ down_read(&_lprocfs_lock); \
++} while(0)
++#define LPROCFS_EXIT() do { \
++ up_read(&_lprocfs_lock); \
++} while(0)
++#define LPROCFS_ENTRY_AND_CHECK(dp) do { \
++ typecheck(struct proc_dir_entry *, dp); \
++ LPROCFS_ENTRY(); \
++ if ((dp)->deleted) { \
++ LPROCFS_EXIT(); \
++ return -ENODEV; \
++ } \
++} while(0)
++#define LPROCFS_WRITE_ENTRY() do { \
++ down_write(&_lprocfs_lock); \
++} while(0)
++#define LPROCFS_WRITE_EXIT() do { \
++ up_write(&_lprocfs_lock); \
++} while(0)
++
++/* You must use these macros when you want to refer to
++ * the import in a client obd_device for a lprocfs entry */
++#define LPROCFS_CLIMP_CHECK(obd) do { \
++ typecheck(struct obd_device *, obd); \
++ down_read(&(obd)->u.cli.cl_sem); \
++ if ((obd)->u.cli.cl_import == NULL) { \
++ up_read(&(obd)->u.cli.cl_sem); \
++ return -ENODEV; \
++ } \
++} while(0)
++#define LPROCFS_CLIMP_EXIT(obd) \
++ up_read(&(obd)->u.cli.cl_sem);
++
++
++/* Write the name##_seq_show function and call LPROC_SEQ_FOPS_RO for
++ read-only proc entries; otherwise, also define a name##_seq_write
++ function for a read-write proc entry and call LPROC_SEQ_FOPS instead.
++ Finally, call lprocfs_obd_seq_create(obd, filename, 0444, &name##_fops, data); */
++#define __LPROC_SEQ_FOPS(name, custom_seq_write) \
++static int name##_seq_open(struct inode *inode, struct file *file) { \
++ struct proc_dir_entry *dp = PDE(inode); \
++ int rc; \
++ LPROCFS_ENTRY_AND_CHECK(dp); \
++ rc = single_open(file, name##_seq_show, dp->data); \
++ if (rc) { \
++ LPROCFS_EXIT(); \
++ return rc; \
++ } \
++ return 0; \
++} \
++struct file_operations name##_fops = { \
++ .owner = THIS_MODULE, \
++ .open = name##_seq_open, \
++ .read = seq_read, \
++ .write = custom_seq_write, \
++ .llseek = seq_lseek, \
++ .release = lprocfs_seq_release, \
++}
++
++#define LPROC_SEQ_FOPS_RO(name) __LPROC_SEQ_FOPS(name, NULL)
++#define LPROC_SEQ_FOPS(name) __LPROC_SEQ_FOPS(name, name##_seq_write)
++
++/* lproc_ptlrpc.c */
++struct ptlrpc_request;
++extern void target_print_req(void *seq_file, struct ptlrpc_request *req);
++
++#ifdef CRAY_XT3
++/* lprocfs_status.c: read recovery max time bz13079 */
++int lprocfs_obd_rd_recovery_maxtime(char *page, char **start, off_t off,
++ int count, int *eof, void *data);
++
++/* lprocfs_status.c: write recovery max time bz13079 */
++int lprocfs_obd_wr_recovery_maxtime(struct file *file, const char *buffer,
++ unsigned long count, void *data);
++#endif
++
++/* all quota proc functions */
++extern int lprocfs_quota_rd_bunit(char *page, char **start, off_t off, int count,
++ int *eof, void *data);
++extern int lprocfs_quota_wr_bunit(struct file *file, const char *buffer,
++ unsigned long count, void *data);
++extern int lprocfs_quota_rd_btune(char *page, char **start, off_t off, int count,
++ int *eof, void *data);
++extern int lprocfs_quota_wr_btune(struct file *file, const char *buffer,
++ unsigned long count, void *data);
++extern int lprocfs_quota_rd_iunit(char *page, char **start, off_t off, int count,
++ int *eof, void *data);
++extern int lprocfs_quota_wr_iunit(struct file *file, const char *buffer,
++ unsigned long count, void *data);
++extern int lprocfs_quota_rd_itune(char *page, char **start, off_t off, int count,
++ int *eof, void *data);
++extern int lprocfs_quota_wr_itune(struct file *file, const char *buffer,
++ unsigned long count, void *data);
++extern int lprocfs_quota_rd_type(char *page, char **start, off_t off, int count,
++ int *eof, void *data);
++extern int lprocfs_quota_wr_type(struct file *file, const char *buffer,
++ unsigned long count, void *data);
++extern int lprocfs_quota_rd_switch_seconds(char *page, char **start, off_t off,
++ int count, int *eof, void *data);
++extern int lprocfs_quota_wr_switch_seconds(struct file *file, const char *buffer,
++ unsigned long count, void *data);
++extern int lprocfs_quota_rd_sync_blk(char *page, char **start, off_t off,
++ int count, int *eof, void *data);
++extern int lprocfs_quota_wr_sync_blk(struct file *file, const char *buffer,
++ unsigned long count, void *data);
++extern int lprocfs_quota_rd_switch_qs(char *page, char **start, off_t off,
++ int count, int *eof, void *data);
++extern int lprocfs_quota_wr_switch_qs(struct file *file, const char *buffer,
++ unsigned long count, void *data);
++extern int lprocfs_quota_rd_boundary_factor(char *page, char **start, off_t off,
++ int count, int *eof, void *data);
++extern int lprocfs_quota_wr_boundary_factor(struct file *file, const char *buffer,
++ unsigned long count, void *data);
++extern int lprocfs_quota_rd_least_bunit(char *page, char **start, off_t off,
++ int count, int *eof, void *data);
++extern int lprocfs_quota_wr_least_bunit(struct file *file, const char *buffer,
++ unsigned long count, void *data);
++extern int lprocfs_quota_rd_least_iunit(char *page, char **start, off_t off,
++ int count, int *eof, void *data);
++extern int lprocfs_quota_wr_least_iunit(struct file *file, const char *buffer,
++ unsigned long count, void *data);
++extern int lprocfs_quota_rd_qs_factor(char *page, char **start, off_t off,
++ int count, int *eof, void *data);
++extern int lprocfs_quota_wr_qs_factor(struct file *file, const char *buffer,
++ unsigned long count, void *data);
++
++#else
++/* LPROCFS is not defined */
++static inline void lprocfs_counter_add(struct lprocfs_stats *stats,
++ int index, long amount) { return; }
++static inline void lprocfs_counter_incr(struct lprocfs_stats *stats,
++ int index) { return; }
++static inline void lprocfs_counter_sub(struct lprocfs_stats *stats,
++ int index, long amount) { return; }
++static inline void lprocfs_counter_init(struct lprocfs_stats *stats,
++ int index, unsigned conf,
++ const char *name, const char *units)
++{ return; }
++
++static inline __u64 lc_read_helper(struct lprocfs_counter *lc,
++ enum lprocfs_fields_flags field)
++{ return 0; }
++
++static inline struct lprocfs_stats* lprocfs_alloc_stats(unsigned int num,
++ enum lprocfs_stats_flags flags)
++{ return NULL; }
++static inline void lprocfs_clear_stats(struct lprocfs_stats *stats)
++{ return; }
++static inline void lprocfs_free_stats(struct lprocfs_stats **stats)
++{ return; }
++static inline int lprocfs_register_stats(cfs_proc_dir_entry_t *root,
++ const char *name,
++ struct lprocfs_stats *stats)
++{ return 0; }
++static inline void lprocfs_init_ops_stats(int num_private_stats,
++ struct lprocfs_stats *stats)
++{ return; }
++static inline void lprocfs_init_ldlm_stats(struct lprocfs_stats *ldlm_stats)
++{ return; }
++static inline int lprocfs_alloc_obd_stats(struct obd_device *obddev,
++ unsigned int num_private_stats)
++{ return 0; }
++static inline void lprocfs_free_obd_stats(struct obd_device *obddev)
++{ return; }
++
++struct obd_export;
++static inline int lprocfs_add_clear_entry(struct obd_export *exp)
++{ return 0; }
++static inline int lprocfs_exp_setup(struct obd_export *exp,
++ lnet_nid_t *peer_nid, int *newnid)
++{ return 0; }
++static inline int lprocfs_exp_cleanup(struct obd_export *exp)
++{ return 0; }
++static inline int lprocfs_add_simple(struct proc_dir_entry *root,
++ char *name,
++ read_proc_t *read_proc,
++ write_proc_t *write_proc,
++ void *data)
++{return 0; }
++struct nid_stat;
++static inline void lprocfs_free_per_client_stats(struct obd_device *obd)
++{}
++static inline
++int lprocfs_nid_stats_clear_write(struct file *file, const char *buffer,
++ unsigned long count, void *data)
++{return count;}
++static inline
++int lprocfs_nid_stats_clear_read(char *page, char **start, off_t off,
++ int count, int *eof, void *data)
++{return count;}
++
++
++static inline cfs_proc_dir_entry_t *
++lprocfs_register(const char *name, cfs_proc_dir_entry_t *parent,
++ struct lprocfs_vars *list, void *data) { return NULL; }
++static inline int lprocfs_add_vars(cfs_proc_dir_entry_t *root,
++ struct lprocfs_vars *var,
++ void *data) { return 0; }
++static inline void lprocfs_remove(cfs_proc_dir_entry_t **root) {};
++static inline cfs_proc_dir_entry_t *lprocfs_srch(cfs_proc_dir_entry_t *head,
++ const char *name) {return 0;}
++static inline int lprocfs_obd_setup(struct obd_device *dev,
++ struct lprocfs_vars *list) { return 0; }
++static inline int lprocfs_obd_cleanup(struct obd_device *dev) { return 0; }
++static inline int lprocfs_rd_u64(char *page, char **start, off_t off,
++ int count, int *eof, void *data) { return 0; }
++static inline int lprocfs_rd_uuid(char *page, char **start, off_t off,
++ int count, int *eof, void *data) { return 0; }
++static inline int lprocfs_rd_name(char *page, char **start, off_t off,
++ int count, int *eof, void *data) { return 0; }
++static inline int lprocfs_rd_server_uuid(char *page, char **start, off_t off,
++ int count, int *eof, void *data)
++{ return 0; }
++static inline int lprocfs_rd_conn_uuid(char *page, char **start, off_t off,
++ int count, int *eof, void *data)
++{ return 0; }
++static inline int lprocfs_rd_import(char *page, char **start, off_t off, int count,
++ int *eof, void *data) { return 0; }
++static inline int lprocfs_rd_connect_flags(char *page, char **start, off_t off,
++ int count, int *eof, void *data)
++{ return 0; }
++static inline int lprocfs_rd_num_exports(char *page, char **start, off_t off,
++ int count, int *eof, void *data)
++{ return 0; }
++static inline int lprocfs_rd_numrefs(char *page, char **start, off_t off,
++ int count, int *eof, void *data)
++{ return 0; }
++struct adaptive_timeout;
++static inline int lprocfs_at_hist_helper(char *page, int count, int rc,
++ struct adaptive_timeout *at)
++{ return 0; }
++static inline int lprocfs_rd_timeouts(char *page, char **start, off_t off,
++ int count, int *eof, void *data)
++{ return 0; }
++static inline int lprocfs_wr_timeouts(struct file *file, const char *buffer,
++ unsigned long count, void *data)
++{ return 0; }
++static inline int lprocfs_wr_evict_client(struct file *file, const char *buffer,
++ unsigned long count, void *data)
++{ return 0; }
++static inline int lprocfs_wr_ping(struct file *file, const char *buffer,
++ unsigned long count, void *data)
++{ return 0; }
++
++
++/* Statfs helpers */
++static inline
++int lprocfs_rd_blksize(char *page, char **start, off_t off,
++ int count, int *eof, void *data) { return 0; }
++static inline
++int lprocfs_rd_kbytestotal(char *page, char **start, off_t off,
++ int count, int *eof, void *data) { return 0; }
++static inline
++int lprocfs_rd_kbytesfree(char *page, char **start, off_t off,
++ int count, int *eof, void *data) { return 0; }
++static inline
++int lprocfs_rd_kbytesavail(char *page, char **start, off_t off,
++ int count, int *eof, void *data) { return 0; }
++static inline
++int lprocfs_rd_filestotal(char *page, char **start, off_t off,
++ int count, int *eof, void *data) { return 0; }
++static inline
++int lprocfs_rd_filesfree(char *page, char **start, off_t off,
++ int count, int *eof, void *data) { return 0; }
++static inline
++int lprocfs_rd_filegroups(char *page, char **start, off_t off,
++ int count, int *eof, void *data) { return 0; }
++static inline
++void lprocfs_oh_tally(struct obd_histogram *oh, unsigned int value) {}
++static inline
++void lprocfs_oh_tally_log2(struct obd_histogram *oh, unsigned int value) {}
++static inline
++void lprocfs_oh_clear(struct obd_histogram *oh) {}
++static inline
++unsigned long lprocfs_oh_sum(struct obd_histogram *oh) { return 0; }
++static inline
++int lprocfs_counter_read(char *page, char **start, off_t off,
++ int count, int *eof, void *data) { return 0; }
++static inline
++int lprocfs_counter_write(struct file *file, const char *buffer,
++ unsigned long count, void *data) { return 0; }
++
++static inline
++__u64 lprocfs_stats_collector(struct lprocfs_stats *stats, int idx,
++ enum lprocfs_fields_flags field)
++{ return (__u64)0; }
++
++#define LPROCFS_ENTRY()
++#define LPROCFS_EXIT()
++#define LPROCFS_ENTRY_AND_CHECK(dp)
++#define LPROC_SEQ_FOPS_RO(name)
++#define LPROC_SEQ_FOPS(name)
++
++/* lproc_ptlrpc.c */
++#define target_print_req NULL
++
++#endif /* LPROCFS */
++
++#endif /* LPROCFS_SNMP_H */
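As a usage note for the counter API declared in this header: a stats block is allocated once, each slot is described with lprocfs_counter_init(), and hot paths then call the incr/add helpers. A hedged sketch, assuming the declarations above are in scope; the EX_* indices, names and units are invented for this example:

/* Sketch only: one by-one counter and one multi-valued
 * (min/max/avg) counter, ticked from a hypothetical hot path. */
enum { EX_OPENS = 0, EX_READ_BYTES, EX_NR_COUNTERS };

static struct lprocfs_stats *ex_stats;

static int ex_stats_setup(void)
{
        /* flags == 0 selects the default per-cpu counter areas */
        ex_stats = lprocfs_alloc_stats(EX_NR_COUNTERS, 0);
        if (ex_stats == NULL)
                return -ENOMEM;
        lprocfs_counter_init(ex_stats, EX_OPENS, 0, "opens", "reqs");
        lprocfs_counter_init(ex_stats, EX_READ_BYTES,
                             LPROCFS_CNTR_AVGMINMAX | LPROCFS_TYPE_BYTES,
                             "read_bytes", "bytes");
        return 0;
}

static void ex_account_read(long nbytes)
{
        lprocfs_counter_incr(ex_stats, EX_OPENS);             /* by-one */
        lprocfs_counter_add(ex_stats, EX_READ_BYTES, nbytes); /* multi-valued */
}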
+diff -urNad lustre~/lustre/llite/file.c lustre/lustre/llite/file.c
+--- lustre~/lustre/llite/file.c 2009-03-12 11:02:39.000000000 +0100
++++ lustre/lustre/llite/file.c 2009-03-12 11:02:51.000000000 +0100
+@@ -1801,11 +1801,12 @@
+ #endif
+ }
+
++#ifdef HAVE_KERNEL_SENDFILE
+ /*
+ * Send file content (through pagecache) somewhere with helper
+ */
+-static ssize_t ll_file_sendfile(struct file *in_file, loff_t *ppos,size_t count,
+- read_actor_t actor, void *target)
++static ssize_t ll_file_sendfile(struct file *in_file, loff_t *ppos,
++ size_t count, read_actor_t actor, void *target)
+ {
+ struct inode *inode = in_file->f_dentry->d_inode;
+ struct ll_inode_info *lli = ll_i2info(inode);
+@@ -1814,10 +1815,10 @@
+ struct ll_lock_tree_node *node;
+ struct ost_lvb lvb;
+ struct ll_ra_read bead;
+- int rc;
+- ssize_t retval;
++ ssize_t rc;
+ __u64 kms;
+ ENTRY;
++
+ CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),size="LPSZ",offset=%Ld\n",
+ inode->i_ino, inode->i_generation, inode, count, *ppos);
+
+@@ -1831,8 +1832,10 @@
+ in_file->f_ra.ra_pages = 0;
+
+ /* File with no objects, nothing to lock */
+- if (!lsm)
+- RETURN(generic_file_sendfile(in_file, ppos, count, actor, target));
++ if (!lsm) {
++ rc = generic_file_sendfile(in_file, ppos, count, actor, target);
++ RETURN(rc);
++ }
+
+ node = ll_node_from_inode(inode, *ppos, *ppos + count - 1, LCK_PR);
+ if (IS_ERR(node))
+@@ -1872,8 +1875,8 @@
+ /* A glimpse is necessary to determine whether we return a
+ * short read (B) or some zeroes at the end of the buffer (C) */
+ ll_inode_size_unlock(inode, 1);
+- retval = ll_glimpse_size(inode, LDLM_FL_BLOCK_GRANTED);
+- if (retval)
++ rc = ll_glimpse_size(inode, LDLM_FL_BLOCK_GRANTED);
++ if (rc)
+ goto out;
+ } else {
+ /* region is within kms and, hence, within real file size (A) */
+@@ -1889,13 +1892,115 @@
+ ll_ra_read_in(in_file, &bead);
+ /* BUG: 5972 */
+ file_accessed(in_file);
+- retval = generic_file_sendfile(in_file, ppos, count, actor, target);
++ rc = generic_file_sendfile(in_file, ppos, count, actor, target);
+ ll_ra_read_ex(in_file, &bead);
+
+ out:
+ ll_tree_unlock(&tree);
+- RETURN(retval);
++ RETURN(rc);
++}
++#endif
++
++/* change based on
++ * http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commit;h=f0930fffa99e7fe0a0c4b6c7d9a244dc88288c27
++ */
++#ifdef HAVE_KERNEL_SPLICE_READ
++static ssize_t ll_file_splice_read(struct file *in_file, loff_t *ppos,
++ struct pipe_inode_info *pipe, size_t count,
++ unsigned int flags)
++{
++ struct inode *inode = in_file->f_dentry->d_inode;
++ struct ll_inode_info *lli = ll_i2info(inode);
++ struct lov_stripe_md *lsm = lli->lli_smd;
++ struct ll_lock_tree tree;
++ struct ll_lock_tree_node *node;
++ struct ost_lvb lvb;
++ struct ll_ra_read bead;
++ ssize_t rc;
++ __u64 kms;
++ ENTRY;
++
++ CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),size="LPSZ",offset=%Ld\n",
++ inode->i_ino, inode->i_generation, inode, count, *ppos);
++
++ /* "If nbyte is 0, read() will return 0 and have no other results."
++ * -- Single Unix Spec */
++ if (count == 0)
++ RETURN(0);
++
++ ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_READ_BYTES, count);
++ /* turn off the kernel's read-ahead */
++ in_file->f_ra.ra_pages = 0;
++
++ /* File with no objects, nothing to lock */
++ if (!lsm) {
++ rc = generic_file_splice_read(in_file, ppos, pipe, count, flags);
++ RETURN(rc);
++ }
++
++ node = ll_node_from_inode(inode, *ppos, *ppos + count - 1, LCK_PR);
++ if (IS_ERR(node))
++ RETURN(PTR_ERR(node));
++
++ tree.lt_fd = LUSTRE_FPRIVATE(in_file);
++ rc = ll_tree_lock(&tree, node, NULL, count,
++ in_file->f_flags & O_NONBLOCK?LDLM_FL_BLOCK_NOWAIT:0);
++ if (rc != 0)
++ RETURN(rc);
++
++ ll_clear_file_contended(inode);
++ ll_inode_size_lock(inode, 1);
++ /*
++ * Consistency guarantees: following possibilities exist for the
++ * relation between region being read and real file size at this
++ * moment:
++ *
++ * (A): the region is completely inside of the file;
++ *
++ * (B-x): x bytes of region are inside of the file, the rest is
++ * outside;
++ *
++ * (C): the region is completely outside of the file.
++ *
++ * This classification is stable under DLM lock acquired by
++ * ll_tree_lock() above, because to change class, other client has to
++ * take DLM lock conflicting with our lock. Also, any updates to
++ * ->i_size by other threads on this client are serialized by
++ * ll_inode_size_lock(). This guarantees that short reads are handled
++ * correctly in the face of concurrent writes and truncates.
++ */
++ inode_init_lvb(inode, &lvb);
++ obd_merge_lvb(ll_i2sbi(inode)->ll_osc_exp, lsm, &lvb, 1);
++ kms = lvb.lvb_size;
++ if (*ppos + count - 1 > kms) {
++ /* A glimpse is necessary to determine whether we return a
++ * short read (B) or some zeroes at the end of the buffer (C) */
++ ll_inode_size_unlock(inode, 1);
++ rc = ll_glimpse_size(inode, LDLM_FL_BLOCK_GRANTED);
++ if (rc)
++ goto out;
++ } else {
++ /* region is within kms and, hence, within real file size (A) */
++ i_size_write(inode, kms);
++ ll_inode_size_unlock(inode, 1);
++ }
++
++ CDEBUG(D_INFO, "Send ino %lu, "LPSZ" bytes, offset %lld, i_size %llu\n",
++ inode->i_ino, count, *ppos, i_size_read(inode));
++
++ bead.lrr_start = *ppos >> CFS_PAGE_SHIFT;
++ bead.lrr_count = (count + CFS_PAGE_SIZE - 1) >> CFS_PAGE_SHIFT;
++ ll_ra_read_in(in_file, &bead);
++ /* BUG: 5972 */
++ file_accessed(in_file);
++ rc = generic_file_splice_read(in_file, ppos, pipe, count, flags);
++ ll_ra_read_ex(in_file, &bead);
++
++ out:
++ ll_tree_unlock(&tree);
++ RETURN(rc);
+ }
++#endif
+
+ static int ll_lov_recreate_obj(struct inode *inode, struct file *file,
+ unsigned long arg)
+@@ -3084,7 +3189,11 @@
+ }
+
+ #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,10))
++#ifndef HAVE_INODE_PERMISION_2ARGS
+ int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd)
++#else
++int ll_inode_permission(struct inode *inode, int mask)
++#endif
+ {
+ CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), mask %o\n",
+ inode->i_ino, inode->i_generation, inode, mask);
+@@ -3093,7 +3202,7 @@
+ return generic_permission(inode, mask, lustre_check_acl);
+ }
+ #else
+-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
++#ifndef HAVE_INODE_PERMISION_2ARGS
+ int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd)
+ #else
+ int ll_inode_permission(struct inode *inode, int mask)
+@@ -3163,7 +3272,12 @@
+ .release = ll_file_release,
+ .mmap = ll_file_mmap,
+ .llseek = ll_file_seek,
++#ifdef HAVE_KERNEL_SPLICE_READ
++ .splice_read = ll_file_splice_read,
++#endif
++#ifdef HAVE_KERNEL_SENDFILE
+ .sendfile = ll_file_sendfile,
++#endif
+ .fsync = ll_fsync,
+ };
+
+@@ -3185,7 +3299,12 @@
+ .release = ll_file_release,
+ .mmap = ll_file_mmap,
+ .llseek = ll_file_seek,
++#ifdef HAVE_KERNEL_SPLICE_READ
++ .splice_read = ll_file_splice_read,
++#endif
++#ifdef HAVE_KERNEL_SENDFILE
+ .sendfile = ll_file_sendfile,
++#endif
+ .fsync = ll_fsync,
+ #ifdef HAVE_F_OP_FLOCK
+ .flock = ll_file_flock,
+@@ -3212,7 +3331,12 @@
+ .release = ll_file_release,
+ .mmap = ll_file_mmap,
+ .llseek = ll_file_seek,
++#ifdef HAVE_KERNEL_SPLICE_READ
++ .splice_read = ll_file_splice_read,
++#endif
++#ifdef HAVE_KERNEL_SENDFILE
+ .sendfile = ll_file_sendfile,
++#endif
+ .fsync = ll_fsync,
+ #ifdef HAVE_F_OP_FLOCK
+ .flock = ll_file_noflock,
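All three file_operations tables in this file now register the read helpers conditionally because the struct member itself comes and goes: ->sendfile was removed in 2.6.23 in favour of ->splice_read (present since the splice work around 2.6.17), so naming the absent field would not even compile. A reduced sketch of the pattern; the example_* handlers are placeholders:

/* Sketch only, assuming HAVE_KERNEL_SPLICE_READ/HAVE_KERNEL_SENDFILE
 * are set by configure tests against the target kernel. */
struct file_operations example_fops = {
        .owner       = THIS_MODULE,
        .read        = example_read,
#ifdef HAVE_KERNEL_SPLICE_READ
        .splice_read = example_splice_read, /* since ~2.6.17 */
#endif
#ifdef HAVE_KERNEL_SENDFILE
        .sendfile    = example_sendfile,    /* removed in 2.6.23 */
#endif
};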
+diff -urNad lustre~/lustre/llite/file.c.orig lustre/lustre/llite/file.c.orig
+--- lustre~/lustre/llite/file.c.orig 1970-01-01 00:00:00.000000000 +0000
++++ lustre/lustre/llite/file.c.orig 2009-03-12 11:02:39.000000000 +0100
+@@ -0,0 +1,3335 @@
++/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
++ * vim:expandtab:shiftwidth=8:tabstop=8:
++ *
++ * GPL HEADER START
++ *
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License version 2 only,
++ * as published by the Free Software Foundation.
++ *
++ * This program is distributed in the hope that it will be useful, but
++ * WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ * General Public License version 2 for more details (a copy is included
++ * in the LICENSE file that accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License
++ * version 2 along with this program; If not, see
++ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
++ *
++ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
++ * CA 95054 USA or visit www.sun.com if you need additional information or
++ * have any questions.
++ *
++ * GPL HEADER END
++ */
++/*
++ * Copyright 2008 Sun Microsystems, Inc. All rights reserved
++ * Use is subject to license terms.
++ */
++/*
++ * This file is part of Lustre, http://www.lustre.org/
++ * Lustre is a trademark of Sun Microsystems, Inc.
++ *
++ * lustre/llite/file.c
++ *
++ * Author: Peter Braam <braam at clusterfs.com>
++ * Author: Phil Schwan <phil at clusterfs.com>
++ * Author: Andreas Dilger <adilger at clusterfs.com>
++ */
++
++#define DEBUG_SUBSYSTEM S_LLITE
++#include <lustre_dlm.h>
++#include <lustre_lite.h>
++#include <linux/pagemap.h>
++#include <linux/file.h>
++#include <linux/posix_acl.h>
++#include "llite_internal.h"
++#include <lustre/ll_fiemap.h>
++
++/* also used by llite/special.c:ll_special_open() */
++struct ll_file_data *ll_file_data_get(void)
++{
++ struct ll_file_data *fd;
++
++ OBD_SLAB_ALLOC_PTR(fd, ll_file_data_slab);
++ return fd;
++}
++
++static void ll_file_data_put(struct ll_file_data *fd)
++{
++ if (fd != NULL)
++ OBD_SLAB_FREE_PTR(fd, ll_file_data_slab);
++}
++
++static int ll_close_inode_openhandle(struct inode *inode,
++ struct obd_client_handle *och)
++{
++ struct ptlrpc_request *req = NULL;
++ struct obd_device *obd;
++ struct obdo *oa;
++ int rc;
++ ENTRY;
++
++ obd = class_exp2obd(ll_i2mdcexp(inode));
++ if (obd == NULL) {
++ CERROR("Invalid MDC connection handle "LPX64"\n",
++ ll_i2mdcexp(inode)->exp_handle.h_cookie);
++ GOTO(out, rc = 0);
++ }
++
++ /*
++ * Here we check if this is a forced umount. If so, this is called on
++ * cancelling the "open lock" and we do not call mdc_close() in this case,
++ * as it would not succeed: the import is already deactivated.
++ */
++ if (obd->obd_force)
++ GOTO(out, rc = 0);
++
++ OBDO_ALLOC(oa);
++ if (!oa)
++ RETURN(-ENOMEM); // XXX We leak openhandle and request here.
++
++ oa->o_id = inode->i_ino;
++ oa->o_valid = OBD_MD_FLID;
++ obdo_from_inode(oa, inode, OBD_MD_FLTYPE | OBD_MD_FLMODE |
++ OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
++ OBD_MD_FLATIME | OBD_MD_FLMTIME |
++ OBD_MD_FLCTIME);
++ if (ll_is_inode_dirty(inode)) {
++ oa->o_flags = MDS_BFLAG_UNCOMMITTED_WRITES;
++ oa->o_valid |= OBD_MD_FLFLAGS;
++ }
++
++ rc = mdc_close(ll_i2mdcexp(inode), oa, och, &req);
++ if (rc == EAGAIN) {
++ /* We are the last writer, so the MDS has instructed us to get
++ * the file size and any write cookies, then close again. */
++ ll_queue_done_writing(inode);
++ rc = 0;
++ } else if (rc) {
++ CERROR("inode %lu mdc close failed: rc = %d\n",
++ inode->i_ino, rc);
++ }
++
++ OBDO_FREE(oa);
++
++ if (rc == 0) {
++ rc = ll_objects_destroy(req, inode);
++ if (rc)
++ CERROR("inode %lu ll_objects destroy: rc = %d\n",
++ inode->i_ino, rc);
++ }
++
++ ptlrpc_req_finished(req); /* This is close request */
++ EXIT;
++out:
++ mdc_clear_open_replay_data(och);
++
++ return rc;
++}
++
++int ll_mdc_real_close(struct inode *inode, int flags)
++{
++ struct ll_inode_info *lli = ll_i2info(inode);
++ int rc = 0;
++ struct obd_client_handle **och_p;
++ struct obd_client_handle *och;
++ __u64 *och_usecount;
++
++ ENTRY;
++
++ if (flags & FMODE_WRITE) {
++ och_p = &lli->lli_mds_write_och;
++ och_usecount = &lli->lli_open_fd_write_count;
++ } else if (flags & FMODE_EXEC) {
++ och_p = &lli->lli_mds_exec_och;
++ och_usecount = &lli->lli_open_fd_exec_count;
++ } else {
++ LASSERT(flags & FMODE_READ);
++ och_p = &lli->lli_mds_read_och;
++ och_usecount = &lli->lli_open_fd_read_count;
++ }
++
++ down(&lli->lli_och_sem);
++ if (*och_usecount) { /* There are still users of this handle, so
++ skip freeing it. */
++ up(&lli->lli_och_sem);
++ RETURN(0);
++ }
++ och=*och_p;
++ *och_p = NULL;
++ up(&lli->lli_och_sem);
++
++ if (och) { /* There might be a race and somebody may have freed
++ this och already */
++ rc = ll_close_inode_openhandle(inode, och);
++ och->och_fh.cookie = DEAD_HANDLE_MAGIC;
++ OBD_FREE(och, sizeof *och);
++ }
++
++ RETURN(rc);
++}
++
++int ll_mdc_close(struct obd_export *mdc_exp, struct inode *inode,
++ struct file *file)
++{
++ struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
++ struct ll_inode_info *lli = ll_i2info(inode);
++ int rc = 0;
++ ENTRY;
++
++ /* clear group lock, if present */
++ if (unlikely(fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
++ struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
++ fd->fd_flags &= ~(LL_FILE_GROUP_LOCKED|LL_FILE_IGNORE_LOCK);
++ rc = ll_extent_unlock(fd, inode, lsm, LCK_GROUP,
++ &fd->fd_cwlockh);
++ }
++
++ /* Let's see if we have a good enough OPEN lock on the file and
++ whether we can skip talking to the MDS */
++ if (file->f_dentry->d_inode) { /* Can this ever be false? */
++ int lockmode;
++ int flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_TEST_LOCK;
++ struct lustre_handle lockh;
++ struct inode *inode = file->f_dentry->d_inode;
++ struct ldlm_res_id file_res_id = {.name={inode->i_ino,
++ inode->i_generation}};
++ ldlm_policy_data_t policy = {.l_inodebits={MDS_INODELOCK_OPEN}};
++
++ down(&lli->lli_och_sem);
++ if (fd->fd_omode & FMODE_WRITE) {
++ lockmode = LCK_CW;
++ LASSERT(lli->lli_open_fd_write_count);
++ lli->lli_open_fd_write_count--;
++ } else if (fd->fd_omode & FMODE_EXEC) {
++ lockmode = LCK_PR;
++ LASSERT(lli->lli_open_fd_exec_count);
++ lli->lli_open_fd_exec_count--;
++ } else {
++ lockmode = LCK_CR;
++ LASSERT(lli->lli_open_fd_read_count);
++ lli->lli_open_fd_read_count--;
++ }
++ up(&lli->lli_och_sem);
++
++ if (!ldlm_lock_match(mdc_exp->exp_obd->obd_namespace, flags,
++ &file_res_id, LDLM_IBITS, &policy,lockmode,
++ &lockh)) {
++ rc = ll_mdc_real_close(file->f_dentry->d_inode,
++ fd->fd_omode);
++ }
++ } else {
++ CERROR("Releasing a file %p with negative dentry %p. Name %s",
++ file, file->f_dentry, file->f_dentry->d_name.name);
++ }
++
++ LUSTRE_FPRIVATE(file) = NULL;
++ ll_file_data_put(fd);
++
++ RETURN(rc);
++}
++
++int lov_test_and_clear_async_rc(struct lov_stripe_md *lsm);
++
++/* While this returns an error code, fput() in the caller does not, so we need
++ * to make every effort to clean up all of our state here. Also, applications
++ * rarely check close errors and even if an error is returned they will not
++ * re-try the close call.
++ */
++int ll_file_release(struct inode *inode, struct file *file)
++{
++ struct ll_file_data *fd;
++ struct ll_sb_info *sbi = ll_i2sbi(inode);
++ struct ll_inode_info *lli = ll_i2info(inode);
++ struct lov_stripe_md *lsm = lli->lli_smd;
++ int rc;
++ ENTRY;
++
++ CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
++ inode->i_generation, inode);
++
++
++ if (inode->i_sb->s_root != file->f_dentry)
++ ll_stats_ops_tally(sbi, LPROC_LL_RELEASE, 1);
++ fd = LUSTRE_FPRIVATE(file);
++ LASSERT(fd != NULL);
++
++ /* The last ref on @file, maybe not the owner pid of statahead.
++ * Different processes can open the same dir, "ll_opendir_key" means:
++ * it is me that should stop the statahead thread. */
++ if (lli->lli_opendir_key == fd && lli->lli_opendir_pid != 0)
++ ll_stop_statahead(inode, lli->lli_opendir_key);
++
++ if (inode->i_sb->s_root == file->f_dentry) {
++ LUSTRE_FPRIVATE(file) = NULL;
++ ll_file_data_put(fd);
++ RETURN(0);
++ }
++
++ if (lsm)
++ lov_test_and_clear_async_rc(lsm);
++ lli->lli_async_rc = 0;
++
++ /* Ensure that dirty pages are flushed out with the right creds */
++ if (file->f_mode & FMODE_WRITE)
++ filemap_fdatawrite(file->f_mapping);
++
++ rc = ll_mdc_close(sbi->ll_mdc_exp, inode, file);
++ RETURN(rc);
++}
++
++static int ll_intent_file_open(struct file *file, void *lmm,
++ int lmmsize, struct lookup_intent *itp)
++{
++ struct ll_sb_info *sbi = ll_i2sbi(file->f_dentry->d_inode);
++ struct mdc_op_data data;
++ struct dentry *parent = file->f_dentry->d_parent;
++ const char *name = file->f_dentry->d_name.name;
++ const int len = file->f_dentry->d_name.len;
++ struct inode *inode = file->f_dentry->d_inode;
++ struct ptlrpc_request *req;
++ int rc;
++ ENTRY;
++
++ if (!parent)
++ RETURN(-ENOENT);
++
++ ll_prepare_mdc_op_data(&data, parent->d_inode, inode,
++ name, len, O_RDWR, NULL);
++
++ /* Usually we come here only for NFSD, and we want the open lock.
++ But we can also get here with pre-2.6.15 patchless kernels, and in
++ that case that lock is also ok */
++ /* We can also get here if there was cached open handle in revalidate_it
++ * but it disappeared while we were getting from there to ll_file_open.
++ * But this means this file was closed and immediately opened, which
++ * makes a good candidate for using OPEN lock */
++ /* If lmmsize & lmm are not 0, we are just setting stripe info
++ * parameters. No need for the open lock */
++ if (!lmm && !lmmsize)
++ itp->it_flags |= MDS_OPEN_LOCK;
++
++ rc = mdc_intent_lock(sbi->ll_mdc_exp, &data, lmm, lmmsize, itp,
++ 0 /*unused */, &req, ll_mdc_blocking_ast, 0);
++ if (rc == -ESTALE) {
++ /* reason for keeping our own exit path - don't flood the log
++ * with -ESTALE error messages.
++ */
++ if (!it_disposition(itp, DISP_OPEN_OPEN) ||
++ it_open_error(DISP_OPEN_OPEN, itp))
++ GOTO(out, rc);
++ ll_release_openhandle(file->f_dentry, itp);
++ GOTO(out, rc);
++ }
++
++ if (rc != 0 || it_open_error(DISP_OPEN_OPEN, itp)) {
++ rc = rc ? rc : it_open_error(DISP_OPEN_OPEN, itp);
++ CDEBUG(D_VFSTRACE, "lock enqueue: err: %d\n", rc);
++ GOTO(out, rc);
++ }
++
++ if (itp->d.lustre.it_lock_mode)
++ mdc_set_lock_data(&itp->d.lustre.it_lock_handle,
++ inode);
++
++ rc = ll_prep_inode(sbi->ll_osc_exp, &file->f_dentry->d_inode,
++ req, DLM_REPLY_REC_OFF, NULL);
++out:
++ ptlrpc_req_finished(itp->d.lustre.it_data);
++ it_clear_disposition(itp, DISP_ENQ_COMPLETE);
++ ll_intent_drop_lock(itp);
++
++ RETURN(rc);
++}
++
++
++static void ll_och_fill(struct ll_inode_info *lli, struct lookup_intent *it,
++ struct obd_client_handle *och)
++{
++ struct ptlrpc_request *req = it->d.lustre.it_data;
++ struct mds_body *body;
++
++ LASSERT(och);
++
++ body = lustre_msg_buf(req->rq_repmsg, DLM_REPLY_REC_OFF, sizeof(*body));
++ LASSERT(body != NULL); /* reply already checked out */
++ /* and swabbed in mdc_enqueue */
++ LASSERT(lustre_rep_swabbed(req, DLM_REPLY_REC_OFF));
++
++ memcpy(&och->och_fh, &body->handle, sizeof(body->handle));
++ och->och_magic = OBD_CLIENT_HANDLE_MAGIC;
++ lli->lli_io_epoch = body->io_epoch;
++
++ mdc_set_open_replay_data(och, it->d.lustre.it_data);
++}
++
++int ll_local_open(struct file *file, struct lookup_intent *it,
++ struct ll_file_data *fd, struct obd_client_handle *och)
++{
++ ENTRY;
++
++ LASSERT(!LUSTRE_FPRIVATE(file));
++
++ LASSERT(fd != NULL);
++
++ if (och)
++ ll_och_fill(ll_i2info(file->f_dentry->d_inode), it, och);
++ LUSTRE_FPRIVATE(file) = fd;
++ ll_readahead_init(file->f_dentry->d_inode, &fd->fd_ras);
++ fd->fd_omode = it->it_flags;
++
++ RETURN(0);
++}
++
++/* Open a file, and (for the very first open) create objects on the OSTs at
++ * this time. If opened with O_LOV_DELAY_CREATE, then we don't do the object
++ * creation or open until ll_lov_setstripe() ioctl is called. We grab
++ * lli_open_sem to ensure no other process will create objects, send the
++ * stripe MD to the MDS, or try to destroy the objects if that fails.
++ *
++ * If we already have the stripe MD locally then we don't request it in
++ * mdc_open(), by passing a lmm_size = 0.
++ *
++ * It is up to the application to ensure no other processes open this file
++ * in the O_LOV_DELAY_CREATE case, or the default striping pattern will be
++ * used. We might be able to avoid races of that sort by getting lli_open_sem
++ * before returning in the O_LOV_DELAY_CREATE case and dropping it here
++ * or in ll_file_release(), but I'm not sure that is desirable/necessary.
++ */
++int ll_file_open(struct inode *inode, struct file *file)
++{
++ struct ll_inode_info *lli = ll_i2info(inode);
++ struct lookup_intent *it, oit = { .it_op = IT_OPEN,
++ .it_flags = file->f_flags };
++ struct lov_stripe_md *lsm;
++ struct ptlrpc_request *req = NULL;
++ struct obd_client_handle **och_p;
++ __u64 *och_usecount;
++ struct ll_file_data *fd;
++ int rc = 0, opendir_set = 0;
++ ENTRY;
++
++ CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), flags %o\n", inode->i_ino,
++ inode->i_generation, inode, file->f_flags);
++
++#ifdef HAVE_VFS_INTENT_PATCHES
++ it = file->f_it;
++#else
++ it = file->private_data; /* XXX: compat macro */
++ file->private_data = NULL; /* prevent ll_local_open assertion */
++#endif
++
++ fd = ll_file_data_get();
++ if (fd == NULL)
++ RETURN(-ENOMEM);
++
++ if (S_ISDIR(inode->i_mode)) {
++again:
++ spin_lock(&lli->lli_lock);
++ if (lli->lli_opendir_key == NULL && lli->lli_opendir_pid == 0) {
++ LASSERT(lli->lli_sai == NULL);
++ lli->lli_opendir_key = fd;
++ lli->lli_opendir_pid = cfs_curproc_pid();
++ opendir_set = 1;
++ } else if (unlikely(lli->lli_opendir_pid == cfs_curproc_pid() &&
++ lli->lli_opendir_key != NULL)) {
++ /* Two cases for this:
++ * (1) The same process open such directory many times.
++ * (2) The old process opened the directory, and exited
++ * before its children processes. Then new process
++ * with the same pid opens such directory before the
++ * old process's children processes exit.
++ * reset stat ahead for such cases. */
++ spin_unlock(&lli->lli_lock);
++ CDEBUG(D_INFO, "Conflict statahead for %.*s %lu/%u"
++ " reset it.\n", file->f_dentry->d_name.len,
++ file->f_dentry->d_name.name,
++ inode->i_ino, inode->i_generation);
++ ll_stop_statahead(inode, lli->lli_opendir_key);
++ goto again;
++ }
++ spin_unlock(&lli->lli_lock);
++ }
++
++ if (inode->i_sb->s_root == file->f_dentry) {
++ LUSTRE_FPRIVATE(file) = fd;
++ RETURN(0);
++ }
++
++ if (!it || !it->d.lustre.it_disposition) {
++ /* Convert f_flags into access mode. We cannot use file->f_mode,
++ * because everything but O_ACCMODE mask was stripped from it */
++ if ((oit.it_flags + 1) & O_ACCMODE)
++ oit.it_flags++;
++ if (file->f_flags & O_TRUNC)
++ oit.it_flags |= FMODE_WRITE;
++
++ /* The kernel only calls f_op->open from dentry_open. filp_open
++ * calls dentry_open after open_namei has checked permissions.
++ * Only nfsd_open calls dentry_open directly without checking
++ * permissions, which is why the code below is safe. */
++ if (oit.it_flags & FMODE_WRITE)
++ oit.it_flags |= MDS_OPEN_OWNEROVERRIDE;
++
++ /* We do not want O_EXCL here, presumably we opened the file
++ * already? XXX - NFS implications? */
++ oit.it_flags &= ~O_EXCL;
++
++ it = &oit;
++ }
++
++restart:
++ /* Let's see if we have file open on MDS already. */
++ if (it->it_flags & FMODE_WRITE) {
++ och_p = &lli->lli_mds_write_och;
++ och_usecount = &lli->lli_open_fd_write_count;
++ } else if (it->it_flags & FMODE_EXEC) {
++ och_p = &lli->lli_mds_exec_och;
++ och_usecount = &lli->lli_open_fd_exec_count;
++ } else {
++ och_p = &lli->lli_mds_read_och;
++ och_usecount = &lli->lli_open_fd_read_count;
++ }
++
++ LASSERTF(it->it_flags != 0, "it %p dist %d \n", it,
++ it->d.lustre.it_disposition);
++
++ down(&lli->lli_och_sem);
++ if (*och_p) { /* Open handle is present */
++ if (it_disposition(it, DISP_OPEN_OPEN)) {
++ /* Well, there's extra open request that we do not need,
++ let's close it somehow. This will decref request. */
++ rc = it_open_error(DISP_OPEN_OPEN, it);
++ if (rc) {
++ up(&lli->lli_och_sem);
++ ll_file_data_put(fd);
++ GOTO(out_openerr, rc);
++ }
++ ll_release_openhandle(file->f_dentry, it);
++ lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats,
++ LPROC_LL_OPEN);
++ }
++ (*och_usecount)++;
++
++ rc = ll_local_open(file, it, fd, NULL);
++
++ LASSERTF(rc == 0, "rc = %d\n", rc);
++ } else {
++ LASSERT(*och_usecount == 0);
++ if (!it->d.lustre.it_disposition) {
++ /* We cannot just request a lock handle now; the new ELC code
++ means that one of the other OPEN locks for this file could
++ be cancelled, and since the blocking AST handler would
++ attempt to grab och_sem as well, that would result
++ in a deadlock */
++ up(&lli->lli_och_sem);
++ rc = ll_intent_file_open(file, NULL, 0, it);
++ if (rc) {
++ ll_file_data_put(fd);
++ GOTO(out_openerr, rc);
++ }
++
++ mdc_set_lock_data(&it->d.lustre.it_lock_handle,
++ file->f_dentry->d_inode);
++ goto restart;
++ }
++
++ OBD_ALLOC(*och_p, sizeof (struct obd_client_handle));
++ if (!*och_p) {
++ ll_file_data_put(fd);
++ GOTO(out_och_free, rc = -ENOMEM);
++ }
++ (*och_usecount)++;
++ req = it->d.lustre.it_data;
++
++ /* mdc_intent_lock() didn't get a request ref if there was an
++ * open error, so don't do cleanup on the request here
++ * (bug 3430) */
++ /* XXX (green): Should not we bail out on any error here, not
++ * just open error? */
++ rc = it_open_error(DISP_OPEN_OPEN, it);
++ if (rc) {
++ ll_file_data_put(fd);
++ GOTO(out_och_free, rc);
++ }
++
++ ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_OPEN, 1);
++ rc = ll_local_open(file, it, fd, *och_p);
++ LASSERTF(rc == 0, "rc = %d\n", rc);
++ }
++ up(&lli->lli_och_sem);
++
++ /* Must do this outside lli_och_sem lock to prevent deadlock where
++ different kind of OPEN lock for this same inode gets cancelled
++ by ldlm_cancel_lru */
++ if (!S_ISREG(inode->i_mode))
++ GOTO(out, rc);
++
++ lsm = lli->lli_smd;
++ if (lsm == NULL) {
++ if (file->f_flags & O_LOV_DELAY_CREATE ||
++ !(file->f_mode & FMODE_WRITE)) {
++ CDEBUG(D_INODE, "object creation was delayed\n");
++ GOTO(out, rc);
++ }
++ }
++ file->f_flags &= ~O_LOV_DELAY_CREATE;
++ GOTO(out, rc);
++ out:
++ ptlrpc_req_finished(req);
++ if (req)
++ it_clear_disposition(it, DISP_ENQ_OPEN_REF);
++ if (rc == 0) {
++ ll_open_complete(inode);
++ } else {
++out_och_free:
++ if (*och_p) {
++ OBD_FREE(*och_p, sizeof (struct obd_client_handle));
++ *och_p = NULL; /* OBD_FREE writes some magic there */
++ (*och_usecount)--;
++ }
++ up(&lli->lli_och_sem);
++out_openerr:
++ if (opendir_set != 0)
++ ll_stop_statahead(inode, lli->lli_opendir_key);
++ }
++
++ return rc;
++}
++
++/* Fills the obdo with the attributes for the inode defined by lsm */
++int ll_lsm_getattr(struct obd_export *exp, struct lov_stripe_md *lsm,
++ struct obdo *oa)
++{
++ struct ptlrpc_request_set *set;
++ struct obd_info oinfo = { { { 0 } } };
++ int rc;
++ ENTRY;
++
++ LASSERT(lsm != NULL);
++
++ memset(oa, 0, sizeof *oa);
++ oinfo.oi_md = lsm;
++ oinfo.oi_oa = oa;
++ oa->o_id = lsm->lsm_object_id;
++ oa->o_mode = S_IFREG;
++ oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLSIZE |
++ OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ | OBD_MD_FLMTIME |
++ OBD_MD_FLCTIME;
++
++ set = ptlrpc_prep_set();
++ if (set == NULL) {
++ rc = -ENOMEM;
++ } else {
++ rc = obd_getattr_async(exp, &oinfo, set);
++ if (rc == 0)
++ rc = ptlrpc_set_wait(set);
++ ptlrpc_set_destroy(set);
++ }
++ if (rc)
++ RETURN(rc);
++
++ oa->o_valid &= (OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ | OBD_MD_FLMTIME |
++ OBD_MD_FLCTIME | OBD_MD_FLSIZE);
++ RETURN(0);
++}
++
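++/* Map an extent DLM lock back to the index of the stripe it covers.
++ * Single-stripe files trivially map to stripe 0; otherwise the LOV is
++ * asked via obd_get_info(KEY_LOCK_TO_STRIPE). The lock's resource is
++ * then checked against the stripe object as a sanity test. Returns
++ * the stripe index, or -ELDLM_NO_LOCK_DATA on a mismatch. */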
++static int ll_lock_to_stripe_offset(struct inode *inode, struct ldlm_lock *lock)
++{
++ struct ll_inode_info *lli = ll_i2info(inode);
++ struct lov_stripe_md *lsm = lli->lli_smd;
++ struct obd_export *exp = ll_i2obdexp(inode);
++ struct {
++ char name[16];
++ struct ldlm_lock *lock;
++ } key = { .name = KEY_LOCK_TO_STRIPE, .lock = lock };
++ __u32 stripe, vallen = sizeof(stripe);
++ int rc;
++ ENTRY;
++
++ if (lsm->lsm_stripe_count == 1)
++ GOTO(check, stripe = 0);
++
++ /* get our offset in the lov */
++ rc = obd_get_info(exp, sizeof(key), &key, &vallen, &stripe, lsm);
++ if (rc != 0) {
++ CERROR("obd_get_info: rc = %d\n", rc);
++ RETURN(rc);
++ }
++ LASSERT(stripe < lsm->lsm_stripe_count);
++
++check:
++ if (lsm->lsm_oinfo[stripe]->loi_id != lock->l_resource->lr_name.name[0]||
++ lsm->lsm_oinfo[stripe]->loi_gr != lock->l_resource->lr_name.name[1]){
++ LDLM_ERROR(lock, "resource doesn't match object "LPU64"/"LPU64,
++ lsm->lsm_oinfo[stripe]->loi_id,
++ lsm->lsm_oinfo[stripe]->loi_gr);
++ RETURN(-ELDLM_NO_LOCK_DATA);
++ }
++
++ RETURN(stripe);
++}
++
++/* Get extra page reference to ensure it is not going away */
++void ll_pin_extent_cb(void *data)
++{
++ struct page *page = data;
++
++ page_cache_get(page);
++
++ return;
++}
++
++/* Flush the page from the page cache for an extent as it is cancelled.
++ * Page to remove is delivered as @data.
++ *
++ * No one can dirty the extent until we've finished our work and they cannot
++ * enqueue another lock. The DLM protects us from ll_file_read/write here,
++ * but other kernel actors could have pages locked.
++ *
++ * If @discard is set, there is no need to write the page if it is dirty.
++ *
++ * Called with the DLM lock held. */
++int ll_page_removal_cb(void *data, int discard)
++{
++ int rc;
++ struct page *page = data;
++ struct address_space *mapping;
++
++ ENTRY;
++
++ /* We have page reference already from ll_pin_page */
++ lock_page(page);
++
++ /* Already truncated by somebody */
++ if (!page->mapping)
++ GOTO(out, rc = 0);
++
++ mapping = page->mapping;
++
++ ll_teardown_mmaps(mapping,
++ (__u64)page->index << PAGE_CACHE_SHIFT,
++ ((__u64)page->index<<PAGE_CACHE_SHIFT)|
++ ~PAGE_CACHE_MASK);
++ LL_CDEBUG_PAGE(D_PAGE, page, "removing page\n");
++ if (!discard && PageWriteback(page))
++ wait_on_page_writeback(page);
++
++ if (!discard && clear_page_dirty_for_io(page)) {
++ rc = ll_call_writepage(page->mapping->host, page);
++ /* either waiting for io to complete or reacquiring
++ * the lock that the failed writepage released */
++ lock_page(page);
++ wait_on_page_writeback(page);
++ if (rc < 0) {
++ CERROR("writepage inode %lu(%p) of page %p "
++ "failed: %d\n", mapping->host->i_ino,
++ mapping->host, page, rc);
++ if (rc == -ENOSPC)
++ set_bit(AS_ENOSPC, &mapping->flags);
++ else
++ set_bit(AS_EIO, &mapping->flags);
++ }
++ }
++ if (page->mapping != NULL) {
++ struct ll_async_page *llap = llap_cast_private(page);
++ // checking again to account for writeback's lock_page()
++ LL_CDEBUG_PAGE(D_PAGE, page, "truncating\n");
++ if (llap)
++ ll_ra_accounting(llap, page->mapping);
++ ll_truncate_complete_page(page);
++ }
++ EXIT;
++out:
++ LASSERT(!PageWriteback(page));
++ unlock_page(page);
++ page_cache_release(page);
++
++ return 0;
++}
++
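++/* Extent lock cancellation callback: when a lock on this file goes
++ * away, recompute the known minimum size (KMS) for the affected
++ * stripe so stale size information is not trusted afterwards. */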
++int ll_extent_lock_cancel_cb(struct ldlm_lock *lock, struct ldlm_lock_desc *new,
++ void *data, int flag)
++{
++ struct inode *inode;
++ struct ll_inode_info *lli;
++ struct lov_stripe_md *lsm;
++ int stripe;
++ __u64 kms;
++
++ ENTRY;
++
++ if ((unsigned long)data > 0 && (unsigned long)data < 0x1000) {
++ LDLM_ERROR(lock, "cancelling lock with bad data %p", data);
++ LBUG();
++ }
++
++ inode = ll_inode_from_lock(lock);
++ if (inode == NULL)
++ RETURN(0);
++ lli = ll_i2info(inode);
++ if (lli == NULL)
++ GOTO(iput, 0);
++ if (lli->lli_smd == NULL)
++ GOTO(iput, 0);
++ lsm = lli->lli_smd;
++
++ stripe = ll_lock_to_stripe_offset(inode, lock);
++ if (stripe < 0)
++ GOTO(iput, 0);
++
++ lov_stripe_lock(lsm);
++ lock_res_and_lock(lock);
++ kms = ldlm_extent_shift_kms(lock,
++ lsm->lsm_oinfo[stripe]->loi_kms);
++
++ if (lsm->lsm_oinfo[stripe]->loi_kms != kms)
++ LDLM_DEBUG(lock, "updating kms from "LPU64" to "LPU64,
++ lsm->lsm_oinfo[stripe]->loi_kms, kms);
++ lsm->lsm_oinfo[stripe]->loi_kms = kms;
++ unlock_res_and_lock(lock);
++ lov_stripe_unlock(lsm);
++ ll_try_done_writing(inode);
++ EXIT;
++iput:
++ iput(inode);
++
++ return 0;
++}
++
++#if 0
++int ll_async_completion_ast(struct ldlm_lock *lock, int flags, void *data)
++{
++ /* XXX ALLOCATE - 160 bytes */
++ struct inode *inode = ll_inode_from_lock(lock);
++ struct ll_inode_info *lli = ll_i2info(inode);
++ struct lustre_handle lockh = { 0 };
++ struct ost_lvb *lvb;
++ int stripe;
++ ENTRY;
++
++ if (flags & (LDLM_FL_BLOCK_WAIT | LDLM_FL_BLOCK_GRANTED |
++ LDLM_FL_BLOCK_CONV)) {
++ LBUG(); /* not expecting any blocked async locks yet */
++ LDLM_DEBUG(lock, "client-side async enqueue returned a blocked "
++ "lock, returning");
++ ldlm_lock_dump(D_OTHER, lock, 0);
++ ldlm_reprocess_all(lock->l_resource);
++ RETURN(0);
++ }
++
++ LDLM_DEBUG(lock, "client-side async enqueue: granted/glimpsed");
++
++ stripe = ll_lock_to_stripe_offset(inode, lock);
++ if (stripe < 0)
++ goto iput;
++
++ if (lock->l_lvb_len) {
++ struct lov_stripe_md *lsm = lli->lli_smd;
++ __u64 kms;
++ lvb = lock->l_lvb_data;
++ lsm->lsm_oinfo[stripe].loi_rss = lvb->lvb_size;
++
++ lock_res_and_lock(lock);
++ ll_inode_size_lock(inode, 1);
++ kms = MAX(lsm->lsm_oinfo[stripe].loi_kms, lvb->lvb_size);
++ kms = ldlm_extent_shift_kms(NULL, kms);
++ if (lsm->lsm_oinfo[stripe].loi_kms != kms)
++ LDLM_DEBUG(lock, "updating kms from "LPU64" to "LPU64,
++ lsm->lsm_oinfo[stripe].loi_kms, kms);
++ lsm->lsm_oinfo[stripe].loi_kms = kms;
++ ll_inode_size_unlock(inode, 1);
++ unlock_res_and_lock(lock);
++ }
++
++iput:
++ iput(inode);
++ wake_up(&lock->l_waitq);
++
++ ldlm_lock2handle(lock, &lockh);
++ ldlm_lock_decref(&lockh, LCK_PR);
++ RETURN(0);
++}
++#endif
++
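++/* Glimpse callback: run on a client that holds a conflicting lock when
++ * another node issues a glimpse. The reply LVB is filled with this
++ * client's KMS for the stripe plus the inode times, letting the
++ * glimpsing node compute the file size without revoking locks. */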
++static int ll_glimpse_callback(struct ldlm_lock *lock, void *reqp)
++{
++ struct ptlrpc_request *req = reqp;
++ struct inode *inode = ll_inode_from_lock(lock);
++ struct ll_inode_info *lli;
++ struct lov_stripe_md *lsm;
++ struct ost_lvb *lvb;
++ int rc, stripe;
++ int size[2] = { sizeof(struct ptlrpc_body), sizeof(*lvb) };
++ ENTRY;
++
++ if (inode == NULL)
++ GOTO(out, rc = -ELDLM_NO_LOCK_DATA);
++ lli = ll_i2info(inode);
++ if (lli == NULL)
++ GOTO(iput, rc = -ELDLM_NO_LOCK_DATA);
++ lsm = lli->lli_smd;
++ if (lsm == NULL)
++ GOTO(iput, rc = -ELDLM_NO_LOCK_DATA);
++
++ /* First, find out which stripe index this lock corresponds to. */
++ stripe = ll_lock_to_stripe_offset(inode, lock);
++ if (stripe < 0)
++ GOTO(iput, rc = -ELDLM_NO_LOCK_DATA);
++
++ rc = lustre_pack_reply(req, 2, size, NULL);
++ if (rc)
++ GOTO(iput, rc);
++
++ lvb = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF, sizeof(*lvb));
++ lvb->lvb_size = lli->lli_smd->lsm_oinfo[stripe]->loi_kms;
++ lvb->lvb_mtime = LTIME_S(inode->i_mtime);
++ lvb->lvb_atime = LTIME_S(inode->i_atime);
++ lvb->lvb_ctime = LTIME_S(inode->i_ctime);
++
++ LDLM_DEBUG(lock, "i_size: %llu -> stripe number %u -> kms "LPU64
++ " atime "LPU64", mtime "LPU64", ctime "LPU64,
++ i_size_read(inode), stripe, lvb->lvb_size, lvb->lvb_mtime,
++ lvb->lvb_atime, lvb->lvb_ctime);
++ iput:
++ iput(inode);
++
++ out:
++ /* These errors are normal races, so we don't want to fill the console
++ * with messages by calling ptlrpc_error() */
++ if (rc == -ELDLM_NO_LOCK_DATA)
++ lustre_pack_reply(req, 1, NULL, NULL);
++
++ req->rq_status = rc;
++ return rc;
++}
++
++int ll_glimpse_ioctl(struct ll_sb_info *sbi, struct lov_stripe_md *lsm,
++ lstat_t *st)
++{
++ struct lustre_handle lockh = { 0 };
++ struct ldlm_enqueue_info einfo = { 0 };
++ struct obd_info oinfo = { { { 0 } } };
++ struct ost_lvb lvb;
++ int rc;
++
++ ENTRY;
++
++ einfo.ei_type = LDLM_EXTENT;
++ einfo.ei_mode = LCK_PR;
++ einfo.ei_cb_bl = osc_extent_blocking_cb;
++ einfo.ei_cb_cp = ldlm_completion_ast;
++ einfo.ei_cb_gl = ll_glimpse_callback;
++ einfo.ei_cbdata = NULL;
++
++ oinfo.oi_policy.l_extent.end = OBD_OBJECT_EOF;
++ oinfo.oi_lockh = &lockh;
++ oinfo.oi_md = lsm;
++ oinfo.oi_flags = LDLM_FL_HAS_INTENT;
++
++ rc = obd_enqueue_rqset(sbi->ll_osc_exp, &oinfo, &einfo);
++ if (rc == -ENOENT)
++ RETURN(rc);
++ if (rc != 0) {
++ CERROR("obd_enqueue returned rc %d, "
++ "returning -EIO\n", rc);
++ RETURN(rc > 0 ? -EIO : rc);
++ }
++
++ lov_stripe_lock(lsm);
++ memset(&lvb, 0, sizeof(lvb));
++ obd_merge_lvb(sbi->ll_osc_exp, lsm, &lvb, 0);
++ st->st_size = lvb.lvb_size;
++ st->st_blocks = lvb.lvb_blocks;
++ st->st_mtime = lvb.lvb_mtime;
++ st->st_atime = lvb.lvb_atime;
++ st->st_ctime = lvb.lvb_ctime;
++ lov_stripe_unlock(lsm);
++
++ RETURN(rc);
++}
++
++/* NB: obd_merge_lvb will prefer locally cached writes if they extend the
++ * file (because it prefers KMS over RSS when larger) */
++int ll_glimpse_size(struct inode *inode, int ast_flags)
++{
++ struct ll_inode_info *lli = ll_i2info(inode);
++ struct ll_sb_info *sbi = ll_i2sbi(inode);
++ struct lustre_handle lockh = { 0 };
++ struct ldlm_enqueue_info einfo = { 0 };
++ struct obd_info oinfo = { { { 0 } } };
++ struct ost_lvb lvb;
++ int rc;
++ ENTRY;
++
++ CDEBUG(D_DLMTRACE, "Glimpsing inode %lu\n", inode->i_ino);
++
++ if (!lli->lli_smd) {
++ CDEBUG(D_DLMTRACE, "No objects for inode %lu\n", inode->i_ino);
++ RETURN(0);
++ }
++
++ /* NOTE: this looks like a DLM lock request, but it may not be one.
++ * Due to the LDLM_FL_HAS_INTENT flag, this is a glimpse
++ * request that won't revoke any conflicting DLM locks held.
++ * Instead, ll_glimpse_callback() will be called on each client
++ * holding a DLM lock against this file, and the resulting size
++ * will be returned for each stripe. A DLM lock on [0, EOF] is
++ * acquired only if there were no conflicting locks. */
++ einfo.ei_type = LDLM_EXTENT;
++ einfo.ei_mode = LCK_PR;
++ einfo.ei_cb_bl = osc_extent_blocking_cb;
++ einfo.ei_cb_cp = ldlm_completion_ast;
++ einfo.ei_cb_gl = ll_glimpse_callback;
++ einfo.ei_cbdata = inode;
++
++ oinfo.oi_policy.l_extent.end = OBD_OBJECT_EOF;
++ oinfo.oi_lockh = &lockh;
++ oinfo.oi_md = lli->lli_smd;
++ oinfo.oi_flags = ast_flags | LDLM_FL_HAS_INTENT;
++
++ rc = obd_enqueue_rqset(sbi->ll_osc_exp, &oinfo, &einfo);
++ if (rc == -ENOENT)
++ RETURN(rc);
++ if (rc != 0) {
++ CERROR("obd_enqueue returned rc %d, returning -EIO\n", rc);
++ RETURN(rc > 0 ? -EIO : rc);
++ }
++
++ ll_inode_size_lock(inode, 1);
++ inode_init_lvb(inode, &lvb);
++ rc = obd_merge_lvb(sbi->ll_osc_exp, lli->lli_smd, &lvb, 0);
++ i_size_write(inode, lvb.lvb_size);
++ inode->i_blocks = lvb.lvb_blocks;
++ LTIME_S(inode->i_mtime) = lvb.lvb_mtime;
++ LTIME_S(inode->i_atime) = lvb.lvb_atime;
++ LTIME_S(inode->i_ctime) = lvb.lvb_ctime;
++ ll_inode_size_unlock(inode, 1);
++
++ CDEBUG(D_DLMTRACE, "glimpse: size: %llu, blocks: %llu\n",
++ i_size_read(inode), (long long)inode->i_blocks);
++
++ RETURN(rc);
++}
++
++int ll_extent_lock(struct ll_file_data *fd, struct inode *inode,
++ struct lov_stripe_md *lsm, int mode,
++ ldlm_policy_data_t *policy, struct lustre_handle *lockh,
++ int ast_flags)
++{
++ struct ll_sb_info *sbi = ll_i2sbi(inode);
++ struct ost_lvb lvb;
++ struct ldlm_enqueue_info einfo = { 0 };
++ struct obd_info oinfo = { { { 0 } } };
++ int rc;
++ ENTRY;
++
++ LASSERT(!lustre_handle_is_used(lockh));
++ LASSERT(lsm != NULL);
++
++ /* don't drop the mmapped file to LRU */
++ if (mapping_mapped(inode->i_mapping))
++ ast_flags |= LDLM_FL_NO_LRU;
++
++ /* XXX phil: can we do this? won't it screw the file size up? */
++ if ((fd && (fd->fd_flags & LL_FILE_IGNORE_LOCK)) ||
++ (sbi->ll_flags & LL_SBI_NOLCK))
++ RETURN(0);
++
++ CDEBUG(D_DLMTRACE, "Locking inode %lu, start "LPU64" end "LPU64"\n",
++ inode->i_ino, policy->l_extent.start, policy->l_extent.end);
++
++ einfo.ei_type = LDLM_EXTENT;
++ einfo.ei_mode = mode;
++ einfo.ei_cb_bl = osc_extent_blocking_cb;
++ einfo.ei_cb_cp = ldlm_completion_ast;
++ einfo.ei_cb_gl = ll_glimpse_callback;
++ einfo.ei_cbdata = inode;
++
++ oinfo.oi_policy = *policy;
++ oinfo.oi_lockh = lockh;
++ oinfo.oi_md = lsm;
++ oinfo.oi_flags = ast_flags;
++
++ rc = obd_enqueue(sbi->ll_osc_exp, &oinfo, &einfo, NULL);
++ *policy = oinfo.oi_policy;
++ if (rc > 0)
++ rc = -EIO;
++
++ ll_inode_size_lock(inode, 1);
++ inode_init_lvb(inode, &lvb);
++ obd_merge_lvb(sbi->ll_osc_exp, lsm, &lvb, 1);
++
++ if (policy->l_extent.start == 0 &&
++ policy->l_extent.end == OBD_OBJECT_EOF) {
++ /* vmtruncate()->ll_truncate() first sets the i_size and then
++ * the kms under both a DLM lock and the
++ * ll_inode_size_lock(). If we don't get the
++ * ll_inode_size_lock() here we can match the DLM lock and
++ * reset i_size from the kms before the truncating path has
++ * updated the kms. generic_file_write can then trust the
++ * stale i_size when doing appending writes and effectively
++ * cancel the result of the truncate. Getting the
++ * ll_inode_size_lock() after the enqueue maintains the DLM
++ * -> ll_inode_size_lock() acquiring order. */
++ i_size_write(inode, lvb.lvb_size);
++ CDEBUG(D_INODE, "inode=%lu, updating i_size %llu\n",
++ inode->i_ino, i_size_read(inode));
++ }
++
++ if (rc == 0) {
++ LTIME_S(inode->i_mtime) = lvb.lvb_mtime;
++ LTIME_S(inode->i_atime) = lvb.lvb_atime;
++ LTIME_S(inode->i_ctime) = lvb.lvb_ctime;
++ }
++ ll_inode_size_unlock(inode, 1);
++
++ RETURN(rc);
++}
++
++int ll_extent_unlock(struct ll_file_data *fd, struct inode *inode,
++ struct lov_stripe_md *lsm, int mode,
++ struct lustre_handle *lockh)
++{
++ struct ll_sb_info *sbi = ll_i2sbi(inode);
++ int rc;
++ ENTRY;
++
++ /* XXX phil: can we do this? won't it screw the file size up? */
++ if ((fd && (fd->fd_flags & LL_FILE_IGNORE_LOCK)) ||
++ (sbi->ll_flags & LL_SBI_NOLCK))
++ RETURN(0);
++
++ rc = obd_cancel(sbi->ll_osc_exp, lsm, mode, lockh);
++
++ RETURN(rc);
++}
++
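++/* Contention tracking for lockless I/O: a file is flagged contended
++ * when a tree lock request fails with -EUSERS, and the flag expires
++ * after ll_contention_time seconds. While set, reads and writes take
++ * the lockless path instead of acquiring DLM extent locks. */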
++static void ll_set_file_contended(struct inode *inode)
++{
++ struct ll_inode_info *lli = ll_i2info(inode);
++
++ lli->lli_contention_time = cfs_time_current();
++ set_bit(LLI_F_CONTENDED, &lli->lli_flags);
++}
++
++void ll_clear_file_contended(struct inode *inode)
++{
++ struct ll_inode_info *lli = ll_i2info(inode);
++
++ clear_bit(LLI_F_CONTENDED, &lli->lli_flags);
++}
++
++static int ll_is_file_contended(struct file *file)
++{
++ struct inode *inode = file->f_dentry->d_inode;
++ struct ll_inode_info *lli = ll_i2info(inode);
++ struct ll_sb_info *sbi = ll_i2sbi(inode);
++ struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
++ ENTRY;
++
++ if (!(sbi->ll_lco.lco_flags & OBD_CONNECT_SRVLOCK)) {
++ CDEBUG(D_INFO, "the server does not support SRVLOCK feature,"
++ " osc connect flags = 0x"LPX64"\n",
++ sbi->ll_lco.lco_flags);
++ RETURN(0);
++ }
++ if (fd && (fd->fd_flags & LL_FILE_IGNORE_LOCK))
++ RETURN(1);
++ if (test_bit(LLI_F_CONTENDED, &lli->lli_flags)) {
++ cfs_time_t cur_time = cfs_time_current();
++ cfs_time_t retry_time;
++
++ retry_time = cfs_time_add(
++ lli->lli_contention_time,
++ cfs_time_seconds(sbi->ll_contention_time));
++ if (cfs_time_after(cur_time, retry_time)) {
++ ll_clear_file_contended(inode);
++ RETURN(0);
++ }
++ RETURN(1);
++ }
++ RETURN(0);
++}
++
++static int ll_file_get_tree_lock_iov(struct ll_lock_tree *tree,
++ struct file *file, const struct iovec *iov,
++ unsigned long nr_segs,
++ loff_t start, loff_t end, int rw)
++{
++ int append;
++ int tree_locked = 0;
++ int rc;
++ struct inode * inode = file->f_dentry->d_inode;
++
++ append = (rw == OBD_BRW_WRITE) && (file->f_flags & O_APPEND);
++
++ if (append || !ll_is_file_contended(file)) {
++ struct ll_lock_tree_node *node;
++ int ast_flags;
++
++ ast_flags = append ? 0 : LDLM_FL_DENY_ON_CONTENTION;
++ if (file->f_flags & O_NONBLOCK)
++ ast_flags |= LDLM_FL_BLOCK_NOWAIT;
++ node = ll_node_from_inode(inode, start, end,
++ (rw == OBD_BRW_WRITE) ? LCK_PW : LCK_PR);
++ if (IS_ERR(node)) {
++ rc = PTR_ERR(node);
++ GOTO(out, rc);
++ }
++ tree->lt_fd = LUSTRE_FPRIVATE(file);
++ rc = ll_tree_lock_iov(tree, node, iov, nr_segs, ast_flags);
++ if (rc == 0)
++ tree_locked = 1;
++ else if (rc == -EUSERS)
++ ll_set_file_contended(inode);
++ else
++ GOTO(out, rc);
++ }
++ RETURN(tree_locked);
++out:
++ return rc;
++}
++
++/* XXX: exact copy from kernel code (__generic_file_aio_write_nolock from rhel4)
++ */
++static size_t ll_file_get_iov_count(const struct iovec *iov,
++ unsigned long *nr_segs)
++{
++ size_t count = 0;
++ unsigned long seg;
++
++ for (seg = 0; seg < *nr_segs; seg++) {
++ const struct iovec *iv = &iov[seg];
++
++ /*
++ * If any segment has a negative length, or the cumulative
++ * length ever wraps negative then return -EINVAL.
++ */
++ count += iv->iov_len;
++ if (unlikely((ssize_t)(count|iv->iov_len) < 0))
++ return -EINVAL;
++ if (access_ok(VERIFY_WRITE, iv->iov_base, iv->iov_len))
++ continue;
++ if (seg == 0)
++ return -EFAULT;
++ *nr_segs = seg;
++ count -= iv->iov_len; /* This segment is no good */
++ break;
++ }
++ return count;
++}
++
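++/* Copy up to @size bytes worth of iovec segments into @iov_copy,
++ * skipping the first @*offset bytes of the first segment, and advance
++ * the caller's @iov_out/@nr_segs past the consumed segments so the
++ * next chunk of a split request continues where this one stopped. */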
++static int iov_copy_update(unsigned long *nr_segs, const struct iovec **iov_out,
++ unsigned long *nrsegs_copy,
++ struct iovec *iov_copy, size_t *offset,
++ size_t size)
++{
++ int i;
++ const struct iovec *iov = *iov_out;
++ for (i = 0; i < *nr_segs; i++) {
++ const struct iovec *iv = &iov[i];
++ struct iovec *ivc = &iov_copy[i];
++ *ivc = *iv;
++ if (i == 0) {
++ ivc->iov_len -= *offset;
++ ivc->iov_base += *offset;
++ }
++ if (ivc->iov_len >= size) {
++ ivc->iov_len = size;
++ if (i == 0)
++ *offset += size;
++ else
++ *offset = size;
++ break;
++ }
++ size -= ivc->iov_len;
++ }
++ *iov_out += i;
++ *nr_segs -= i;
++ *nrsegs_copy = i + 1;
++
++ return 0;
++}
++
++static int ll_reget_short_lock(struct page *page, int rw,
++ obd_off start, obd_off end,
++ void **cookie)
++{
++ struct ll_async_page *llap;
++ struct obd_export *exp;
++ struct inode *inode = page->mapping->host;
++
++ ENTRY;
++
++ exp = ll_i2obdexp(inode);
++ if (exp == NULL)
++ RETURN(0);
++
++ llap = llap_cast_private(page);
++ if (llap == NULL)
++ RETURN(0);
++
++ RETURN(obd_reget_short_lock(exp, ll_i2info(inode)->lli_smd,
++ &llap->llap_cookie, rw, start, end,
++ cookie));
++}
++
++static void ll_release_short_lock(struct inode *inode, obd_off end,
++ void *cookie, int rw)
++{
++ struct obd_export *exp;
++ int rc;
++
++ exp = ll_i2obdexp(inode);
++ if (exp == NULL)
++ return;
++
++ rc = obd_release_short_lock(exp, ll_i2info(inode)->lli_smd, end,
++ cookie, rw);
++ if (rc < 0)
++ CERROR("unlock failed (%d)\n", rc);
++}
++
++static inline int ll_file_get_fast_lock(struct file *file,
++ obd_off ppos, obd_off end,
++ const struct iovec *iov,
++ unsigned long nr_segs,
++ void **cookie, int rw)
++{
++ int rc = 0, seg;
++ struct page *page;
++
++ ENTRY;
++
++ /* we would like this read request to be lock-free */
++ for (seg = 0; seg < nr_segs; seg++) {
++ const struct iovec *iv = &iov[seg];
++ if (ll_region_mapped((unsigned long)iv->iov_base, iv->iov_len))
++ GOTO(out, rc);
++ }
++
++ page = find_lock_page(file->f_dentry->d_inode->i_mapping,
++ ppos >> CFS_PAGE_SHIFT);
++ if (page) {
++ if (ll_reget_short_lock(page, rw, ppos, end, cookie))
++ rc = 1;
++
++ unlock_page(page);
++ page_cache_release(page);
++ }
++
++out:
++ RETURN(rc);
++}
++
++static inline void ll_file_put_fast_lock(struct inode *inode, obd_off end,
++ void *cookie, int rw)
++{
++ ll_release_short_lock(inode, end, cookie, rw);
++}
++
++enum ll_lock_style {
++ LL_LOCK_STYLE_NOLOCK = 0,
++ LL_LOCK_STYLE_FASTLOCK = 1,
++ LL_LOCK_STYLE_TREELOCK = 2
++};
++
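++/* Pick the cheapest locking style for this I/O: try a fast short lock
++ * piggy-backed on an already cached page first, then fall back to a
++ * full tree lock; a return of LL_LOCK_STYLE_NOLOCK tells the caller
++ * to use the lockless path. Returns an ll_lock_style or -errno. */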
++static inline int ll_file_get_lock(struct file *file, obd_off ppos,
++ obd_off end, const struct iovec *iov,
++ unsigned long nr_segs, void **cookie,
++ struct ll_lock_tree *tree, int rw)
++{
++ int rc;
++
++ ENTRY;
++
++ if (ll_file_get_fast_lock(file, ppos, end, iov, nr_segs, cookie, rw))
++ RETURN(LL_LOCK_STYLE_FASTLOCK);
++
++ rc = ll_file_get_tree_lock_iov(tree, file, iov, nr_segs,
++ ppos, end, rw);
++ /* rc: 1 for tree lock, 0 for no lock, <0 for error */
++ switch (rc) {
++ case 1:
++ RETURN(LL_LOCK_STYLE_TREELOCK);
++ case 0:
++ RETURN(LL_LOCK_STYLE_NOLOCK);
++ }
++
++ /* an error happened if we reached this point, rc = -errno here */
++ RETURN(rc);
++}
++
++static inline void ll_file_put_lock(struct inode *inode, obd_off end,
++ enum ll_lock_style lock_style,
++ void *cookie, struct ll_lock_tree *tree,
++ int rw)
++
++{
++ switch (lock_style) {
++ case LL_LOCK_STYLE_TREELOCK:
++ ll_tree_unlock(tree);
++ break;
++ case LL_LOCK_STYLE_FASTLOCK:
++ ll_file_put_fast_lock(inode, end, cookie, rw);
++ break;
++ default:
++ CERROR("invalid locking style (%d)\n", lock_style);
++ }
++}
++
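++/* Read from a file (through the page cache). Built as ll_file_readv
++ * or ll_file_aio_read depending on whether the kernel still provides
++ * the readv file operation (HAVE_FILE_READV). */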
++#ifdef HAVE_FILE_READV
++static ssize_t ll_file_readv(struct file *file, const struct iovec *iov,
++ unsigned long nr_segs, loff_t *ppos)
++{
++#else
++static ssize_t ll_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
++ unsigned long nr_segs, loff_t pos)
++{
++ struct file *file = iocb->ki_filp;
++ loff_t *ppos = &iocb->ki_pos;
++#endif
++ struct inode *inode = file->f_dentry->d_inode;
++ struct ll_inode_info *lli = ll_i2info(inode);
++ struct lov_stripe_md *lsm = lli->lli_smd;
++ struct ll_sb_info *sbi = ll_i2sbi(inode);
++ struct ll_lock_tree tree;
++ struct ost_lvb lvb;
++ struct ll_ra_read bead;
++ int ra = 0;
++ obd_off end;
++ ssize_t retval, chunk, sum = 0;
++ int lock_style;
++ struct iovec *iov_copy = NULL;
++ unsigned long nrsegs_copy, nrsegs_orig = 0;
++ size_t count, iov_offset = 0;
++ __u64 kms;
++ void *cookie;
++ ENTRY;
++
++ count = ll_file_get_iov_count(iov, &nr_segs);
++ CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),size="LPSZ",offset=%Ld\n",
++ inode->i_ino, inode->i_generation, inode, count, *ppos);
++ /* "If nbyte is 0, read() will return 0 and have no other results."
++ * -- Single Unix Spec */
++ if (count == 0)
++ RETURN(0);
++
++ ll_stats_ops_tally(sbi, LPROC_LL_READ_BYTES, count);
++
++ if (!lsm) {
++ /* A read on a file with no objects should return zero-filled
++ * buffers up to the file size (we can get non-zero sizes with
++ * mknod + truncate and then opening the file for read; this
++ * seems to be a common pattern in the NFS case). Bug 6243 */
++ int notzeroed;
++ /* Since there are no objects on the OSTs, we have nothing to
++ * take a lock on and so are forced to access inode->i_size
++ * unguarded */
++
++ /* Read beyond end of file */
++ if (*ppos >= i_size_read(inode))
++ RETURN(0);
++
++ if (count > i_size_read(inode) - *ppos)
++ count = i_size_read(inode) - *ppos;
++ /* Make sure to correctly adjust the file position pointer for
++ * the EFAULT case */
++ for (nrsegs_copy = 0; nrsegs_copy < nr_segs; nrsegs_copy++) {
++ const struct iovec *iv = &iov[nrsegs_copy];
++
++ if (count < iv->iov_len)
++ chunk = count;
++ else
++ chunk = iv->iov_len;
++ notzeroed = clear_user(iv->iov_base, chunk);
++ sum += (chunk - notzeroed);
++ count -= (chunk - notzeroed);
++ if (notzeroed || !count)
++ break;
++ }
++ *ppos += sum;
++ if (!sum)
++ RETURN(-EFAULT);
++ RETURN(sum);
++ }
++
++repeat:
++ if (sbi->ll_max_rw_chunk != 0) {
++ /* first, find the end of the current stripe */
++ end = *ppos;
++ obd_extent_calc(sbi->ll_osc_exp, lsm, OBD_CALC_STRIPE_END,
++ (obd_off *)&end);
++
++ /* clamp the end if it is beyond the request */
++ if (end > *ppos + count - 1)
++ end = *ppos + count - 1;
++
++ /* and chunk shouldn't be too large even if striping is wide */
++ if (end - *ppos > sbi->ll_max_rw_chunk)
++ end = *ppos + sbi->ll_max_rw_chunk - 1;
++
++ chunk = end - *ppos + 1;
++ if ((count == chunk) && (iov_offset == 0)) {
++ if (iov_copy)
++ OBD_FREE(iov_copy, sizeof(*iov) * nrsegs_orig);
++
++ iov_copy = (struct iovec *)iov;
++ nrsegs_copy = nr_segs;
++ } else {
++ if (!iov_copy) {
++ nrsegs_orig = nr_segs;
++ OBD_ALLOC(iov_copy, sizeof(*iov) * nr_segs);
++ if (!iov_copy)
++ GOTO(out, retval = -ENOMEM);
++ }
++
++ iov_copy_update(&nr_segs, &iov, &nrsegs_copy, iov_copy,
++ &iov_offset, chunk);
++ }
++ } else {
++ end = *ppos + count - 1;
++ iov_copy = (struct iovec *)iov;
++ nrsegs_copy = nr_segs;
++ }
++
++ lock_style = ll_file_get_lock(file, (obd_off)(*ppos), end,
++ iov_copy, nrsegs_copy, &cookie, &tree,
++ OBD_BRW_READ);
++ if (lock_style < 0)
++ GOTO(out, retval = lock_style);
++
++ ll_inode_size_lock(inode, 1);
++ /*
++ * Consistency guarantees: following possibilities exist for the
++ * relation between region being read and real file size at this
++ * moment:
++ *
++ * (A): the region is completely inside of the file;
++ *
++ * (B-x): x bytes of region are inside of the file, the rest is
++ * outside;
++ *
++ * (C): the region is completely outside of the file.
++ *
++ * This classification is stable under the DLM lock acquired by
++ * ll_tree_lock() above, because to change class another client has
++ * to take a DLM lock conflicting with our lock. Also, any updates to
++ * ->i_size by other threads on this client are serialized by
++ * ll_inode_size_lock(). This guarantees that short reads are handled
++ * correctly in the face of concurrent writes and truncates.
++ */
++ inode_init_lvb(inode, &lvb);
++ obd_merge_lvb(ll_i2sbi(inode)->ll_osc_exp, lsm, &lvb, 1);
++ kms = lvb.lvb_size;
++ if (*ppos + count - 1 > kms) {
++ /* A glimpse is necessary to determine whether we return a
++ * short read (B) or some zeroes at the end of the buffer (C) */
++ ll_inode_size_unlock(inode, 1);
++ retval = ll_glimpse_size(inode, LDLM_FL_BLOCK_GRANTED);
++ if (retval) {
++ if (lock_style != LL_LOCK_STYLE_NOLOCK)
++ ll_file_put_lock(inode, end, lock_style,
++ cookie, &tree, OBD_BRW_READ);
++ goto out;
++ }
++ } else {
++ /* region is within kms and, hence, within real file size (A).
++ * We need to increase i_size to cover the read region so that
++ * generic_file_read() will do its job, but that doesn't mean
++ * the kms size is _correct_, it is only the _minimum_ size.
++ * If someone does a stat they will get the correct size which
++ * will always be >= the kms value here. b=11081 */
++ if (i_size_read(inode) < kms)
++ i_size_write(inode, kms);
++ ll_inode_size_unlock(inode, 1);
++ }
++
++ chunk = end - *ppos + 1;
++ CDEBUG(D_INODE,"Read ino %lu, "LPSZ" bytes, offset %lld, i_size %llu\n",
++ inode->i_ino, chunk, *ppos, i_size_read(inode));
++
++ /* turn off the kernel's read-ahead */
++ if (lock_style != LL_LOCK_STYLE_NOLOCK) {
++ file->f_ra.ra_pages = 0;
++ /* initialize read-ahead window once per syscall */
++ if (ra == 0) {
++ ra = 1;
++ bead.lrr_start = *ppos >> CFS_PAGE_SHIFT;
++ bead.lrr_count = (count + CFS_PAGE_SIZE - 1) >> CFS_PAGE_SHIFT;
++ ll_ra_read_in(file, &bead);
++ }
++
++ /* BUG: 5972 */
++ file_accessed(file);
++#ifdef HAVE_FILE_READV
++ retval = generic_file_readv(file, iov_copy, nrsegs_copy, ppos);
++#else
++ retval = generic_file_aio_read(iocb, iov_copy, nrsegs_copy,
++ *ppos);
++#endif
++ ll_file_put_lock(inode, end, lock_style, cookie,
++ &tree, OBD_BRW_READ);
++ } else {
++ retval = ll_file_lockless_io(file, iov_copy, nrsegs_copy, ppos,
++ READ, chunk);
++ }
++ ll_rw_stats_tally(sbi, current->pid, file, count, 0);
++ if (retval > 0) {
++ count -= retval;
++ sum += retval;
++ if (retval == chunk && count > 0)
++ goto repeat;
++ }
++
++ out:
++ if (ra != 0)
++ ll_ra_read_ex(file, &bead);
++ retval = (sum > 0) ? sum : retval;
++
++ if (iov_copy && iov_copy != iov)
++ OBD_FREE(iov_copy, sizeof(*iov) * nrsegs_orig);
++
++ RETURN(retval);
++}
++
++static ssize_t ll_file_read(struct file *file, char *buf, size_t count,
++ loff_t *ppos)
++{
++ struct iovec local_iov = { .iov_base = (void __user *)buf,
++ .iov_len = count };
++#ifdef HAVE_FILE_READV
++ return ll_file_readv(file, &local_iov, 1, ppos);
++#else
++ struct kiocb kiocb;
++ ssize_t ret;
++
++ init_sync_kiocb(&kiocb, file);
++ kiocb.ki_pos = *ppos;
++ kiocb.ki_left = count;
++
++ ret = ll_file_aio_read(&kiocb, &local_iov, 1, kiocb.ki_pos);
++ *ppos = kiocb.ki_pos;
++ return ret;
++#endif
++}
++
++/*
++ * Write to a file (through the page cache).
++ */
++#ifdef HAVE_FILE_WRITEV
++static ssize_t ll_file_writev(struct file *file, const struct iovec *iov,
++ unsigned long nr_segs, loff_t *ppos)
++{
++#else /* AIO stuff */
++static ssize_t ll_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
++ unsigned long nr_segs, loff_t pos)
++{
++ struct file *file = iocb->ki_filp;
++ loff_t *ppos = &iocb->ki_pos;
++#endif
++ struct inode *inode = file->f_dentry->d_inode;
++ struct ll_sb_info *sbi = ll_i2sbi(inode);
++ struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
++ struct ll_lock_tree tree;
++ loff_t maxbytes = ll_file_maxbytes(inode);
++ loff_t lock_start, lock_end, end;
++ ssize_t retval, chunk, sum = 0;
++ int tree_locked;
++ struct iovec *iov_copy = NULL;
++ unsigned long nrsegs_copy, nrsegs_orig = 0;
++ size_t count, iov_offset = 0;
++ ENTRY;
++
++ count = ll_file_get_iov_count(iov, &nr_segs);
++
++ CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),size="LPSZ",offset=%Ld\n",
++ inode->i_ino, inode->i_generation, inode, count, *ppos);
++
++ SIGNAL_MASK_ASSERT(); /* XXX BUG 1511 */
++
++ /* POSIX, but surprised the VFS doesn't check this already */
++ if (count == 0)
++ RETURN(0);
++
++ /* If file was opened for LL_IOC_LOV_SETSTRIPE but the ioctl wasn't
++ * called on the file, don't fail the below assertion (bug 2388). */
++ if (file->f_flags & O_LOV_DELAY_CREATE &&
++ ll_i2info(inode)->lli_smd == NULL)
++ RETURN(-EBADF);
++
++ LASSERT(ll_i2info(inode)->lli_smd != NULL);
++
++ down(&ll_i2info(inode)->lli_write_sem);
++
++repeat:
++ chunk = 0; /* just to fix gcc's warning */
++ end = *ppos + count - 1;
++
++ if (file->f_flags & O_APPEND) {
++ lock_start = 0;
++ lock_end = OBD_OBJECT_EOF;
++ iov_copy = (struct iovec *)iov;
++ nrsegs_copy = nr_segs;
++ } else if (sbi->ll_max_rw_chunk != 0) {
++ /* first, find the end of the current stripe */
++ end = *ppos;
++ obd_extent_calc(sbi->ll_osc_exp, lsm, OBD_CALC_STRIPE_END,
++ (obd_off *)&end);
++
++ /* clamp the end if it is beyond the request */
++ if (end > *ppos + count - 1)
++ end = *ppos + count - 1;
++
++ /* and chunk shouldn't be too large even if striping is wide */
++ if (end - *ppos > sbi->ll_max_rw_chunk)
++ end = *ppos + sbi->ll_max_rw_chunk - 1;
++ lock_start = *ppos;
++ lock_end = end;
++ chunk = end - *ppos + 1;
++ if ((count == chunk) && (iov_offset == 0)) {
++ if (iov_copy)
++ OBD_FREE(iov_copy, sizeof(*iov) * nrsegs_orig);
++
++ iov_copy = (struct iovec *)iov;
++ nrsegs_copy = nr_segs;
++ } else {
++ if (!iov_copy) {
++ nrsegs_orig = nr_segs;
++ OBD_ALLOC(iov_copy, sizeof(*iov) * nr_segs);
++ if (!iov_copy)
++ GOTO(out, retval = -ENOMEM);
++ }
++ iov_copy_update(&nr_segs, &iov, &nrsegs_copy, iov_copy,
++ &iov_offset, chunk);
++ }
++ } else {
++ lock_start = *ppos;
++ lock_end = end;
++ iov_copy = (struct iovec *)iov;
++ nrsegs_copy = nr_segs;
++ }
++
++ tree_locked = ll_file_get_tree_lock_iov(&tree, file, iov_copy,
++ nrsegs_copy,
++ (obd_off)lock_start,
++ (obd_off)lock_end,
++ OBD_BRW_WRITE);
++ if (tree_locked < 0)
++ GOTO(out, retval = tree_locked);
++
++ /* This is OK; generic_file_write() will overwrite this under i_sem
++ * if it races with a local truncate, it just makes our maxbytes
++ * checking easier. The i_size value gets updated in ll_extent_lock()
++ * as a consequence of the [0,EOF] extent lock we requested above. */
++ if (file->f_flags & O_APPEND) {
++ *ppos = i_size_read(inode);
++ end = *ppos + count - 1;
++ }
++
++ if (*ppos >= maxbytes) {
++ send_sig(SIGXFSZ, current, 0);
++ GOTO(out_unlock, retval = -EFBIG);
++ }
++ if (end > maxbytes - 1)
++ end = maxbytes - 1;
++
++ /* generic_file_write handles O_APPEND after getting i_mutex */
++ chunk = end - *ppos + 1;
++ CDEBUG(D_INFO, "Writing inode %lu, "LPSZ" bytes, offset %Lu\n",
++ inode->i_ino, chunk, *ppos);
++ if (tree_locked)
++#ifdef HAVE_FILE_WRITEV
++ retval = generic_file_writev(file, iov_copy, nrsegs_copy, ppos);
++#else
++ retval = generic_file_aio_write(iocb, iov_copy, nrsegs_copy,
++ *ppos);
++#endif
++ else
++ retval = ll_file_lockless_io(file, iov_copy, nrsegs_copy,
++ ppos, WRITE, chunk);
++ ll_rw_stats_tally(ll_i2sbi(inode), current->pid, file, chunk, 1);
++
++out_unlock:
++ if (tree_locked)
++ ll_tree_unlock(&tree);
++
++out:
++ if (retval > 0) {
++ count -= retval;
++ sum += retval;
++ if (retval == chunk && count > 0)
++ goto repeat;
++ }
++
++ up(&ll_i2info(inode)->lli_write_sem);
++
++ if (iov_copy && iov_copy != iov)
++ OBD_FREE(iov_copy, sizeof(*iov) * nrsegs_orig);
++
++ retval = (sum > 0) ? sum : retval;
++ ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_WRITE_BYTES,
++ retval > 0 ? retval : 0);
++ RETURN(retval);
++}
++
++static ssize_t ll_file_write(struct file *file, const char *buf, size_t count,
++ loff_t *ppos)
++{
++ struct iovec local_iov = { .iov_base = (void __user *)buf,
++ .iov_len = count };
++
++#ifdef HAVE_FILE_WRITEV
++ return ll_file_writev(file, &local_iov, 1, ppos);
++#else
++ struct kiocb kiocb;
++ ssize_t ret;
++
++ init_sync_kiocb(&kiocb, file);
++ kiocb.ki_pos = *ppos;
++ kiocb.ki_left = count;
++
++ ret = ll_file_aio_write(&kiocb, &local_iov, 1, kiocb.ki_pos);
++ *ppos = kiocb.ki_pos;
++
++ return ret;
++#endif
++}
++
++/*
++ * Send file content (through pagecache) somewhere with helper
++ */
++static ssize_t ll_file_sendfile(struct file *in_file, loff_t *ppos,size_t count,
++ read_actor_t actor, void *target)
++{
++ struct inode *inode = in_file->f_dentry->d_inode;
++ struct ll_inode_info *lli = ll_i2info(inode);
++ struct lov_stripe_md *lsm = lli->lli_smd;
++ struct ll_lock_tree tree;
++ struct ll_lock_tree_node *node;
++ struct ost_lvb lvb;
++ struct ll_ra_read bead;
++ int rc;
++ ssize_t retval;
++ __u64 kms;
++ ENTRY;
++ CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),size="LPSZ",offset=%Ld\n",
++ inode->i_ino, inode->i_generation, inode, count, *ppos);
++
++ /* "If nbyte is 0, read() will return 0 and have no other results."
++ * -- Single Unix Spec */
++ if (count == 0)
++ RETURN(0);
++
++ ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_READ_BYTES, count);
++ /* turn off the kernel's read-ahead */
++ in_file->f_ra.ra_pages = 0;
++
++ /* File with no objects, nothing to lock */
++ if (!lsm)
++ RETURN(generic_file_sendfile(in_file, ppos, count, actor, target));
++
++ node = ll_node_from_inode(inode, *ppos, *ppos + count - 1, LCK_PR);
++ if (IS_ERR(node))
++ RETURN(PTR_ERR(node));
++
++ tree.lt_fd = LUSTRE_FPRIVATE(in_file);
++ rc = ll_tree_lock(&tree, node, NULL, count,
++ in_file->f_flags & O_NONBLOCK?LDLM_FL_BLOCK_NOWAIT:0);
++ if (rc != 0)
++ RETURN(rc);
++
++ ll_clear_file_contended(inode);
++ ll_inode_size_lock(inode, 1);
++ /*
++ * Consistency guarantees: following possibilities exist for the
++ * relation between region being read and real file size at this
++ * moment:
++ *
++ * (A): the region is completely inside of the file;
++ *
++ * (B-x): x bytes of region are inside of the file, the rest is
++ * outside;
++ *
++ * (C): the region is completely outside of the file.
++ *
++ * This classification is stable under the DLM lock acquired by
++ * ll_tree_lock() above, because to change class another client has
++ * to take a DLM lock conflicting with our lock. Also, any updates to
++ * ->i_size by other threads on this client are serialized by
++ * ll_inode_size_lock(). This guarantees that short reads are handled
++ * correctly in the face of concurrent writes and truncates.
++ */
++ inode_init_lvb(inode, &lvb);
++ obd_merge_lvb(ll_i2sbi(inode)->ll_osc_exp, lsm, &lvb, 1);
++ kms = lvb.lvb_size;
++ if (*ppos + count - 1 > kms) {
++ /* A glimpse is necessary to determine whether we return a
++ * short read (B) or some zeroes at the end of the buffer (C) */
++ ll_inode_size_unlock(inode, 1);
++ retval = ll_glimpse_size(inode, LDLM_FL_BLOCK_GRANTED);
++ if (retval)
++ goto out;
++ } else {
++ /* region is within kms and, hence, within real file size (A) */
++ i_size_write(inode, kms);
++ ll_inode_size_unlock(inode, 1);
++ }
++
++ CDEBUG(D_INFO, "Send ino %lu, "LPSZ" bytes, offset %lld, i_size %llu\n",
++ inode->i_ino, count, *ppos, i_size_read(inode));
++
++ bead.lrr_start = *ppos >> CFS_PAGE_SHIFT;
++ bead.lrr_count = (count + CFS_PAGE_SIZE - 1) >> CFS_PAGE_SHIFT;
++ ll_ra_read_in(in_file, &bead);
++ /* BUG: 5972 */
++ file_accessed(in_file);
++ retval = generic_file_sendfile(in_file, ppos, count, actor, target);
++ ll_ra_read_ex(in_file, &bead);
++
++ out:
++ ll_tree_unlock(&tree);
++ RETURN(retval);
++}
++
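++/* LL_IOC_RECREATE_OBJ: privileged ioctl that asks the OSC to recreate
++ * an OST object for this file with the given object id and OST index
++ * (OBD_FL_RECREATE_OBJS), based on the existing stripe metadata. */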
++static int ll_lov_recreate_obj(struct inode *inode, struct file *file,
++ unsigned long arg)
++{
++ struct ll_inode_info *lli = ll_i2info(inode);
++ struct obd_export *exp = ll_i2obdexp(inode);
++ struct ll_recreate_obj ucreatp;
++ struct obd_trans_info oti = { 0 };
++ struct obdo *oa = NULL;
++ int lsm_size;
++ int rc = 0;
++ struct lov_stripe_md *lsm, *lsm2;
++ ENTRY;
++
++ if (!cfs_capable(CFS_CAP_SYS_ADMIN))
++ RETURN(-EPERM);
++
++ rc = copy_from_user(&ucreatp, (struct ll_recreate_obj *)arg,
++ sizeof(struct ll_recreate_obj));
++ if (rc) {
++ RETURN(-EFAULT);
++ }
++ OBDO_ALLOC(oa);
++ if (oa == NULL)
++ RETURN(-ENOMEM);
++
++ down(&lli->lli_size_sem);
++ lsm = lli->lli_smd;
++ if (lsm == NULL)
++ GOTO(out, rc = -ENOENT);
++ lsm_size = sizeof(*lsm) + (sizeof(struct lov_oinfo) *
++ (lsm->lsm_stripe_count));
++
++ OBD_ALLOC(lsm2, lsm_size);
++ if (lsm2 == NULL)
++ GOTO(out, rc = -ENOMEM);
++
++ oa->o_id = ucreatp.lrc_id;
++ oa->o_nlink = ucreatp.lrc_ost_idx;
++ oa->o_flags |= OBD_FL_RECREATE_OBJS;
++ oa->o_valid = OBD_MD_FLID | OBD_MD_FLFLAGS;
++ obdo_from_inode(oa, inode, OBD_MD_FLTYPE | OBD_MD_FLATIME |
++ OBD_MD_FLMTIME | OBD_MD_FLCTIME);
++
++ memcpy(lsm2, lsm, lsm_size);
++ rc = obd_create(exp, oa, &lsm2, &oti);
++
++ OBD_FREE(lsm2, lsm_size);
++ GOTO(out, rc);
++out:
++ up(&lli->lli_size_sem);
++ OBDO_FREE(oa);
++ return rc;
++}
++
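++/* Set the striping EA by opening the file with the supplied
++ * lov_user_md attached to the open intent; fails with -EEXIST if
++ * stripe objects already exist for this inode. */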
++int ll_lov_setstripe_ea_info(struct inode *inode, struct file *file,
++ int flags, struct lov_user_md *lum,
++ int lum_size)
++{
++ struct ll_inode_info *lli = ll_i2info(inode);
++ struct lov_stripe_md *lsm;
++ struct lookup_intent oit = {.it_op = IT_OPEN, .it_flags = flags};
++ int rc = 0;
++ ENTRY;
++
++ down(&lli->lli_size_sem);
++ lsm = lli->lli_smd;
++ if (lsm) {
++ up(&lli->lli_size_sem);
++ CDEBUG(D_IOCTL, "stripe already exists for ino %lu\n",
++ inode->i_ino);
++ RETURN(-EEXIST);
++ }
++
++ rc = ll_intent_file_open(file, lum, lum_size, &oit);
++ if (rc)
++ GOTO(out, rc);
++ if (it_disposition(&oit, DISP_LOOKUP_NEG))
++ GOTO(out_req_free, rc = -ENOENT);
++ rc = oit.d.lustre.it_status;
++ if (rc < 0)
++ GOTO(out_req_free, rc);
++
++ ll_release_openhandle(file->f_dentry, &oit);
++
++ out:
++ up(&lli->lli_size_sem);
++ ll_intent_release(&oit);
++ RETURN(rc);
++out_req_free:
++ ptlrpc_req_finished((struct ptlrpc_request *) oit.d.lustre.it_data);
++ goto out;
++}
++
++int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename,
++ struct lov_mds_md **lmmp, int *lmm_size,
++ struct ptlrpc_request **request)
++{
++ struct ll_sb_info *sbi = ll_i2sbi(inode);
++ struct ll_fid fid;
++ struct mds_body *body;
++ struct lov_mds_md *lmm = NULL;
++ struct ptlrpc_request *req = NULL;
++ int rc, lmmsize;
++
++ ll_inode2fid(&fid, inode);
++
++ rc = ll_get_max_mdsize(sbi, &lmmsize);
++ if (rc)
++ RETURN(rc);
++
++ rc = mdc_getattr_name(sbi->ll_mdc_exp, &fid,
++ filename, strlen(filename) + 1,
++ OBD_MD_FLEASIZE | OBD_MD_FLDIREA,
++ lmmsize, &req);
++ if (rc < 0) {
++ CDEBUG(D_INFO, "mdc_getattr_name failed "
++ "on %s: rc %d\n", filename, rc);
++ GOTO(out, rc);
++ }
++
++ body = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF,
++ sizeof(*body));
++ LASSERT(body != NULL); /* checked by mdc_getattr_name */
++ /* swabbed by mdc_getattr_name */
++ LASSERT(lustre_rep_swabbed(req, REPLY_REC_OFF));
++
++ lmmsize = body->eadatasize;
++
++ if (!(body->valid & (OBD_MD_FLEASIZE | OBD_MD_FLDIREA)) ||
++ lmmsize == 0) {
++ GOTO(out, rc = -ENODATA);
++ }
++
++ lmm = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF + 1,
++ lmmsize);
++ LASSERT(lmm != NULL);
++ LASSERT(lustre_rep_swabbed(req, REPLY_REC_OFF + 1));
++
++ if ((lmm->lmm_magic != cpu_to_le32(LOV_MAGIC)) &&
++ (lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_JOIN))) {
++ GOTO(out, rc = -EPROTO);
++ }
++ /*
++ * This is coming from the MDS, so is probably in
++ * little endian. We convert it to host endian before
++ * passing it to userspace.
++ */
++ if (LOV_MAGIC != cpu_to_le32(LOV_MAGIC)) {
++ if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC)) {
++ lustre_swab_lov_user_md((struct lov_user_md *)lmm);
++ /* if the function was called for a directory, avoid
++ * swabbing non-existent lsm objects */
++ if (S_ISREG(body->mode))
++ lustre_swab_lov_user_md_objects((struct lov_user_md *)lmm);
++ } else if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_JOIN)) {
++ lustre_swab_lov_user_md_join((struct lov_user_md_join *)lmm);
++ }
++ }
++
++ if (lmm->lmm_magic == LOV_MAGIC_JOIN) {
++ struct lov_stripe_md *lsm;
++ struct lov_user_md_join *lmj;
++ int lmj_size, i, aindex = 0;
++
++ rc = obd_unpackmd(sbi->ll_osc_exp, &lsm, lmm, lmmsize);
++ if (rc < 0)
++ GOTO(out, rc = -ENOMEM);
++ rc = obd_checkmd(sbi->ll_osc_exp, sbi->ll_mdc_exp, lsm);
++ if (rc)
++ GOTO(out_free_memmd, rc);
++
++ lmj_size = sizeof(struct lov_user_md_join) +
++ lsm->lsm_stripe_count *
++ sizeof(struct lov_user_ost_data_join);
++ OBD_ALLOC(lmj, lmj_size);
++ if (!lmj)
++ GOTO(out_free_memmd, rc = -ENOMEM);
++
++ memcpy(lmj, lmm, sizeof(struct lov_user_md_join));
++ for (i = 0; i < lsm->lsm_stripe_count; i++) {
++ struct lov_extent *lex =
++ &lsm->lsm_array->lai_ext_array[aindex];
++
++ if (lex->le_loi_idx + lex->le_stripe_count <= i)
++ aindex ++;
++ CDEBUG(D_INFO, "aindex %d i %d l_extent_start "
++ LPU64" len %d\n", aindex, i,
++ lex->le_start, (int)lex->le_len);
++ lmj->lmm_objects[i].l_extent_start =
++ lex->le_start;
++
++ if ((int)lex->le_len == -1)
++ lmj->lmm_objects[i].l_extent_end = -1;
++ else
++ lmj->lmm_objects[i].l_extent_end =
++ lex->le_start + lex->le_len;
++ lmj->lmm_objects[i].l_object_id =
++ lsm->lsm_oinfo[i]->loi_id;
++ lmj->lmm_objects[i].l_object_gr =
++ lsm->lsm_oinfo[i]->loi_gr;
++ lmj->lmm_objects[i].l_ost_gen =
++ lsm->lsm_oinfo[i]->loi_ost_gen;
++ lmj->lmm_objects[i].l_ost_idx =
++ lsm->lsm_oinfo[i]->loi_ost_idx;
++ }
++ lmm = (struct lov_mds_md *)lmj;
++ lmmsize = lmj_size;
++out_free_memmd:
++ obd_free_memmd(sbi->ll_osc_exp, &lsm);
++ }
++out:
++ *lmmp = lmm;
++ *lmm_size = lmmsize;
++ *request = req;
++ return rc;
++}
++
++static int ll_lov_setea(struct inode *inode, struct file *file,
++ unsigned long arg)
++{
++ int flags = MDS_OPEN_HAS_OBJS | FMODE_WRITE;
++ struct lov_user_md *lump;
++ int lum_size = sizeof(struct lov_user_md) +
++ sizeof(struct lov_user_ost_data);
++ int rc;
++ ENTRY;
++
++ if (!cfs_capable(CFS_CAP_SYS_ADMIN))
++ RETURN(-EPERM);
++
++ OBD_ALLOC(lump, lum_size);
++ if (lump == NULL) {
++ RETURN(-ENOMEM);
++ }
++ rc = copy_from_user(lump, (struct lov_user_md *)arg, lum_size);
++ if (rc) {
++ OBD_FREE(lump, lum_size);
++ RETURN(-EFAULT);
++ }
++
++ rc = ll_lov_setstripe_ea_info(inode, file, flags, lump, lum_size);
++
++ OBD_FREE(lump, lum_size);
++ RETURN(rc);
++}
++
++static int ll_lov_setstripe(struct inode *inode, struct file *file,
++ unsigned long arg)
++{
++ struct lov_user_md lum, *lump = (struct lov_user_md *)arg;
++ int rc;
++ int flags = FMODE_WRITE;
++ ENTRY;
++
++ /* Bug 1152: copy properly when this is no longer true */
++ LASSERT(sizeof(lum) == sizeof(*lump));
++ LASSERT(sizeof(lum.lmm_objects[0]) == sizeof(lump->lmm_objects[0]));
++ rc = copy_from_user(&lum, lump, sizeof(lum));
++ if (rc)
++ RETURN(-EFAULT);
++
++ rc = ll_lov_setstripe_ea_info(inode, file, flags, &lum, sizeof(lum));
++ if (rc == 0) {
++ put_user(0, &lump->lmm_stripe_count);
++ rc = obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2obdexp(inode),
++ 0, ll_i2info(inode)->lli_smd, lump);
++ }
++ RETURN(rc);
++}
++
++static int ll_lov_getstripe(struct inode *inode, unsigned long arg)
++{
++ struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
++
++ if (!lsm)
++ RETURN(-ENODATA);
++
++ return obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2obdexp(inode), 0, lsm,
++ (void *)arg);
++}
++
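++/* Take a [0, EOF] group extent lock with the given group id and stash
++ * its handle in the file descriptor; while held, ordinary extent
++ * locking is bypassed via LL_FILE_IGNORE_LOCK. */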
++static int ll_get_grouplock(struct inode *inode, struct file *file,
++ unsigned long arg)
++{
++ struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
++ ldlm_policy_data_t policy = { .l_extent = { .start = 0,
++ .end = OBD_OBJECT_EOF}};
++ struct lustre_handle lockh = { 0 };
++ struct ll_inode_info *lli = ll_i2info(inode);
++ struct lov_stripe_md *lsm = lli->lli_smd;
++ int flags = 0, rc;
++ ENTRY;
++
++ if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
++ RETURN(-EINVAL);
++ }
++
++ policy.l_extent.gid = arg;
++ if (file->f_flags & O_NONBLOCK)
++ flags = LDLM_FL_BLOCK_NOWAIT;
++
++ rc = ll_extent_lock(fd, inode, lsm, LCK_GROUP, &policy, &lockh, flags);
++ if (rc)
++ RETURN(rc);
++
++ fd->fd_flags |= LL_FILE_GROUP_LOCKED|LL_FILE_IGNORE_LOCK;
++ fd->fd_gid = arg;
++ memcpy(&fd->fd_cwlockh, &lockh, sizeof(lockh));
++
++ RETURN(0);
++}
++
++static int ll_put_grouplock(struct inode *inode, struct file *file,
++ unsigned long arg)
++{
++ struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
++ struct ll_inode_info *lli = ll_i2info(inode);
++ struct lov_stripe_md *lsm = lli->lli_smd;
++ int rc;
++ ENTRY;
++
++ if (!(fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
++ /* Ugh, it's already unlocked. */
++ RETURN(-EINVAL);
++ }
++
++ if (fd->fd_gid != arg) /* Ugh? Unlocking with a different gid? */
++ RETURN(-EINVAL);
++
++ fd->fd_flags &= ~(LL_FILE_GROUP_LOCKED|LL_FILE_IGNORE_LOCK);
++
++ rc = ll_extent_unlock(fd, inode, lsm, LCK_GROUP, &fd->fd_cwlockh);
++ if (rc)
++ RETURN(rc);
++
++ fd->fd_gid = 0;
++ memset(&fd->fd_cwlockh, 0, sizeof(fd->fd_cwlockh));
++
++ RETURN(0);
++}
++
++#if LUSTRE_FIX >= 50
++static int join_sanity_check(struct inode *head, struct inode *tail)
++{
++ ENTRY;
++ if ((ll_i2sbi(head)->ll_flags & LL_SBI_JOIN) == 0) {
++ CERROR("server do not support join \n");
++ RETURN(-EINVAL);
++ }
++ if (!S_ISREG(tail->i_mode) || !S_ISREG(head->i_mode)) {
++ CERROR("tail ino %lu and ino head %lu must be regular\n",
++ head->i_ino, tail->i_ino);
++ RETURN(-EINVAL);
++ }
++ if (head->i_ino == tail->i_ino) {
++ CERROR("file %lu can not be joined to itself \n", head->i_ino);
++ RETURN(-EINVAL);
++ }
++ if (i_size_read(head) % JOIN_FILE_ALIGN) {
++ CERROR("hsize %llu must be times of 64K\n", i_size_read(head));
++ RETURN(-EINVAL);
++ }
++ RETURN(0);
++}
++
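++/* Ask the MDS to join the tail file onto the head file by sending an
++ * open intent with O_JOIN_FILE; the head's current size is passed in
++ * the op data so the server knows where the tail's data begins. */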
++static int join_file(struct inode *head_inode, struct file *head_filp,
++ struct file *tail_filp)
++{
++ struct dentry *tail_dentry = tail_filp->f_dentry;
++ struct lookup_intent oit = {.it_op = IT_OPEN,
++ .it_flags = head_filp->f_flags|O_JOIN_FILE};
++ struct ldlm_enqueue_info einfo = { LDLM_IBITS, LCK_PW,
++ ll_mdc_blocking_ast, ldlm_completion_ast, NULL, NULL };
++
++ struct lustre_handle lockh;
++ struct mdc_op_data *op_data;
++ int rc;
++ loff_t data;
++ ENTRY;
++
++ tail_dentry = tail_filp->f_dentry;
++
++ OBD_ALLOC_PTR(op_data);
++ if (op_data == NULL) {
++ RETURN(-ENOMEM);
++ }
++
++ data = i_size_read(head_inode);
++ ll_prepare_mdc_op_data(op_data, head_inode,
++ tail_dentry->d_parent->d_inode,
++ tail_dentry->d_name.name,
++ tail_dentry->d_name.len, 0, &data);
++ rc = mdc_enqueue(ll_i2mdcexp(head_inode), &einfo, &oit,
++ op_data, &lockh, NULL, 0, 0);
++
++ if (rc < 0)
++ GOTO(out, rc);
++
++ rc = oit.d.lustre.it_status;
++
++ if (rc < 0 || it_open_error(DISP_OPEN_OPEN, &oit)) {
++ rc = rc ? rc : it_open_error(DISP_OPEN_OPEN, &oit);
++ ptlrpc_req_finished((struct ptlrpc_request *)
++ oit.d.lustre.it_data);
++ GOTO(out, rc);
++ }
++
++ if (oit.d.lustre.it_lock_mode) { /* If we got a lock, release
++ * it right away */
++ ldlm_lock_decref(&lockh, oit.d.lustre.it_lock_mode);
++ oit.d.lustre.it_lock_mode = 0;
++ }
++ ptlrpc_req_finished((struct ptlrpc_request *) oit.d.lustre.it_data);
++ it_clear_disposition(&oit, DISP_ENQ_COMPLETE);
++ ll_release_openhandle(head_filp->f_dentry, &oit);
++out:
++ if (op_data)
++ OBD_FREE_PTR(op_data);
++ ll_intent_release(&oit);
++ RETURN(rc);
++}
++
++static int ll_file_join(struct inode *head, struct file *filp,
++ char *filename_tail)
++{
++ struct inode *tail = NULL, *first = NULL, *second = NULL;
++ struct dentry *tail_dentry;
++ struct file *tail_filp, *first_filp, *second_filp;
++ struct ll_lock_tree first_tree, second_tree;
++ struct ll_lock_tree_node *first_node, *second_node;
++ struct ll_inode_info *hlli = ll_i2info(head), *tlli;
++ int rc = 0, cleanup_phase = 0;
++ ENTRY;
++
++ CDEBUG(D_VFSTRACE, "VFS Op:head=%lu/%u(%p) tail %s\n",
++ head->i_ino, head->i_generation, head, filename_tail);
++
++ tail_filp = filp_open(filename_tail, O_WRONLY, 0644);
++ if (IS_ERR(tail_filp)) {
++ CERROR("Can not open tail file %s", filename_tail);
++ rc = PTR_ERR(tail_filp);
++ GOTO(cleanup, rc);
++ }
++ tail = igrab(tail_filp->f_dentry->d_inode);
++
++ tlli = ll_i2info(tail);
++ tail_dentry = tail_filp->f_dentry;
++ LASSERT(tail_dentry);
++ cleanup_phase = 1;
++
++ /* reorder the inodes for lock ordering */
++ first = head->i_ino > tail->i_ino ? head : tail;
++ second = head->i_ino > tail->i_ino ? tail : head;
++ first_filp = head->i_ino > tail->i_ino ? filp : tail_filp;
++ second_filp = head->i_ino > tail->i_ino ? tail_filp : filp;
++
++ CDEBUG(D_INFO, "reorder object from %lu:%lu to %lu:%lu \n",
++ head->i_ino, tail->i_ino, first->i_ino, second->i_ino);
++ first_node = ll_node_from_inode(first, 0, OBD_OBJECT_EOF, LCK_EX);
++ if (IS_ERR(first_node)){
++ rc = PTR_ERR(first_node);
++ GOTO(cleanup, rc);
++ }
++ first_tree.lt_fd = first_filp->private_data;
++ rc = ll_tree_lock(&first_tree, first_node, NULL, 0, 0);
++ if (rc != 0)
++ GOTO(cleanup, rc);
++ cleanup_phase = 2;
++
++ second_node = ll_node_from_inode(second, 0, OBD_OBJECT_EOF, LCK_EX);
++ if (IS_ERR(second_node)){
++ rc = PTR_ERR(second_node);
++ GOTO(cleanup, rc);
++ }
++ second_tree.lt_fd = second_filp->private_data;
++ rc = ll_tree_lock(&second_tree, second_node, NULL, 0, 0);
++ if (rc != 0)
++ GOTO(cleanup, rc);
++ cleanup_phase = 3;
++
++ rc = join_sanity_check(head, tail);
++ if (rc)
++ GOTO(cleanup, rc);
++
++ rc = join_file(head, filp, tail_filp);
++ if (rc)
++ GOTO(cleanup, rc);
++cleanup:
++ switch (cleanup_phase) {
++ case 3:
++ ll_tree_unlock(&second_tree);
++ obd_cancel_unused(ll_i2obdexp(second),
++ ll_i2info(second)->lli_smd, 0, NULL);
++ case 2:
++ ll_tree_unlock(&first_tree);
++ obd_cancel_unused(ll_i2obdexp(first),
++ ll_i2info(first)->lli_smd, 0, NULL);
++ case 1:
++ filp_close(tail_filp, 0);
++ if (tail)
++ iput(tail);
++ if (head && rc == 0) {
++ obd_free_memmd(ll_i2sbi(head)->ll_osc_exp,
++ &hlli->lli_smd);
++ hlli->lli_smd = NULL;
++ }
++ case 0:
++ break;
++ default:
++ CERROR("invalid cleanup_phase %d\n", cleanup_phase);
++ LBUG();
++ }
++ RETURN(rc);
++}
++#endif /* LUSTRE_FIX >= 50 */
++
++/**
++ * Close inode open handle
++ *
++ * \param dentry [in] dentry which contains the inode
++ * \param it [in,out] intent which contains open info and result
++ *
++ * \retval 0 success
++ * \retval <0 failure
++ */
++int ll_release_openhandle(struct dentry *dentry, struct lookup_intent *it)
++{
++ struct inode *inode = dentry->d_inode;
++ struct obd_client_handle *och;
++ int rc;
++ ENTRY;
++
++ LASSERT(inode);
++
++ /* Root? Do nothing. */
++ if (dentry->d_inode->i_sb->s_root == dentry)
++ RETURN(0);
++
++ /* No open handle to close? Move away */
++ if (!it_disposition(it, DISP_OPEN_OPEN))
++ RETURN(0);
++
++ LASSERT(it_open_error(DISP_OPEN_OPEN, it) == 0);
++
++ OBD_ALLOC(och, sizeof(*och));
++ if (!och)
++ GOTO(out, rc = -ENOMEM);
++
++ ll_och_fill(ll_i2info(inode), it, och);
++
++ rc = ll_close_inode_openhandle(inode, och);
++
++ OBD_FREE(och, sizeof(*och));
++ out:
++ /* this one is in place of ll_file_open */
++ if (it_disposition(it, DISP_ENQ_OPEN_REF))
++ ptlrpc_req_finished(it->d.lustre.it_data);
++ it_clear_disposition(it, DISP_ENQ_OPEN_REF);
++ RETURN(rc);
++}
++
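++/* Fill in @fiemap for this inode by forwarding the request to the OSC
++ * via obd_get_info(KEY_FIEMAP); @num_bytes is the size of the whole
++ * fiemap buffer, including the preallocated extent array. */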
++int ll_fiemap(struct inode *inode, struct ll_user_fiemap *fiemap,
++ int num_bytes)
++{
++ struct obd_export *exp = ll_i2obdexp(inode);
++ struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
++ struct ll_fiemap_info_key fm_key = { .name = KEY_FIEMAP, };
++ int vallen = num_bytes;
++ int rc;
++ ENTRY;
++
++ /* If the stripe_count > 1 and the application does not understand
++ * DEVICE_ORDER flag, then it cannot interpret the extents correctly.
++ */
++ if (lsm->lsm_stripe_count > 1 &&
++ !(fiemap->fm_flags & FIEMAP_FLAG_DEVICE_ORDER))
++ return -EOPNOTSUPP;
++
++ fm_key.oa.o_id = lsm->lsm_object_id;
++ fm_key.oa.o_valid = OBD_MD_FLID;
++
++ obdo_from_inode(&fm_key.oa, inode, OBD_MD_FLFID | OBD_MD_FLSIZE);
++
++ /* If the file size is 0, there are no objects to map */
++ if (fm_key.oa.o_size == 0) {
++ fiemap->fm_mapped_extents = 0;
++ RETURN(0);
++ }
++
++ memcpy(&fm_key.fiemap, fiemap, sizeof(*fiemap));
++
++ rc = obd_get_info(exp, sizeof(fm_key), &fm_key, &vallen, fiemap, lsm);
++ if (rc)
++ CERROR("obd_get_info failed: rc = %d\n", rc);
++
++ RETURN(rc);
++}
++
++int ll_file_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
++ unsigned long arg)
++{
++ struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
++ int flags;
++ ENTRY;
++
++ CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),cmd=%x\n", inode->i_ino,
++ inode->i_generation, inode, cmd);
++ ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_IOCTL, 1);
++
++ /* asm-ppc{,64} declares TCGETS, et al. as type 't' not 'T' */
++ if (_IOC_TYPE(cmd) == 'T' || _IOC_TYPE(cmd) == 't') /* tty ioctls */
++ RETURN(-ENOTTY);
++
++ switch(cmd) {
++ case LL_IOC_GETFLAGS:
++ /* Get the current value of the file flags */
++ return put_user(fd->fd_flags, (int *)arg);
++ case LL_IOC_SETFLAGS:
++ case LL_IOC_CLRFLAGS:
++ /* Set or clear specific file flags */
++ /* XXX This probably needs checks to ensure the flags are
++ * not abused, and to handle any flag side effects.
++ */
++ if (get_user(flags, (int *) arg))
++ RETURN(-EFAULT);
++
++ if (cmd == LL_IOC_SETFLAGS) {
++ if ((flags & LL_FILE_IGNORE_LOCK) &&
++ !(file->f_flags & O_DIRECT)) {
++ CERROR("%s: unable to disable locking on "
++ "non-O_DIRECT file\n", current->comm);
++ RETURN(-EINVAL);
++ }
++
++ fd->fd_flags |= flags;
++ } else {
++ fd->fd_flags &= ~flags;
++ }
++ RETURN(0);
++ case LL_IOC_LOV_SETSTRIPE:
++ RETURN(ll_lov_setstripe(inode, file, arg));
++ case LL_IOC_LOV_SETEA:
++ RETURN(ll_lov_setea(inode, file, arg));
++ case LL_IOC_LOV_GETSTRIPE:
++ RETURN(ll_lov_getstripe(inode, arg));
++ case LL_IOC_RECREATE_OBJ:
++ RETURN(ll_lov_recreate_obj(inode, file, arg));
++ case EXT3_IOC_FIEMAP: {
++ struct ll_user_fiemap *fiemap_s;
++ size_t num_bytes, ret_bytes;
++ unsigned int extent_count;
++ int rc = 0;
++
++ /* Get the extent count so we can calculate the size of
++ * required fiemap buffer */
++ if (get_user(extent_count,
++ &((struct ll_user_fiemap __user *)arg)->fm_extent_count))
++ RETURN(-EFAULT);
++ num_bytes = sizeof(*fiemap_s) + (extent_count *
++ sizeof(struct ll_fiemap_extent));
++ OBD_VMALLOC(fiemap_s, num_bytes);
++ if (fiemap_s == NULL)
++ RETURN(-ENOMEM);
++
++ if (copy_from_user(fiemap_s,(struct ll_user_fiemap __user *)arg,
++ sizeof(*fiemap_s)))
++ GOTO(error, rc = -EFAULT);
++
++ if (fiemap_s->fm_flags & ~LUSTRE_FIEMAP_FLAGS_COMPAT) {
++ fiemap_s->fm_flags = fiemap_s->fm_flags &
++ ~LUSTRE_FIEMAP_FLAGS_COMPAT;
++ if (copy_to_user((char *)arg, fiemap_s,
++ sizeof(*fiemap_s)))
++ GOTO(error, rc = -EFAULT);
++
++ GOTO(error, rc = -EBADR);
++ }
++
++ /* If fm_extent_count is non-zero, read the first extent, since
++ * it carries the end offset and device from the previous
++ * fiemap call. */
++ if (extent_count) {
++ if (copy_from_user(&fiemap_s->fm_extents[0],
++ (char __user *)arg + sizeof(*fiemap_s),
++ sizeof(struct ll_fiemap_extent)))
++ GOTO(error, rc = -EFAULT);
++ }
++
++ if (fiemap_s->fm_flags & FIEMAP_FLAG_SYNC) {
++ int rc;
++
++ rc = filemap_fdatawrite(inode->i_mapping);
++ if (rc)
++ GOTO(error, rc);
++ }
++
++ rc = ll_fiemap(inode, fiemap_s, num_bytes);
++ if (rc)
++ GOTO(error, rc);
++
++ ret_bytes = sizeof(struct ll_user_fiemap);
++
++ if (extent_count != 0)
++ ret_bytes += (fiemap_s->fm_mapped_extents *
++ sizeof(struct ll_fiemap_extent));
++
++ if (copy_to_user((void *)arg, fiemap_s, ret_bytes))
++ rc = -EFAULT;
++
++error:
++ OBD_VFREE(fiemap_s, num_bytes);
++ RETURN(rc);
++ }
++ case EXT3_IOC_GETFLAGS:
++ case EXT3_IOC_SETFLAGS:
++ RETURN(ll_iocontrol(inode, file, cmd, arg));
++ case EXT3_IOC_GETVERSION_OLD:
++ case EXT3_IOC_GETVERSION:
++ RETURN(put_user(inode->i_generation, (int *)arg));
++ case LL_IOC_JOIN: {
++#if LUSTRE_FIX >= 50
++ /* Allow file join in beta builds for debugging */
++ char *ftail;
++ int rc;
++
++ ftail = getname((const char *)arg);
++ if (IS_ERR(ftail))
++ RETURN(PTR_ERR(ftail));
++ rc = ll_file_join(inode, file, ftail);
++ putname(ftail);
++ RETURN(rc);
++#else
++ CWARN("file join is not supported in this version of Lustre\n");
++ RETURN(-ENOTTY);
++#endif
++ }
++ case LL_IOC_GROUP_LOCK:
++ RETURN(ll_get_grouplock(inode, file, arg));
++ case LL_IOC_GROUP_UNLOCK:
++ RETURN(ll_put_grouplock(inode, file, arg));
++ case IOC_OBD_STATFS:
++ RETURN(ll_obd_statfs(inode, (void *)arg));
++ case OBD_IOC_GETNAME_OLD:
++ case OBD_IOC_GETNAME: {
++ struct obd_device *obd =
++ class_exp2obd(ll_i2sbi(inode)->ll_osc_exp);
++ if (!obd)
++ RETURN(-EFAULT);
++ if (copy_to_user((void *)arg, obd->obd_name,
++ strlen(obd->obd_name) + 1))
++ RETURN (-EFAULT);
++ RETURN(0);
++ }
++
++ /* We need to special case any other ioctls we want to handle,
++ * to send them to the MDS/OST as appropriate and to properly
++ * network encode the arg field.
++ case EXT3_IOC_SETVERSION_OLD:
++ case EXT3_IOC_SETVERSION:
++ */
++ default: {
++ int err;
++
++ if (LLIOC_STOP ==
++ ll_iocontrol_call(inode, file, cmd, arg, &err))
++ RETURN(err);
++
++ RETURN(obd_iocontrol(cmd, ll_i2obdexp(inode), 0, NULL,
++ (void *)arg));
++ }
++ }
++}
++
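For context, the EXT3_IOC_FIEMAP case above implements the usual two-call FIEMAP protocol: a first call with fm_extent_count == 0 only fills in fm_mapped_extents, and a second call with a buffer sized for that many extents fetches them. Below is a minimal userspace sketch of that protocol against the handler above; the helper name is hypothetical, error handling is abbreviated, and struct ll_user_fiemap plus the flag/ioctl definitions are assumed to come from lustre/lustre_user.h.

/* Hypothetical helper: print the extents of a (possibly striped) file
 * using the two-call protocol served by ll_file_ioctl() above.
 * FIEMAP_FLAG_DEVICE_ORDER is set because ll_fiemap() above rejects
 * multi-stripe files without it. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <lustre/lustre_user.h>   /* assumed location of the definitions */

int dump_extents(const char *path)
{
        struct ll_user_fiemap *fm;
        void *tmp;
        unsigned int i, n;
        int rc = -1, fd = open(path, O_RDONLY);

        if (fd < 0)
                return -1;
        fm = calloc(1, sizeof(*fm));
        if (fm == NULL)
                goto out;

        /* call 1: fm_extent_count == 0, the handler only returns the count */
        fm->fm_length = ~0ULL;                   /* map the whole file */
        fm->fm_flags = FIEMAP_FLAG_DEVICE_ORDER; /* required if stripes > 1 */
        if (ioctl(fd, EXT3_IOC_FIEMAP, fm) < 0)
                goto out;
        n = fm->fm_mapped_extents;

        /* call 2: allocate room for n extents and fetch them */
        tmp = realloc(fm, sizeof(*fm) + n * sizeof(struct ll_fiemap_extent));
        if (tmp == NULL)
                goto out;
        fm = tmp;
        memset(fm->fm_extents, 0, n * sizeof(struct ll_fiemap_extent));
        fm->fm_extent_count = n;
        if (ioctl(fd, EXT3_IOC_FIEMAP, fm) < 0)
                goto out;

        for (i = 0; i < fm->fm_mapped_extents; i++)
                printf("extent %u: logical=%llu len=%llu\n", i,
                       (unsigned long long)fm->fm_extents[i].fe_logical,
                       (unsigned long long)fm->fm_extents[i].fe_length);
        rc = 0;
out:
        free(fm);
        close(fd);
        return rc;
}
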
++loff_t ll_file_seek(struct file *file, loff_t offset, int origin)
++{
++ struct inode *inode = file->f_dentry->d_inode;
++ struct ll_inode_info *lli = ll_i2info(inode);
++ struct lov_stripe_md *lsm = lli->lli_smd;
++ loff_t retval;
++ ENTRY;
++ retval = offset + ((origin == 2) ? i_size_read(inode) :
++ (origin == 1) ? file->f_pos : 0);
++ CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), to=%Lu=%#Lx(%s)\n",
++ inode->i_ino, inode->i_generation, inode, retval, retval,
++ origin == 2 ? "SEEK_END": origin == 1 ? "SEEK_CUR" : "SEEK_SET");
++ ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_LLSEEK, 1);
++
++ if (origin == 2) { /* SEEK_END */
++ int nonblock = 0, rc;
++
++ if (file->f_flags & O_NONBLOCK)
++ nonblock = LDLM_FL_BLOCK_NOWAIT;
++
++ if (lsm != NULL) {
++ rc = ll_glimpse_size(inode, nonblock);
++ if (rc != 0)
++ RETURN(rc);
++ }
++
++ ll_inode_size_lock(inode, 0);
++ offset += i_size_read(inode);
++ ll_inode_size_unlock(inode, 0);
++ } else if (origin == 1) { /* SEEK_CUR */
++ offset += file->f_pos;
++ }
++
++ retval = -EINVAL;
++ if (offset >= 0 && offset <= ll_file_maxbytes(inode)) {
++ if (offset != file->f_pos) {
++ file->f_pos = offset;
++ file->f_version = 0;
++ }
++ retval = offset;
++ }
++
++ RETURN(retval);
++}
++
++int ll_fsync(struct file *file, struct dentry *dentry, int data)
++{
++ struct inode *inode = dentry->d_inode;
++ struct ll_inode_info *lli = ll_i2info(inode);
++ struct lov_stripe_md *lsm = lli->lli_smd;
++ struct ll_fid fid;
++ struct ptlrpc_request *req;
++ int rc, err;
++ ENTRY;
++ CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
++ inode->i_generation, inode);
++ ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FSYNC, 1);
++
++ /* fsync's caller has already called _fdata{sync,write}, we want
++ * that IO to finish before calling the osc and mdc sync methods */
++ rc = filemap_fdatawait(inode->i_mapping);
++
++ /* catch async errors that were recorded back when async writeback
++ * failed for pages in this mapping. */
++ err = lli->lli_async_rc;
++ lli->lli_async_rc = 0;
++ if (rc == 0)
++ rc = err;
++ if (lsm) {
++ err = lov_test_and_clear_async_rc(lsm);
++ if (rc == 0)
++ rc = err;
++ }
++
++ ll_inode2fid(&fid, inode);
++ err = mdc_sync(ll_i2sbi(inode)->ll_mdc_exp, &fid, &req);
++ if (!rc)
++ rc = err;
++ if (!err)
++ ptlrpc_req_finished(req);
++
++ if (data && lsm) {
++ struct obdo *oa;
++
++ OBDO_ALLOC(oa);
++ if (!oa)
++ RETURN(rc ? rc : -ENOMEM);
++
++ oa->o_id = lsm->lsm_object_id;
++ oa->o_valid = OBD_MD_FLID;
++ obdo_from_inode(oa, inode, OBD_MD_FLTYPE | OBD_MD_FLATIME |
++ OBD_MD_FLMTIME | OBD_MD_FLCTIME);
++
++ err = obd_sync(ll_i2sbi(inode)->ll_osc_exp, oa, lsm,
++ 0, OBD_OBJECT_EOF);
++ if (!rc)
++ rc = err;
++ OBDO_FREE(oa);
++ }
++
++ RETURN(rc);
++}
++
++int ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock)
++{
++ struct inode *inode = file->f_dentry->d_inode;
++ struct ll_sb_info *sbi = ll_i2sbi(inode);
++ struct ldlm_res_id res_id =
++ { .name = {inode->i_ino, inode->i_generation, LDLM_FLOCK} };
++ struct ldlm_enqueue_info einfo = { LDLM_FLOCK, 0, NULL,
++ ldlm_flock_completion_ast, NULL, file_lock };
++ struct lustre_handle lockh = {0};
++ ldlm_policy_data_t flock;
++ int flags = 0;
++ int rc;
++ ENTRY;
++
++ CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu file_lock=%p\n",
++ inode->i_ino, file_lock);
++ ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FLOCK, 1);
++
++ if (file_lock->fl_flags & FL_FLOCK) {
++ LASSERT((cmd == F_SETLKW) || (cmd == F_SETLK));
++ /* set missing params for flock() calls */
++ file_lock->fl_end = OFFSET_MAX;
++ file_lock->fl_pid = current->tgid;
++ }
++ flock.l_flock.pid = file_lock->fl_pid;
++ flock.l_flock.start = file_lock->fl_start;
++ flock.l_flock.end = file_lock->fl_end;
++
++ switch (file_lock->fl_type) {
++ case F_RDLCK:
++ einfo.ei_mode = LCK_PR;
++ break;
++ case F_UNLCK:
++ /* An unlock request may or may not have any relation to
++ * existing locks so we may not be able to pass a lock handle
++ * via a normal ldlm_lock_cancel() request. The request may even
++ * unlock a byte range in the middle of an existing lock. In
++ * order to process an unlock request we need all of the same
++ * information that is given with a normal read or write record
++ * lock request. To avoid creating another ldlm unlock (cancel)
++ * message we'll treat a LCK_NL flock request as an unlock. */
++ einfo.ei_mode = LCK_NL;
++ break;
++ case F_WRLCK:
++ einfo.ei_mode = LCK_PW;
++ break;
++ default:
++ CERROR("unknown fcntl lock type: %d\n", file_lock->fl_type);
++ RETURN (-EINVAL);
++ }
++
++ switch (cmd) {
++ case F_SETLKW:
++#ifdef F_SETLKW64
++ case F_SETLKW64:
++#endif
++ flags = 0;
++ break;
++ case F_SETLK:
++#ifdef F_SETLK64
++ case F_SETLK64:
++#endif
++ flags = LDLM_FL_BLOCK_NOWAIT;
++ break;
++ case F_GETLK:
++#ifdef F_GETLK64
++ case F_GETLK64:
++#endif
++ flags = LDLM_FL_TEST_LOCK;
++ /* Save the old mode so that if the mode in the lock changes we
++ * can decrement the appropriate reader or writer refcount. */
++ file_lock->fl_type = einfo.ei_mode;
++ break;
++ default:
++ CERROR("unknown fcntl lock command: %d\n", cmd);
++ RETURN (-EINVAL);
++ }
++
++ CDEBUG(D_DLMTRACE, "inode=%lu, pid=%u, flags=%#x, mode=%u, "
++ "start="LPU64", end="LPU64"\n", inode->i_ino, flock.l_flock.pid,
++ flags, einfo.ei_mode, flock.l_flock.start, flock.l_flock.end);
++
++ rc = ldlm_cli_enqueue(sbi->ll_mdc_exp, NULL, &einfo, res_id,
++ &flock, &flags, NULL, 0, NULL, &lockh, 0);
++ if ((file_lock->fl_flags & FL_FLOCK) &&
++ (rc == 0 || file_lock->fl_type == F_UNLCK))
++ ll_flock_lock_file_wait(file, file_lock, (cmd == F_SETLKW));
++#ifdef HAVE_F_OP_FLOCK
++ if ((file_lock->fl_flags & FL_POSIX) &&
++ (rc == 0 || file_lock->fl_type == F_UNLCK) &&
++ !(flags & LDLM_FL_TEST_LOCK))
++ posix_lock_file_wait(file, file_lock);
++#endif
++
++ RETURN(rc);
++}
++
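The fcntl/flock cases above translate POSIX lock requests into LDLM flock enqueues: F_RDLCK maps to LCK_PR, F_WRLCK to LCK_PW, F_UNLCK to LCK_NL, with F_SETLK adding LDLM_FL_BLOCK_NOWAIT and F_GETLK adding LDLM_FL_TEST_LOCK. A small userspace sketch of the path this serves, assuming a client mounted with -o flock (the helper name is illustrative):

/* Hypothetical helper: take and drop a whole-file write lock through
 * fcntl(2); on a Lustre client this reaches ll_file_flock() above and
 * enqueues an LDLM_FLOCK lock of mode LCK_PW. */
#include <fcntl.h>
#include <unistd.h>

int lock_whole_file(int fd)
{
        struct flock fl = {
                .l_type   = F_WRLCK,   /* -> LCK_PW */
                .l_whence = SEEK_SET,
                .l_start  = 0,
                .l_len    = 0,         /* 0 means "to EOF" */
        };
        int rc = fcntl(fd, F_SETLKW, &fl);  /* blocking: flags == 0 */

        if (rc == 0) {
                fl.l_type = F_UNLCK;   /* -> LCK_NL, i.e. unlock */
                rc = fcntl(fd, F_SETLK, &fl);
        }
        return rc;
}
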
++int ll_file_noflock(struct file *file, int cmd, struct file_lock *file_lock)
++{
++ ENTRY;
++
++ RETURN(-ENOSYS);
++}
++
++int ll_have_md_lock(struct inode *inode, __u64 bits)
++{
++ struct lustre_handle lockh;
++ struct ldlm_res_id res_id = { .name = {0} };
++ struct obd_device *obddev;
++ ldlm_policy_data_t policy = { .l_inodebits = {bits}};
++ int flags;
++ ENTRY;
++
++ if (!inode)
++ RETURN(0);
++
++ obddev = ll_i2mdcexp(inode)->exp_obd;
++ res_id.name[0] = inode->i_ino;
++ res_id.name[1] = inode->i_generation;
++
++ CDEBUG(D_INFO, "trying to match res "LPU64"\n", res_id.name[0]);
++
++ flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING | LDLM_FL_TEST_LOCK;
++ if (ldlm_lock_match(obddev->obd_namespace, flags, &res_id, LDLM_IBITS,
++ &policy, LCK_CR|LCK_CW|LCK_PR|LCK_PW, &lockh)) {
++ RETURN(1);
++ }
++
++ RETURN(0);
++}
++
++static int ll_inode_revalidate_fini(struct inode *inode, int rc) {
++ if (rc == -ENOENT) { /* Already unlinked. Just update nlink
++ * and return success */
++ inode->i_nlink = 0;
++ /* This path cannot be hit for regular files except in
++ * case of obscure races, so there is no need to
++ * validate size. */
++ if (!S_ISREG(inode->i_mode) &&
++ !S_ISDIR(inode->i_mode))
++ return 0;
++ }
++
++ if (rc) {
++ CERROR("failure %d inode %lu\n", rc, inode->i_ino);
++ return -abs(rc);
++ }
++
++ return 0;
++}
++
++int ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it)
++{
++ struct inode *inode = dentry->d_inode;
++ struct ptlrpc_request *req = NULL;
++ struct obd_export *exp;
++ int rc;
++ ENTRY;
++
++ if (!inode) {
++ CERROR("REPORT THIS LINE TO PETER\n");
++ RETURN(0);
++ }
++ CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),name=%s\n",
++ inode->i_ino, inode->i_generation, inode, dentry->d_name.name);
++
++ exp = ll_i2mdcexp(inode);
++
++ if (exp->exp_connect_flags & OBD_CONNECT_ATTRFID) {
++ struct lookup_intent oit = { .it_op = IT_GETATTR };
++ struct mdc_op_data op_data;
++
++ /* Call getattr by fid, so do not provide name at all. */
++ ll_prepare_mdc_op_data(&op_data, dentry->d_parent->d_inode,
++ dentry->d_inode, NULL, 0, 0, NULL);
++ rc = mdc_intent_lock(exp, &op_data, NULL, 0,
++ /* we are not interested in name
++ based lookup */
++ &oit, 0, &req,
++ ll_mdc_blocking_ast, 0);
++ if (rc < 0) {
++ rc = ll_inode_revalidate_fini(inode, rc);
++ GOTO (out, rc);
++ }
++
++ rc = revalidate_it_finish(req, DLM_REPLY_REC_OFF, &oit, dentry);
++ if (rc != 0) {
++ ll_intent_release(&oit);
++ GOTO(out, rc);
++ }
++
++ /* Unlinked? Unhash dentry, so it is not picked up later by
++ do_lookup() -> ll_revalidate_it(). We cannot use d_drop
++ here to preserve get_cwd functionality on 2.6.
++ Bug 10503 */
++ if (!dentry->d_inode->i_nlink) {
++ spin_lock(&ll_lookup_lock);
++ spin_lock(&dcache_lock);
++ ll_drop_dentry(dentry);
++ spin_unlock(&dcache_lock);
++ spin_unlock(&ll_lookup_lock);
++ }
++
++ ll_lookup_finish_locks(&oit, dentry);
++ } else if (!ll_have_md_lock(dentry->d_inode,
++ MDS_INODELOCK_UPDATE|MDS_INODELOCK_LOOKUP)) {
++ struct ll_sb_info *sbi = ll_i2sbi(dentry->d_inode);
++ struct ll_fid fid;
++ obd_valid valid = OBD_MD_FLGETATTR;
++ int ealen = 0;
++
++ if (S_ISREG(inode->i_mode)) {
++ rc = ll_get_max_mdsize(sbi, &ealen);
++ if (rc)
++ RETURN(rc);
++ valid |= OBD_MD_FLEASIZE | OBD_MD_FLMODEASIZE;
++ }
++ ll_inode2fid(&fid, inode);
++ rc = mdc_getattr(sbi->ll_mdc_exp, &fid, valid, ealen, &req);
++ if (rc) {
++ rc = ll_inode_revalidate_fini(inode, rc);
++ RETURN(rc);
++ }
++
++ rc = ll_prep_inode(sbi->ll_osc_exp, &inode, req, REPLY_REC_OFF,
++ NULL);
++ if (rc)
++ GOTO(out, rc);
++ }
++
++ /* if object not yet allocated, don't validate size */
++ if (ll_i2info(inode)->lli_smd == NULL)
++ GOTO(out, rc = 0);
++
++ /* ll_glimpse_size will prefer locally cached writes if they extend
++ * the file */
++ rc = ll_glimpse_size(inode, 0);
++
++out:
++ ptlrpc_req_finished(req);
++ RETURN(rc);
++}
++
++int ll_getattr_it(struct vfsmount *mnt, struct dentry *de,
++ struct lookup_intent *it, struct kstat *stat)
++{
++ struct inode *inode = de->d_inode;
++ int res = 0;
++
++ res = ll_inode_revalidate_it(de, it);
++ ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_GETATTR, 1);
++
++ if (res)
++ return res;
++
++ stat->dev = inode->i_sb->s_dev;
++ stat->ino = inode->i_ino;
++ stat->mode = inode->i_mode;
++ stat->nlink = inode->i_nlink;
++ stat->uid = inode->i_uid;
++ stat->gid = inode->i_gid;
++ stat->rdev = kdev_t_to_nr(inode->i_rdev);
++ stat->atime = inode->i_atime;
++ stat->mtime = inode->i_mtime;
++ stat->ctime = inode->i_ctime;
++#ifdef HAVE_INODE_BLKSIZE
++ stat->blksize = inode->i_blksize;
++#else
++ stat->blksize = 1<<inode->i_blkbits;
++#endif
++
++ ll_inode_size_lock(inode, 0);
++ stat->size = i_size_read(inode);
++ stat->blocks = inode->i_blocks;
++ ll_inode_size_unlock(inode, 0);
++
++ return 0;
++}
++int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat)
++{
++ struct lookup_intent it = { .it_op = IT_GETATTR };
++
++ return ll_getattr_it(mnt, de, &it, stat);
++}
++
++static
++int lustre_check_acl(struct inode *inode, int mask)
++{
++#ifdef CONFIG_FS_POSIX_ACL
++ struct ll_inode_info *lli = ll_i2info(inode);
++ struct posix_acl *acl;
++ int rc;
++ ENTRY;
++
++ spin_lock(&lli->lli_lock);
++ acl = posix_acl_dup(lli->lli_posix_acl);
++ spin_unlock(&lli->lli_lock);
++
++ if (!acl)
++ RETURN(-EAGAIN);
++
++ rc = posix_acl_permission(inode, acl, mask);
++ posix_acl_release(acl);
++
++ RETURN(rc);
++#else
++ return -EAGAIN;
++#endif
++}
++
++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,10))
++int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd)
++{
++ CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), mask %o\n",
++ inode->i_ino, inode->i_generation, inode, mask);
++
++ ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_INODE_PERM, 1);
++ return generic_permission(inode, mask, lustre_check_acl);
++}
++#else
++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
++int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd)
++#else
++int ll_inode_permission(struct inode *inode, int mask)
++#endif
++{
++ int mode = inode->i_mode;
++ int rc;
++
++ CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), mask %o\n",
++ inode->i_ino, inode->i_generation, inode, mask);
++ ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_INODE_PERM, 1);
++
++ if ((mask & MAY_WRITE) && IS_RDONLY(inode) &&
++ (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
++ return -EROFS;
++ if ((mask & MAY_WRITE) && IS_IMMUTABLE(inode))
++ return -EACCES;
++ if (current->fsuid == inode->i_uid) {
++ mode >>= 6;
++ } else if (1) {
++ if (((mode >> 3) & mask & S_IRWXO) != mask)
++ goto check_groups;
++ rc = lustre_check_acl(inode, mask);
++ if (rc == -EAGAIN)
++ goto check_groups;
++ if (rc == -EACCES)
++ goto check_capabilities;
++ return rc;
++ } else {
++check_groups:
++ if (in_group_p(inode->i_gid))
++ mode >>= 3;
++ }
++ if ((mode & mask & S_IRWXO) == mask)
++ return 0;
++
++check_capabilities:
++ if (!(mask & MAY_EXEC) ||
++ (inode->i_mode & S_IXUGO) || S_ISDIR(inode->i_mode))
++ if (cfs_capable(CFS_CAP_DAC_OVERRIDE))
++ return 0;
++
++ if (cfs_capable(CFS_CAP_DAC_READ_SEARCH) && ((mask == MAY_READ) ||
++ (S_ISDIR(inode->i_mode) && !(mask & MAY_WRITE))))
++ return 0;
++
++ return -EACCES;
++}
++#endif
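To read the legacy permission path above concretely: an owner match shifts mode right by 6 so the rwx bits are tested directly; for a non-owner, if the group bits (mode >> 3) cannot possibly grant the request the code jumps straight to the group-membership check, otherwise it consults the cached POSIX ACL via lustre_check_acl(), falling back to the group check on -EAGAIN (no ACL cached) and to the CFS_CAP_DAC_* capability overrides on -EACCES or when the mode bits ultimately deny access.
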
++
++/* -o localflock - only provides locally consistent flock locks */
++struct file_operations ll_file_operations = {
++ .read = ll_file_read,
++#ifdef HAVE_FILE_READV
++ .readv = ll_file_readv,
++#else
++ .aio_read = ll_file_aio_read,
++#endif
++ .write = ll_file_write,
++#ifdef HAVE_FILE_WRITEV
++ .writev = ll_file_writev,
++#else
++ .aio_write = ll_file_aio_write,
++#endif
++ .ioctl = ll_file_ioctl,
++ .open = ll_file_open,
++ .release = ll_file_release,
++ .mmap = ll_file_mmap,
++ .llseek = ll_file_seek,
++ .sendfile = ll_file_sendfile,
++ .fsync = ll_fsync,
++};
++
++struct file_operations ll_file_operations_flock = {
++ .read = ll_file_read,
++#ifdef HAVE_FILE_READV
++ .readv = ll_file_readv,
++#else
++ .aio_read = ll_file_aio_read,
++#endif
++ .write = ll_file_write,
++#ifdef HAVE_FILE_WRITEV
++ .writev = ll_file_writev,
++#else
++ .aio_write = ll_file_aio_write,
++#endif
++ .ioctl = ll_file_ioctl,
++ .open = ll_file_open,
++ .release = ll_file_release,
++ .mmap = ll_file_mmap,
++ .llseek = ll_file_seek,
++ .sendfile = ll_file_sendfile,
++ .fsync = ll_fsync,
++#ifdef HAVE_F_OP_FLOCK
++ .flock = ll_file_flock,
++#endif
++ .lock = ll_file_flock
++};
++
++/* These are for -o noflock - to return ENOSYS on flock calls */
++struct file_operations ll_file_operations_noflock = {
++ .read = ll_file_read,
++#ifdef HAVE_FILE_READV
++ .readv = ll_file_readv,
++#else
++ .aio_read = ll_file_aio_read,
++#endif
++ .write = ll_file_write,
++#ifdef HAVE_FILE_WRITEV
++ .writev = ll_file_writev,
++#else
++ .aio_write = ll_file_aio_write,
++#endif
++ .ioctl = ll_file_ioctl,
++ .open = ll_file_open,
++ .release = ll_file_release,
++ .mmap = ll_file_mmap,
++ .llseek = ll_file_seek,
++ .sendfile = ll_file_sendfile,
++ .fsync = ll_fsync,
++#ifdef HAVE_F_OP_FLOCK
++ .flock = ll_file_noflock,
++#endif
++ .lock = ll_file_noflock
++};
++
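Note that these three operation tables line up with the client flock mount options: per the comments above, -o flock selects ll_file_operations_flock (cluster-coherent locks via the LDLM path in ll_file_flock()), -o localflock selects ll_file_operations (locally consistent flock locks only), and -o noflock selects ll_file_operations_noflock, whose ll_file_noflock() fails lock requests with -ENOSYS.
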
++struct inode_operations ll_file_inode_operations = {
++#ifdef HAVE_VFS_INTENT_PATCHES
++ .setattr_raw = ll_setattr_raw,
++#endif
++ .setattr = ll_setattr,
++ .truncate = ll_truncate,
++ .getattr = ll_getattr,
++ .permission = ll_inode_permission,
++ .setxattr = ll_setxattr,
++ .getxattr = ll_getxattr,
++ .listxattr = ll_listxattr,
++ .removexattr = ll_removexattr,
++};
++
++/* dynamic ioctl number support routines */
++static struct llioc_ctl_data {
++ struct rw_semaphore ioc_sem;
++ struct list_head ioc_head;
++} llioc = {
++ __RWSEM_INITIALIZER(llioc.ioc_sem),
++ CFS_LIST_HEAD_INIT(llioc.ioc_head)
++};
++
++
++struct llioc_data {
++ struct list_head iocd_list;
++ unsigned int iocd_size;
++ llioc_callback_t iocd_cb;
++ unsigned int iocd_count;
++ unsigned int iocd_cmd[0];
++};
++
++void *ll_iocontrol_register(llioc_callback_t cb, int count, unsigned int *cmd)
++{
++ unsigned int size;
++ struct llioc_data *in_data = NULL;
++ ENTRY;
++
++ if (cb == NULL || cmd == NULL ||
++ count > LLIOC_MAX_CMD || count < 0)
++ RETURN(NULL);
++
++ size = sizeof(*in_data) + count * sizeof(unsigned int);
++ OBD_ALLOC(in_data, size);
++ if (in_data == NULL)
++ RETURN(NULL);
++
++ memset(in_data, 0, sizeof(*in_data));
++ in_data->iocd_size = size;
++ in_data->iocd_cb = cb;
++ in_data->iocd_count = count;
++ memcpy(in_data->iocd_cmd, cmd, sizeof(unsigned int) * count);
++
++ down_write(&llioc.ioc_sem);
++ list_add_tail(&in_data->iocd_list, &llioc.ioc_head);
++ up_write(&llioc.ioc_sem);
++
++ RETURN(in_data);
++}
++
++void ll_iocontrol_unregister(void *magic)
++{
++ struct llioc_data *tmp;
++
++ if (magic == NULL)
++ return;
++
++ down_write(&llioc.ioc_sem);
++ list_for_each_entry(tmp, &llioc.ioc_head, iocd_list) {
++ if (tmp == magic) {
++ unsigned int size = tmp->iocd_size;
++
++ list_del(&tmp->iocd_list);
++ up_write(&llioc.ioc_sem);
++
++ OBD_FREE(tmp, size);
++ return;
++ }
++ }
++ up_write(&llioc.ioc_sem);
++
++ CWARN("didn't find iocontrol register block with magic: %p\n", magic);
++}
++
++EXPORT_SYMBOL(ll_iocontrol_register);
++EXPORT_SYMBOL(ll_iocontrol_unregister);
++
++enum llioc_iter ll_iocontrol_call(struct inode *inode, struct file *file,
++ unsigned int cmd, unsigned long arg, int *rcp)
++{
++ enum llioc_iter ret = LLIOC_CONT;
++ struct llioc_data *data;
++ int rc = -EINVAL, i;
++
++ down_read(&llioc.ioc_sem);
++ list_for_each_entry(data, &llioc.ioc_head, iocd_list) {
++ for (i = 0; i < data->iocd_count; i++) {
++ if (cmd != data->iocd_cmd[i])
++ continue;
++
++ ret = data->iocd_cb(inode, file, cmd, arg, data, &rc);
++ break;
++ }
++
++ if (ret == LLIOC_STOP)
++ break;
++ }
++ up_read(&llioc.ioc_sem);
++
++ if (rcp)
++ *rcp = rc;
++ return ret;
++}
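
The llioc registry above lets other modules hook additional ioctl numbers into ll_file_ioctl()'s default case without patching the switch statement. A hedged sketch of a consumer, matching the llioc_callback_t contract documented in llite_internal.h below (the command number 0xC00C and all names are made up for illustration):

/* Hypothetical module registering one extra ioctl command with llite.
 * Assumes the declarations from llite_internal.h. */
#include <linux/module.h>

static void *my_magic;
static unsigned int my_cmds[] = { 0xC00C };   /* illustrative cmd number */

static enum llioc_iter my_ioctl_cb(struct inode *inode, struct file *file,
                                   unsigned int cmd, unsigned long arg,
                                   void *magic, int *rcp)
{
        if (magic != my_magic)  /* collision: some other registration */
                return LLIOC_CONT;

        *rcp = 0;               /* handle cmd here and set the result */
        return LLIOC_STOP;      /* ll_file_ioctl() will return *rcp */
}

static int __init my_init(void)
{
        my_magic = ll_iocontrol_register(my_ioctl_cb, 1, my_cmds);
        return my_magic != NULL ? 0 : -ENOMEM;
}

static void __exit my_exit(void)
{
        ll_iocontrol_unregister(my_magic);
}

module_init(my_init);
module_exit(my_exit);
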
+diff -urNad lustre~/lustre/llite/llite_internal.h lustre/lustre/llite/llite_internal.h
+--- lustre~/lustre/llite/llite_internal.h 2009-03-12 10:32:27.000000000 +0100
++++ lustre/lustre/llite/llite_internal.h 2009-03-12 11:02:51.000000000 +0100
+@@ -647,7 +647,7 @@
+ struct lookup_intent *it, struct kstat *stat);
+ int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat);
+ struct ll_file_data *ll_file_data_get(void);
+-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
++#ifndef HAVE_INODE_PERMISION_2ARGS
+ int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd);
+ #else
+ int ll_inode_permission(struct inode *inode, int mask);
+@@ -727,9 +727,6 @@
+ /* llite/llite_nfs.c */
+ extern struct export_operations lustre_export_operations;
+ __u32 get_uuid2int(const char *name, int len);
+-struct dentry *ll_fh_to_dentry(struct super_block *sb, __u32 *data, int len,
+- int fhtype, int parent);
+-int ll_dentry_to_fh(struct dentry *, __u32 *datap, int *lenp, int need_parent);
+
+ /* llite/special.c */
+ extern struct inode_operations ll_special_inode_operations;
+diff -urNad lustre~/lustre/llite/llite_internal.h.orig lustre/lustre/llite/llite_internal.h.orig
+--- lustre~/lustre/llite/llite_internal.h.orig 1970-01-01 00:00:00.000000000 +0000
++++ lustre/lustre/llite/llite_internal.h.orig 2009-03-12 10:32:27.000000000 +0100
+@@ -0,0 +1,1027 @@
++/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
++ * vim:expandtab:shiftwidth=8:tabstop=8:
++ *
++ * GPL HEADER START
++ *
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License version 2 only,
++ * as published by the Free Software Foundation.
++ *
++ * This program is distributed in the hope that it will be useful, but
++ * WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ * General Public License version 2 for more details (a copy is included
++ * in the LICENSE file that accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License
++ * version 2 along with this program; If not, see
++ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
++ *
++ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
++ * CA 95054 USA or visit www.sun.com if you need additional information or
++ * have any questions.
++ *
++ * GPL HEADER END
++ */
++/*
++ * Copyright 2008 Sun Microsystems, Inc. All rights reserved
++ * Use is subject to license terms.
++ */
++/*
++ * This file is part of Lustre, http://www.lustre.org/
++ * Lustre is a trademark of Sun Microsystems, Inc.
++ */
++
++#ifndef LLITE_INTERNAL_H
++#define LLITE_INTERNAL_H
++
++#ifdef CONFIG_FS_POSIX_ACL
++# include <linux/fs.h>
++#ifdef HAVE_XATTR_ACL
++# include <linux/xattr_acl.h>
++#endif
++#ifdef HAVE_LINUX_POSIX_ACL_XATTR_H
++# include <linux/posix_acl_xattr.h>
++#endif
++#endif
++
++#include <lustre_debug.h>
++#include <lustre_ver.h>
++#include <linux/lustre_version.h>
++#include <lustre_disk.h> /* for s2sbi */
++
++#ifndef HAVE_LE_TYPES
++typedef __u16 __le16;
++typedef __u32 __le32;
++#endif
++
++/*
++struct lustre_intent_data {
++ __u64 it_lock_handle[2];
++ __u32 it_disposition;
++ __u32 it_status;
++ __u32 it_lock_mode;
++ }; */
++
++/* If there is no FMODE_EXEC defined, make it to match nothing */
++#ifndef FMODE_EXEC
++#define FMODE_EXEC 0
++#endif
++
++#define LL_IT2STR(it) ((it) ? ldlm_it2str((it)->it_op) : "0")
++#define LUSTRE_FPRIVATE(file) ((file)->private_data)
++
++#ifdef HAVE_VFS_INTENT_PATCHES
++static inline struct lookup_intent *ll_nd2it(struct nameidata *nd)
++{
++ return &nd->intent;
++}
++#endif
++
++/*
++ * Directory entries are currently in the same format as ext2/ext3, but will
++ * be changed in the future to accommodate FIDs
++ */
++#define LL_DIR_NAME_LEN (255)
++#define LL_DIR_PAD (4)
++
++struct ll_dir_entry {
++ /* number of inode, referenced by this entry */
++ __le32 lde_inode;
++ /* total record length, multiple of LL_DIR_PAD */
++ __le16 lde_rec_len;
++ /* length of name */
++ __u8 lde_name_len;
++ /* file type: regular, directory, device, etc. */
++ __u8 lde_file_type;
++ /* name. NOT NUL-terminated */
++ char lde_name[LL_DIR_NAME_LEN];
++};
++
++struct ll_dentry_data {
++ int lld_cwd_count;
++ int lld_mnt_count;
++ struct obd_client_handle lld_cwd_och;
++ struct obd_client_handle lld_mnt_och;
++#ifndef HAVE_VFS_INTENT_PATCHES
++ struct lookup_intent *lld_it;
++#endif
++ unsigned int lld_sa_generation;
++};
++
++#define ll_d2d(de) ((struct ll_dentry_data*)((de)->d_fsdata))
++
++extern struct file_operations ll_pgcache_seq_fops;
++
++#define LLI_INODE_MAGIC 0x111d0de5
++#define LLI_INODE_DEAD 0xdeadd00d
++#define LLI_F_HAVE_OST_SIZE_LOCK 0
++#define LLI_F_HAVE_MDS_SIZE_LOCK 1
++#define LLI_F_CONTENDED 2
++#define LLI_F_SRVLOCK 3
++
++struct ll_inode_info {
++ int lli_inode_magic;
++ struct semaphore lli_size_sem; /* protect open and change size */
++ void *lli_size_sem_owner;
++ struct semaphore lli_write_sem;
++ struct lov_stripe_md *lli_smd;
++ char *lli_symlink_name;
++ __u64 lli_maxbytes;
++ __u64 lli_io_epoch;
++ unsigned long lli_flags;
++ cfs_time_t lli_contention_time;
++
++ /* this lock protects s_d_w and p_w_ll and mmap_cnt */
++ spinlock_t lli_lock;
++#ifdef HAVE_CLOSE_THREAD
++ struct list_head lli_pending_write_llaps;
++ struct list_head lli_close_item;
++ int lli_send_done_writing;
++#endif
++ atomic_t lli_mmap_cnt;
++
++ /* for writepage() only to communicate to fsync */
++ int lli_async_rc;
++
++ struct posix_acl *lli_posix_acl;
++
++ struct list_head lli_dead_list;
++
++ struct semaphore lli_och_sem; /* Protects access to och pointers
++ and their usage counters */
++ /* We need all three because every inode may be opened in different
++ modes */
++ struct obd_client_handle *lli_mds_read_och;
++ __u64 lli_open_fd_read_count;
++ struct obd_client_handle *lli_mds_write_och;
++ __u64 lli_open_fd_write_count;
++ struct obd_client_handle *lli_mds_exec_och;
++ __u64 lli_open_fd_exec_count;
++ struct inode lli_vfs_inode;
++
++ /* metadata stat-ahead */
++ /*
++ * "opendir_pid" is the token when lookup/revalid -- I am the owner of
++ * dir statahead.
++ */
++ pid_t lli_opendir_pid;
++ /*
++ * since parent and child threads can share the same @file struct,
++ * "opendir_key" is the token used at dir close to handle the case of
++ * the parent exiting before the child -- its holder should clean up
++ * the dir readahead. */
++ void *lli_opendir_key;
++ struct ll_statahead_info *lli_sai;
++};
++
++/*
++ * Locking to guarantee consistency of non-atomic updates to long long i_size,
++ * consistency between file size and KMS, and consistency within
++ * ->lli_smd->lsm_oinfo[]'s.
++ *
++ * Implemented by ->lli_size_sem and ->lsm_sem, nested in that order.
++ */
++
++void ll_inode_size_lock(struct inode *inode, int lock_lsm);
++void ll_inode_size_unlock(struct inode *inode, int unlock_lsm);
++
++// FIXME: replace the name of this with LL_I to conform to kernel stuff
++// static inline struct ll_inode_info *LL_I(struct inode *inode)
++static inline struct ll_inode_info *ll_i2info(struct inode *inode)
++{
++ return container_of(inode, struct ll_inode_info, lli_vfs_inode);
++}
++
++/* default to about 40meg of readahead on a given system. That much tied
++ * up in 512k readahead requests serviced at 40ms each is about 1GB/s. */
++#define SBI_DEFAULT_READAHEAD_MAX (40UL << (20 - CFS_PAGE_SHIFT))
++
++/* default to read-ahead full files smaller than 2MB on the second read */
++#define SBI_DEFAULT_READAHEAD_WHOLE_MAX (2UL << (20 - CFS_PAGE_SHIFT))
++
++enum ra_stat {
++ RA_STAT_HIT = 0,
++ RA_STAT_MISS,
++ RA_STAT_DISTANT_READPAGE,
++ RA_STAT_MISS_IN_WINDOW,
++ RA_STAT_FAILED_GRAB_PAGE,
++ RA_STAT_FAILED_MATCH,
++ RA_STAT_DISCARDED,
++ RA_STAT_ZERO_LEN,
++ RA_STAT_ZERO_WINDOW,
++ RA_STAT_EOF,
++ RA_STAT_MAX_IN_FLIGHT,
++ RA_STAT_WRONG_GRAB_PAGE,
++ _NR_RA_STAT,
++};
++
++struct ll_ra_info {
++ unsigned long ra_cur_pages;
++ unsigned long ra_max_pages;
++ unsigned long ra_max_read_ahead_whole_pages;
++ unsigned long ra_stats[_NR_RA_STAT];
++};
++
++/* LL_HIST_MAX=32 causes an overflow */
++#define LL_HIST_MAX 28
++#define LL_HIST_START 12 /* buckets start at 2^12 = 4k */
++#define LL_PROCESS_HIST_MAX 10
++struct per_process_info {
++ pid_t pid;
++ struct obd_histogram pp_r_hist;
++ struct obd_histogram pp_w_hist;
++};
++
++/* pp_extents[LL_PROCESS_HIST_MAX] will hold the combined process info */
++struct ll_rw_extents_info {
++ struct per_process_info pp_extents[LL_PROCESS_HIST_MAX + 1];
++};
++
++#define LL_OFFSET_HIST_MAX 100
++struct ll_rw_process_info {
++ pid_t rw_pid;
++ int rw_op;
++ loff_t rw_range_start;
++ loff_t rw_range_end;
++ loff_t rw_last_file_pos;
++ loff_t rw_offset;
++ size_t rw_smallest_extent;
++ size_t rw_largest_extent;
++ struct file *rw_last_file;
++};
++
++
++enum stats_track_type {
++ STATS_TRACK_ALL = 0, /* track all processes */
++ STATS_TRACK_PID, /* track process with this pid */
++ STATS_TRACK_PPID, /* track processes with this ppid */
++ STATS_TRACK_GID, /* track processes with this gid */
++ STATS_TRACK_LAST,
++};
++
++/* flags for sbi->ll_flags */
++#define LL_SBI_NOLCK 0x01 /* DLM locking disabled (directio-only) */
++#define LL_SBI_DATA_CHECKSUM 0x02 /* checksum each page on the wire */
++#define LL_SBI_FLOCK 0x04
++#define LL_SBI_USER_XATTR 0x08 /* support user xattr */
++#define LL_SBI_ACL 0x10 /* support ACL */
++#define LL_SBI_JOIN 0x20 /* support JOIN */
++#define LL_SBI_LOCALFLOCK 0x40 /* Local flocks support by kernel */
++#define LL_SBI_LRU_RESIZE 0x80 /* support lru resize */
++#define LL_SBI_LLITE_CHECKSUM 0x100 /* checksum each page in memory */
++
++/* default value for ll_sb_info->contention_time */
++#define SBI_DEFAULT_CONTENTION_SECONDS 60
++/* default value for lockless_truncate_enable */
++#define SBI_DEFAULT_LOCKLESS_TRUNCATE_ENABLE 1
++
++struct ll_sb_info {
++ struct list_head ll_list;
++ /* this protects pglist and ra_info. It isn't safe to
++ * grab from interrupt contexts */
++ spinlock_t ll_lock;
++ spinlock_t ll_pp_extent_lock; /* Lock for pp_extent entries */
++ spinlock_t ll_process_lock; /* Lock for ll_rw_process_info */
++ struct obd_uuid ll_sb_uuid;
++ struct obd_export *ll_mdc_exp;
++ struct obd_export *ll_osc_exp;
++ struct proc_dir_entry *ll_proc_root;
++ obd_id ll_rootino; /* number of root inode */
++
++ int ll_flags;
++ struct list_head ll_conn_chain; /* per-conn chain of SBs */
++ struct lustre_client_ocd ll_lco;
++
++ struct list_head ll_orphan_dentry_list; /*please don't ask -p*/
++ struct ll_close_queue *ll_lcq;
++
++ struct lprocfs_stats *ll_stats; /* lprocfs stats counter */
++
++ unsigned long ll_async_page_max;
++ unsigned long ll_async_page_count;
++ unsigned long ll_pglist_gen;
++ struct list_head ll_pglist; /* all pages (llap_pglist_item) */
++
++ unsigned ll_contention_time; /* seconds */
++ unsigned ll_lockless_truncate_enable; /* true/false */
++
++ struct ll_ra_info ll_ra_info;
++ unsigned int ll_namelen;
++ struct file_operations *ll_fop;
++
++#ifdef HAVE_EXPORT___IGET
++ struct list_head ll_deathrow; /* inodes to be destroyed (b1443) */
++ spinlock_t ll_deathrow_lock;
++#endif
++ /* =0 - hold lock over whole read/write
++ * >0 - max. chunk to be read/written w/o lock re-acquiring */
++ unsigned long ll_max_rw_chunk;
++
++ /* Statistics */
++ struct ll_rw_extents_info ll_rw_extents_info;
++ int ll_extent_process_count;
++ struct ll_rw_process_info ll_rw_process_info[LL_PROCESS_HIST_MAX];
++ unsigned int ll_offset_process_count;
++ struct ll_rw_process_info ll_rw_offset_info[LL_OFFSET_HIST_MAX];
++ unsigned int ll_rw_offset_entry_count;
++ enum stats_track_type ll_stats_track_type;
++ int ll_stats_track_id;
++ int ll_rw_stats_on;
++ dev_t ll_sdev_orig; /* save s_dev before assign for
++ * clustered NFS */
++
++ /* metadata stat-ahead */
++ unsigned int ll_sa_max; /* max statahead RPCs */
++ unsigned int ll_sa_wrong; /* statahead thread stopped for
++ * low hit ratio */
++ unsigned int ll_sa_total; /* statahead thread started
++ * count */
++ unsigned long long ll_sa_blocked; /* ls count waiting for
++ * statahead */
++ unsigned long long ll_sa_cached; /* ls count got in cache */
++ unsigned long long ll_sa_hit; /* hit count */
++ unsigned long long ll_sa_miss; /* miss count */
++};
++
++#define LL_DEFAULT_MAX_RW_CHUNK (32 * 1024 * 1024)
++
++struct ll_ra_read {
++ pgoff_t lrr_start;
++ pgoff_t lrr_count;
++ struct task_struct *lrr_reader;
++ struct list_head lrr_linkage;
++};
++
++/*
++ * per file-descriptor read-ahead data.
++ */
++struct ll_readahead_state {
++ spinlock_t ras_lock;
++ /*
++ * index of the last page that read(2) needed and that wasn't in the
++ * cache. Used by ras_update() to detect seeks.
++ *
++ * XXX nikita: if access seeks into cached region, Lustre doesn't see
++ * this.
++ */
++ unsigned long ras_last_readpage;
++ /*
++ * number of pages read after last read-ahead window reset. As window
++ * is reset on each seek, this is effectively a number of consecutive
++ * accesses. Maybe ->ras_accessed_in_window is a better name.
++ *
++ * XXX nikita: window is also reset (by ras_update()) when Lustre
++ * believes that memory pressure evicts read-ahead pages. In that
++ * case, it probably doesn't make sense to expand window to
++ * PTLRPC_MAX_BRW_PAGES on the third access.
++ */
++ unsigned long ras_consecutive_pages;
++ /*
++ * number of read requests after the last read-ahead window reset.
++ * As the window is reset on each seek, this is effectively the number
++ * of consecutive read requests and is used to trigger read-ahead.
++ */
++ unsigned long ras_consecutive_requests;
++ /*
++ * Parameters of current read-ahead window. Handled by
++ * ras_update(). On the initial access to the file or after a seek,
++ * window is reset to 0. After 3 consecutive accesses, window is
++ * expanded to PTLRPC_MAX_BRW_PAGES. Afterwards, window is enlarged by
++ * PTLRPC_MAX_BRW_PAGES chunks up to ->ra_max_pages.
++ */
++ unsigned long ras_window_start, ras_window_len;
++ /*
++ * Where next read-ahead should start at. This lies within read-ahead
++ * window. Read-ahead window is read in pieces rather than at once
++ * because: 1. lustre limits total number of pages under read-ahead by
++ * ->ra_max_pages (see ll_ra_count_get()), 2. client cannot read pages
++ * not covered by DLM lock.
++ */
++ unsigned long ras_next_readahead;
++ /*
++ * Total number of ll_file_read requests issued, reads originating
++ * due to mmap are not counted in this total. This value is used to
++ * trigger full file read-ahead after multiple reads to a small file.
++ */
++ unsigned long ras_requests;
++ /*
++ * Page index with respect to the current request; this value
++ * will not be accurate when dealing with reads issued via mmap.
++ */
++ unsigned long ras_request_index;
++ /*
++ * list of struct ll_ra_read's, one per read(2) call currently in
++ * progress against this file descriptor. Used by read-ahead code,
++ * protected by ->ras_lock.
++ */
++ struct list_head ras_read_beads;
++ /*
++ * The following 3 items are used for detecting the stride I/O
++ * mode.
++ * In stride I/O mode,
++ * ...............|-----data-----|****gap*****|--------|******|....
++ * offset |-stride_pages-|-stride_gap-|
++ * ras_stride_offset = offset;
++ * ras_stride_length = stride_pages + stride_gap;
++ * ras_stride_pages = stride_pages;
++ * Note: all three items are counted in pages.
++ */
++ unsigned long ras_stride_length;
++ unsigned long ras_stride_pages;
++ pgoff_t ras_stride_offset;
++ /*
++ * number of consecutive stride requests; similar to
++ * ras_consecutive_requests, but used for stride I/O mode.
++ * Note: stride read-ahead is only enabled once more than 2
++ * consecutive stride requests have been detected.
++ */
++ unsigned long ras_consecutive_stride_requests;
++};
++
++extern cfs_mem_cache_t *ll_file_data_slab;
++struct lustre_handle;
++struct ll_file_data {
++ struct ll_readahead_state fd_ras;
++ int fd_omode;
++ struct lustre_handle fd_cwlockh;
++ unsigned long fd_gid;
++ __u32 fd_flags;
++};
++
++struct lov_stripe_md;
++
++extern spinlock_t inode_lock;
++
++extern struct proc_dir_entry *proc_lustre_fs_root;
++
++static inline struct inode *ll_info2i(struct ll_inode_info *lli)
++{
++ return &lli->lli_vfs_inode;
++}
++
++struct it_cb_data {
++ struct inode *icbd_parent;
++ struct dentry **icbd_childp;
++ obd_id hash;
++};
++
++void ll_i2gids(__u32 *suppgids, struct inode *i1,struct inode *i2);
++
++#define LLAP_MAGIC 98764321
++
++extern cfs_mem_cache_t *ll_async_page_slab;
++extern size_t ll_async_page_slab_size;
++struct ll_async_page {
++ int llap_magic;
++ /* only trust these if the page lock is providing exclusion */
++ unsigned int llap_write_queued:1,
++ llap_defer_uptodate:1,
++ llap_origin:3,
++ llap_ra_used:1,
++ llap_ignore_quota:1,
++ llap_nocache:1,
++ llap_lockless_io_page:1;
++ void *llap_cookie;
++ struct page *llap_page;
++ struct list_head llap_pending_write;
++ struct list_head llap_pglist_item;
++ /* checksum for paranoid I/O debugging */
++ __u32 llap_checksum;
++};
++
++/*
++ * enumeration of llap_from_page() call-sites. Used to export statistics in
++ * /proc/fs/lustre/llite/fsN/dump_page_cache.
++ */
++enum {
++ LLAP_ORIGIN_UNKNOWN = 0,
++ LLAP_ORIGIN_READPAGE,
++ LLAP_ORIGIN_READAHEAD,
++ LLAP_ORIGIN_COMMIT_WRITE,
++ LLAP_ORIGIN_WRITEPAGE,
++ LLAP_ORIGIN_REMOVEPAGE,
++ LLAP_ORIGIN_LOCKLESS_IO,
++ LLAP__ORIGIN_MAX,
++};
++extern char *llap_origins[];
++
++#ifdef HAVE_REGISTER_CACHE
++#define ll_register_cache(cache) register_cache(cache)
++#define ll_unregister_cache(cache) unregister_cache(cache)
++#else
++#define ll_register_cache(cache) do {} while (0)
++#define ll_unregister_cache(cache) do {} while (0)
++#endif
++
++void ll_ra_read_in(struct file *f, struct ll_ra_read *rar);
++void ll_ra_read_ex(struct file *f, struct ll_ra_read *rar);
++struct ll_ra_read *ll_ra_read_get(struct file *f);
++
++/* llite/lproc_llite.c */
++#ifdef LPROCFS
++int lprocfs_register_mountpoint(struct proc_dir_entry *parent,
++ struct super_block *sb, char *osc, char *mdc);
++void lprocfs_unregister_mountpoint(struct ll_sb_info *sbi);
++void ll_stats_ops_tally(struct ll_sb_info *sbi, int op, int count);
++void lprocfs_llite_init_vars(struct lprocfs_static_vars *lvars);
++#else
++static inline int lprocfs_register_mountpoint(struct proc_dir_entry *parent,
++ struct super_block *sb, char *osc, char *mdc){return 0;}
++static inline void lprocfs_unregister_mountpoint(struct ll_sb_info *sbi) {}
++static void ll_stats_ops_tally(struct ll_sb_info *sbi, int op, int count) {}
++static void lprocfs_llite_init_vars(struct lprocfs_static_vars *lvars)
++{
++ memset(lvars, 0, sizeof(*lvars));
++}
++#endif
++
++
++/* llite/dir.c */
++extern struct file_operations ll_dir_operations;
++extern struct inode_operations ll_dir_inode_operations;
++
++struct page *ll_get_dir_page(struct inode *dir, unsigned long n);
++
++static inline unsigned ll_dir_rec_len(unsigned name_len)
++{
++ return (name_len + 8 + LL_DIR_PAD - 1) & ~(LL_DIR_PAD - 1);
++}
++
++static inline struct ll_dir_entry *ll_entry_at(void *base, unsigned offset)
++{
++ return (struct ll_dir_entry *)((char *)base + offset);
++}
++
++/*
++ * p is at least 6 bytes before the end of page
++ */
++static inline struct ll_dir_entry *ll_dir_next_entry(struct ll_dir_entry *p)
++{
++ return ll_entry_at(p, le16_to_cpu(p->lde_rec_len));
++}
++
++static inline void ll_put_page(struct page *page)
++{
++ kunmap(page);
++ page_cache_release(page);
++}
++
++static inline unsigned long dir_pages(struct inode *inode)
++{
++ return (inode->i_size + CFS_PAGE_SIZE - 1) >> CFS_PAGE_SHIFT;
++}
++
++int ll_objects_destroy(struct ptlrpc_request *request, struct inode *dir);
++struct inode *ll_iget(struct super_block *sb, ino_t hash,
++ struct lustre_md *lic);
++int ll_mdc_cancel_unused(struct lustre_handle *, struct inode *, int flags,
++ void *opaque);
++int ll_mdc_blocking_ast(struct ldlm_lock *, struct ldlm_lock_desc *,
++ void *data, int flag);
++int ll_prepare_mdc_op_data(struct mdc_op_data *,
++ struct inode *i1, struct inode *i2,
++ const char *name, int namelen, int mode, void *data);
++#ifndef HAVE_VFS_INTENT_PATCHES
++struct lookup_intent *ll_convert_intent(struct open_intent *oit,
++ int lookup_flags);
++#endif
++void ll_pin_extent_cb(void *data);
++int ll_page_removal_cb(void *data, int discard);
++int ll_extent_lock_cancel_cb(struct ldlm_lock *lock, struct ldlm_lock_desc *new,
++ void *data, int flag);
++int lookup_it_finish(struct ptlrpc_request *request, int offset,
++ struct lookup_intent *it, void *data);
++void ll_lookup_finish_locks(struct lookup_intent *it, struct dentry *dentry);
++
++/* llite/rw.c */
++int ll_prepare_write(struct file *, struct page *, unsigned from, unsigned to);
++int ll_commit_write(struct file *, struct page *, unsigned from, unsigned to);
++int ll_writepage(struct page *page);
++void ll_inode_fill_obdo(struct inode *inode, int cmd, struct obdo *oa);
++int ll_ap_completion(void *data, int cmd, struct obdo *oa, int rc);
++int llap_shrink_cache(struct ll_sb_info *sbi, int shrink_fraction);
++extern struct cache_definition ll_cache_definition;
++void ll_removepage(struct page *page);
++int ll_readpage(struct file *file, struct page *page);
++struct ll_async_page *llap_cast_private(struct page *page);
++void ll_readahead_init(struct inode *inode, struct ll_readahead_state *ras);
++void ll_ra_accounting(struct ll_async_page *llap,struct address_space *mapping);
++void ll_truncate(struct inode *inode);
++int ll_file_punch(struct inode *, loff_t, int);
++ssize_t ll_file_lockless_io(struct file *, const struct iovec *,
++ unsigned long, loff_t *, int, ssize_t);
++void ll_clear_file_contended(struct inode*);
++int ll_sync_page_range(struct inode *, struct address_space *, loff_t, size_t);
++
++/* llite/file.c */
++extern struct file_operations ll_file_operations;
++extern struct file_operations ll_file_operations_flock;
++extern struct file_operations ll_file_operations_noflock;
++extern struct inode_operations ll_file_inode_operations;
++extern int ll_inode_revalidate_it(struct dentry *, struct lookup_intent *);
++extern int ll_have_md_lock(struct inode *inode, __u64 bits);
++int ll_region_mapped(unsigned long addr, size_t count);
++int ll_extent_lock(struct ll_file_data *, struct inode *,
++ struct lov_stripe_md *, int mode, ldlm_policy_data_t *,
++ struct lustre_handle *, int ast_flags);
++int ll_extent_unlock(struct ll_file_data *, struct inode *,
++ struct lov_stripe_md *, int mode, struct lustre_handle *);
++int ll_file_open(struct inode *inode, struct file *file);
++int ll_file_release(struct inode *inode, struct file *file);
++int ll_lsm_getattr(struct obd_export *, struct lov_stripe_md *, struct obdo *);
++int ll_glimpse_ioctl(struct ll_sb_info *sbi,
++ struct lov_stripe_md *lsm, lstat_t *st);
++int ll_glimpse_size(struct inode *inode, int ast_flags);
++int ll_local_open(struct file *file,
++ struct lookup_intent *it, struct ll_file_data *fd,
++ struct obd_client_handle *och);
++int ll_release_openhandle(struct dentry *, struct lookup_intent *);
++int ll_mdc_close(struct obd_export *mdc_exp, struct inode *inode,
++ struct file *file);
++int ll_mdc_real_close(struct inode *inode, int flags);
++extern void ll_rw_stats_tally(struct ll_sb_info *sbi, pid_t pid, struct file
++ *file, size_t count, int rw);
++int ll_getattr_it(struct vfsmount *mnt, struct dentry *de,
++ struct lookup_intent *it, struct kstat *stat);
++int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat);
++struct ll_file_data *ll_file_data_get(void);
++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
++int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd);
++#else
++int ll_inode_permission(struct inode *inode, int mask);
++#endif
++int ll_lov_setstripe_ea_info(struct inode *inode, struct file *file,
++ int flags, struct lov_user_md *lum,
++ int lum_size);
++int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename,
++ struct lov_mds_md **lmm, int *lmm_size,
++ struct ptlrpc_request **request);
++int ll_dir_setstripe(struct inode *inode, struct lov_user_md *lump,
++ int set_default);
++int ll_dir_getstripe(struct inode *inode, struct lov_mds_md **lmm,
++ int *lmm_size, struct ptlrpc_request **request);
++int ll_fsync(struct file *file, struct dentry *dentry, int data);
++int ll_fiemap(struct inode *inode, struct ll_user_fiemap *fiemap,
++ int num_bytes);
++
++/* llite/dcache.c */
++/* llite/namei.c */
++/**
++ * protect race ll_find_aliases vs ll_revalidate_it vs ll_unhash_aliases
++ */
++extern spinlock_t ll_lookup_lock;
++extern struct dentry_operations ll_d_ops;
++void ll_intent_drop_lock(struct lookup_intent *);
++void ll_intent_release(struct lookup_intent *);
++extern void ll_set_dd(struct dentry *de);
++int ll_drop_dentry(struct dentry *dentry);
++void ll_unhash_aliases(struct inode *);
++void ll_frob_intent(struct lookup_intent **itp, struct lookup_intent *deft);
++void ll_lookup_finish_locks(struct lookup_intent *it, struct dentry *dentry);
++int ll_dcompare(struct dentry *parent, struct qstr *d_name, struct qstr *name);
++int revalidate_it_finish(struct ptlrpc_request *request, int offset,
++ struct lookup_intent *it, struct dentry *de);
++
++/* llite/llite_lib.c */
++extern struct super_operations lustre_super_operations;
++
++char *ll_read_opt(const char *opt, char *data);
++void ll_lli_init(struct ll_inode_info *lli);
++int ll_fill_super(struct super_block *sb);
++void ll_put_super(struct super_block *sb);
++void ll_kill_super(struct super_block *sb);
++struct inode *ll_inode_from_lock(struct ldlm_lock *lock);
++void ll_clear_inode(struct inode *inode);
++int ll_setattr_raw(struct inode *inode, struct iattr *attr);
++int ll_setattr(struct dentry *de, struct iattr *attr);
++#ifndef HAVE_STATFS_DENTRY_PARAM
++int ll_statfs(struct super_block *sb, struct kstatfs *sfs);
++#else
++int ll_statfs(struct dentry *de, struct kstatfs *sfs);
++#endif
++int ll_statfs_internal(struct super_block *sb, struct obd_statfs *osfs,
++ __u64 max_age, __u32 flags);
++void ll_update_inode(struct inode *inode, struct lustre_md *md);
++void ll_read_inode2(struct inode *inode, void *opaque);
++int ll_iocontrol(struct inode *inode, struct file *file,
++ unsigned int cmd, unsigned long arg);
++#ifdef HAVE_UMOUNTBEGIN_VFSMOUNT
++void ll_umount_begin(struct vfsmount *vfsmnt, int flags);
++#else
++void ll_umount_begin(struct super_block *sb);
++#endif
++int ll_remount_fs(struct super_block *sb, int *flags, char *data);
++int ll_show_options(struct seq_file *seq, struct vfsmount *vfs);
++int ll_prep_inode(struct obd_export *exp, struct inode **inode,
++ struct ptlrpc_request *req, int offset, struct super_block *);
++void lustre_dump_dentry(struct dentry *, int recur);
++void lustre_dump_inode(struct inode *);
++struct ll_async_page *llite_pglist_next_llap(struct ll_sb_info *sbi,
++ struct list_head *list);
++int ll_obd_statfs(struct inode *inode, void *arg);
++int ll_get_max_mdsize(struct ll_sb_info *sbi, int *max_mdsize);
++int ll_process_config(struct lustre_cfg *lcfg);
++
++/* llite/llite_nfs.c */
++extern struct export_operations lustre_export_operations;
++__u32 get_uuid2int(const char *name, int len);
++struct dentry *ll_fh_to_dentry(struct super_block *sb, __u32 *data, int len,
++ int fhtype, int parent);
++int ll_dentry_to_fh(struct dentry *, __u32 *datap, int *lenp, int need_parent);
++
++/* llite/special.c */
++extern struct inode_operations ll_special_inode_operations;
++extern struct file_operations ll_special_chr_inode_fops;
++extern struct file_operations ll_special_chr_file_fops;
++extern struct file_operations ll_special_blk_inode_fops;
++extern struct file_operations ll_special_fifo_inode_fops;
++extern struct file_operations ll_special_fifo_file_fops;
++extern struct file_operations ll_special_sock_inode_fops;
++
++/* llite/symlink.c */
++extern struct inode_operations ll_fast_symlink_inode_operations;
++
++/* llite/llite_close.c */
++struct ll_close_queue {
++ spinlock_t lcq_lock;
++ struct list_head lcq_list;
++ wait_queue_head_t lcq_waitq;
++ struct completion lcq_comp;
++};
++
++#ifdef HAVE_CLOSE_THREAD
++void llap_write_pending(struct inode *inode, struct ll_async_page *llap);
++void llap_write_complete(struct inode *inode, struct ll_async_page *llap);
++void ll_open_complete(struct inode *inode);
++int ll_is_inode_dirty(struct inode *inode);
++void ll_try_done_writing(struct inode *inode);
++void ll_queue_done_writing(struct inode *inode);
++#else
++static inline void llap_write_pending(struct inode *inode,
++ struct ll_async_page *llap) { return; };
++static inline void llap_write_complete(struct inode *inode,
++ struct ll_async_page *llap) { return; };
++static inline void ll_open_complete(struct inode *inode) { return; };
++static inline int ll_is_inode_dirty(struct inode *inode) { return 0; };
++static inline void ll_try_done_writing(struct inode *inode) { return; };
++static inline void ll_queue_done_writing(struct inode *inode) { return; };
++//static inline void ll_close_thread_shutdown(struct ll_close_queue *lcq) { return; };
++//static inline int ll_close_thread_start(struct ll_close_queue **lcq_ret) { return 0; };
++#endif
++void ll_close_thread_shutdown(struct ll_close_queue *lcq);
++int ll_close_thread_start(struct ll_close_queue **lcq_ret);
++
++/* llite/llite_mmap.c */
++typedef struct rb_root rb_root_t;
++typedef struct rb_node rb_node_t;
++
++struct ll_lock_tree_node;
++struct ll_lock_tree {
++ rb_root_t lt_root;
++ struct list_head lt_locked_list;
++ struct ll_file_data *lt_fd;
++};
++
++int ll_teardown_mmaps(struct address_space *mapping, __u64 first, __u64 last);
++int ll_file_mmap(struct file * file, struct vm_area_struct * vma);
++struct ll_lock_tree_node * ll_node_from_inode(struct inode *inode, __u64 start,
++ __u64 end, ldlm_mode_t mode);
++int ll_tree_lock(struct ll_lock_tree *tree,
++ struct ll_lock_tree_node *first_node,
++ const char *buf, size_t count, int ast_flags);
++int ll_tree_lock_iov(struct ll_lock_tree *tree,
++ struct ll_lock_tree_node *first_node,
++ const struct iovec *iov, unsigned long nr_segs,
++ int ast_flags);
++int ll_tree_unlock(struct ll_lock_tree *tree);
++
++#define ll_s2sbi(sb) (s2lsi(sb)->lsi_llsbi)
++
++static inline __u64 ll_ts2u64(struct timespec *time)
++{
++ __u64 t = time->tv_sec;
++ return t;
++}
++
++/* don't need an addref as the sb_info should be holding one */
++static inline struct obd_export *ll_s2obdexp(struct super_block *sb)
++{
++ return ll_s2sbi(sb)->ll_osc_exp;
++}
++
++/* don't need an addref as the sb_info should be holding one */
++static inline struct obd_export *ll_s2mdcexp(struct super_block *sb)
++{
++ return ll_s2sbi(sb)->ll_mdc_exp;
++}
++
++static inline struct client_obd *sbi2mdc(struct ll_sb_info *sbi)
++{
++ struct obd_device *obd = sbi->ll_mdc_exp->exp_obd;
++ if (obd == NULL)
++ LBUG();
++ return &obd->u.cli;
++}
++
++// FIXME: replace the name of this with LL_SB to conform to kernel stuff
++static inline struct ll_sb_info *ll_i2sbi(struct inode *inode)
++{
++ return ll_s2sbi(inode->i_sb);
++}
++
++static inline struct obd_export *ll_i2obdexp(struct inode *inode)
++{
++ return ll_s2obdexp(inode->i_sb);
++}
++
++static inline struct obd_export *ll_i2mdcexp(struct inode *inode)
++{
++ return ll_s2mdcexp(inode->i_sb);
++}
++
++static inline void ll_inode2fid(struct ll_fid *fid, struct inode *inode)
++{
++ mdc_pack_fid(fid, inode->i_ino, inode->i_generation,
++ inode->i_mode & S_IFMT);
++}
++
++static inline int ll_mds_max_easize(struct super_block *sb)
++{
++ return sbi2mdc(ll_s2sbi(sb))->cl_max_mds_easize;
++}
++
++static inline __u64 ll_file_maxbytes(struct inode *inode)
++{
++ return ll_i2info(inode)->lli_maxbytes;
++}
++
++/* llite/xattr.c */
++int ll_setxattr(struct dentry *dentry, const char *name,
++ const void *value, size_t size, int flags);
++ssize_t ll_getxattr(struct dentry *dentry, const char *name,
++ void *buffer, size_t size);
++ssize_t ll_listxattr(struct dentry *dentry, char *buffer, size_t size);
++int ll_removexattr(struct dentry *dentry, const char *name);
++
++/* statahead.c */
++
++#define LL_SA_RPC_MIN 2
++#define LL_SA_RPC_DEF 32
++#define LL_SA_RPC_MAX 8192
++
++/* per inode struct, for dir only */
++struct ll_statahead_info {
++ struct inode *sai_inode;
++ unsigned int sai_generation; /* generation for statahead */
++ atomic_t sai_refcount; /* when access this struct, hold
++ * refcount */
++ unsigned int sai_sent; /* stat requests sent count */
++ unsigned int sai_replied; /* stat requests which received
++ * reply */
++ unsigned int sai_max; /* max ahead of lookup */
++ unsigned int sai_index; /* index of statahead entry */
++ unsigned int sai_index_next; /* index for the next statahead
++ * entry to be stated */
++ unsigned int sai_hit; /* hit count */
++ unsigned int sai_miss; /* miss count:
++ * for "ls -al" case, it includes
++ * hidden dentry miss;
++ * for "ls -l" case, it does not
++ * include hidden dentry miss.
++ * "sai_miss_hidden" is used for
++ * the latter case.
++ */
++ unsigned int sai_consecutive_miss; /* consecutive miss */
++ unsigned int sai_miss_hidden;/* "ls -al", but first dentry
++ * is not a hidden one */
++ unsigned int sai_skip_hidden;/* skipped hidden dentry count */
++ unsigned int sai_ls_all:1; /* "ls -al", do stat-ahead for
++ * hidden entries */
++ cfs_waitq_t sai_waitq; /* stat-ahead wait queue */
++ struct ptlrpc_thread sai_thread; /* stat-ahead thread */
++ struct list_head sai_entries_sent; /* entries sent out */
++ struct list_head sai_entries_received; /* entries returned */
++ struct list_head sai_entries_stated; /* entries stated */
++};
++
++int do_statahead_enter(struct inode *dir, struct dentry **dentry, int lookup);
++void ll_statahead_exit(struct dentry *dentry, int result);
++void ll_stop_statahead(struct inode *inode, void *key);
++
++static inline
++void ll_statahead_mark(struct dentry *dentry)
++{
++ struct ll_inode_info *lli = ll_i2info(dentry->d_parent->d_inode);
++ struct ll_dentry_data *ldd = ll_d2d(dentry);
++
++ /* not the same process, don't mark */
++ if (lli->lli_opendir_pid != cfs_curproc_pid())
++ return;
++
++ spin_lock(&lli->lli_lock);
++ if (likely(lli->lli_sai != NULL && ldd != NULL))
++ ldd->lld_sa_generation = lli->lli_sai->sai_generation;
++ spin_unlock(&lli->lli_lock);
++}
++
++static inline
++int ll_statahead_enter(struct inode *dir, struct dentry **dentryp, int lookup)
++{
++ struct ll_sb_info *sbi = ll_i2sbi(dir);
++ struct ll_inode_info *lli = ll_i2info(dir);
++ struct ll_dentry_data *ldd = ll_d2d(*dentryp);
++
++ if (sbi->ll_sa_max == 0)
++ return -ENOTSUPP;
++
++ /* not the same process, don't statahead */
++ if (lli->lli_opendir_pid != cfs_curproc_pid())
++ return -EBADF;
++
++ /*
++ * When "ls" a dentry, the system trigger more than once "revalidate" or
++ * "lookup", for "getattr", for "getxattr", and maybe for others.
++ * Under patchless client mode, the operation intent is not accurate,
++ * it maybe misguide the statahead thread. For example:
++ * The "revalidate" call for "getattr" and "getxattr" of a dentry maybe
++ * have the same operation intent -- "IT_GETATTR".
++ * In fact, one dentry should has only one chance to interact with the
++ * statahead thread, otherwise the statahead windows will be confused.
++ * The solution is as following:
++ * Assign "lld_sa_generation" with "sai_generation" when a dentry
++ * "IT_GETATTR" for the first time, and the subsequent "IT_GETATTR"
++ * will bypass interacting with statahead thread for checking:
++ * "lld_sa_generation == lli_sai->sai_generation"
++ */
++ if (ldd && lli->lli_sai &&
++ ldd->lld_sa_generation == lli->lli_sai->sai_generation)
++ return -EAGAIN;
++
++ return do_statahead_enter(dir, dentryp, lookup);
++}
++
++static void inline ll_dops_init(struct dentry *de, int block)
++{
++ struct ll_dentry_data *lld = ll_d2d(de);
++
++ if (lld == NULL && block != 0) {
++ ll_set_dd(de);
++ lld = ll_d2d(de);
++ }
++
++ if (lld != NULL)
++ lld->lld_sa_generation = 0;
++
++ de->d_op = &ll_d_ops;
++}
++
++/* llite ioctl registration support routines */
++#ifdef __KERNEL__
++enum llioc_iter {
++ LLIOC_CONT = 0,
++ LLIOC_STOP
++};
++
++#define LLIOC_MAX_CMD 256
++
++/*
++ * Rules to write a callback function:
++ *
++ * Parameters:
++ * @magic: The dynamic ioctl call routine will feed this value with the
++ * pointer returned by ll_iocontrol_register. Callback functions should
++ * use this data to check for a potential collision of ioctl cmds. If a
++ * collision is found, the callback function should return LLIOC_CONT.
++ * @rcp: The result of the ioctl command.
++ *
++ * Return values:
++ * If @magic matches the pointer returned by ll_iocontrol_register, the
++ * callback should return LLIOC_STOP; return LLIOC_CONT otherwise.
++ */
++typedef enum llioc_iter (*llioc_callback_t)(struct inode *inode,
++ struct file *file, unsigned int cmd, unsigned long arg,
++ void *magic, int *rcp);
++
++enum llioc_iter ll_iocontrol_call(struct inode *inode, struct file *file,
++ unsigned int cmd, unsigned long arg, int *rcp);
++
++/* export functions */
++/* Register an ioctl block dynamically for a regular file.
++ *
++ * @cmd: the array of ioctl commands
++ * @count: number of commands in the @cmd array
++ * @cb: callback function; it will be called if an ioctl command is found to
++ * belong to the command list @cmd.
++ *
++ * Return value:
++ * A magic pointer is returned on success;
++ * otherwise, NULL is returned.
++ */
++void *ll_iocontrol_register(llioc_callback_t cb, int count, unsigned int *cmd);
++void ll_iocontrol_unregister(void *magic);
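
To make the LLIOC_CONT/LLIOC_STOP contract above concrete, here is a small user-space sketch of a registration table and dispatch loop. The entry struct, ioc_call() and my_cb() are hypothetical stand-ins; only the return-value convention is taken from the header:

#include <stdio.h>

enum llioc_iter { LLIOC_CONT = 0, LLIOC_STOP };

typedef enum llioc_iter (*cb_t)(unsigned int cmd, void *magic, int *rcp);

struct ioc_entry {
        void        *magic;   /* cookie handed back at registration time */
        unsigned int cmd;     /* one command, instead of an array        */
        cb_t         cb;
};

static struct ioc_entry my_entry;

static enum llioc_iter my_cb(unsigned int cmd, void *magic, int *rcp)
{
        if (magic != my_entry.magic)  /* cmd collision: not our registration */
                return LLIOC_CONT;
        *rcp = 0;                     /* command handled, result 0 */
        return LLIOC_STOP;
}

/* walk the registered entries until one claims the command */
static int ioc_call(struct ioc_entry *e, int n, unsigned int cmd)
{
        int rc = -25;   /* -ENOTTY if nobody claims the command */

        for (int i = 0; i < n; i++)
                if (e[i].cmd == cmd &&
                    e[i].cb(cmd, e[i].magic, &rc) == LLIOC_STOP)
                        break;
        return rc;
}

int main(void)
{
        my_entry = (struct ioc_entry){ &my_entry, 0x1234, my_cb };
        printf("rc = %d\n", ioc_call(&my_entry, 1, 0x1234)); /* rc = 0 */
        return 0;
}
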
++
++#endif
++
++#endif /* LLITE_INTERNAL_H */
+diff -urNad lustre~/lustre/llite/llite_lib.c lustre/lustre/llite/llite_lib.c
+--- lustre~/lustre/llite/llite_lib.c 2009-03-12 10:32:27.000000000 +0100
++++ lustre/lustre/llite/llite_lib.c 2009-03-12 11:02:51.000000000 +0100
+@@ -1346,7 +1346,7 @@
+ rc = vmtruncate(inode, new_size);
+ clear_bit(LLI_F_SRVLOCK, &lli->lli_flags);
+ if (rc != 0) {
+- LASSERT(atomic_read(&lli->lli_size_sem.count) <= 0);
++ LASSERT(SEM_COUNT(&lli->lli_size_sem) <= 0);
+ ll_inode_size_unlock(inode, 0);
+ }
+ }
+diff -urNad lustre~/lustre/llite/llite_lib.c.orig lustre/lustre/llite/llite_lib.c.orig
+--- lustre~/lustre/llite/llite_lib.c.orig 1970-01-01 00:00:00.000000000 +0000
++++ lustre/lustre/llite/llite_lib.c.orig 2009-03-12 10:32:27.000000000 +0100
+@@ -0,0 +1,2232 @@
++/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
++ * vim:expandtab:shiftwidth=8:tabstop=8:
++ *
++ * GPL HEADER START
++ *
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License version 2 only,
++ * as published by the Free Software Foundation.
++ *
++ * This program is distributed in the hope that it will be useful, but
++ * WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ * General Public License version 2 for more details (a copy is included
++ * in the LICENSE file that accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License
++ * version 2 along with this program; If not, see
++ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
++ *
++ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
++ * CA 95054 USA or visit www.sun.com if you need additional information or
++ * have any questions.
++ *
++ * GPL HEADER END
++ */
++/*
++ * Copyright 2008 Sun Microsystems, Inc. All rights reserved
++ * Use is subject to license terms.
++ */
++/*
++ * This file is part of Lustre, http://www.lustre.org/
++ * Lustre is a trademark of Sun Microsystems, Inc.
++ *
++ * lustre/llite/llite_lib.c
++ *
++ * Lustre Light Super operations
++ */
++
++#define DEBUG_SUBSYSTEM S_LLITE
++
++#include <linux/module.h>
++#include <linux/types.h>
++#include <linux/random.h>
++#include <linux/version.h>
++#include <linux/mm.h>
++
++#include <lustre_lite.h>
++#include <lustre_ha.h>
++#include <lustre_dlm.h>
++#include <lprocfs_status.h>
++#include <lustre_disk.h>
++#include <lustre_param.h>
++#include <lustre_cache.h>
++#include "llite_internal.h"
++
++cfs_mem_cache_t *ll_file_data_slab;
++
++LIST_HEAD(ll_super_blocks);
++spinlock_t ll_sb_lock = SPIN_LOCK_UNLOCKED;
++
++extern struct address_space_operations ll_aops;
++extern struct address_space_operations ll_dir_aops;
++
++#ifndef log2
++#define log2(n) ffz(~(n))
++#endif
++
++
++static struct ll_sb_info *ll_init_sbi(void)
++{
++ struct ll_sb_info *sbi = NULL;
++ unsigned long pages;
++ struct sysinfo si;
++ class_uuid_t uuid;
++ int i;
++ ENTRY;
++
++ OBD_ALLOC(sbi, sizeof(*sbi));
++ if (!sbi)
++ RETURN(NULL);
++
++ spin_lock_init(&sbi->ll_lock);
++ spin_lock_init(&sbi->ll_lco.lco_lock);
++ spin_lock_init(&sbi->ll_pp_extent_lock);
++ spin_lock_init(&sbi->ll_process_lock);
++ sbi->ll_rw_stats_on = 0;
++ INIT_LIST_HEAD(&sbi->ll_pglist);
++
++ si_meminfo(&si);
++ pages = si.totalram - si.totalhigh;
++ if (pages >> (20 - CFS_PAGE_SHIFT) < 512) {
++#ifdef HAVE_BGL_SUPPORT
++ sbi->ll_async_page_max = pages / 4;
++#else
++ sbi->ll_async_page_max = pages / 2;
++#endif
++ } else {
++ sbi->ll_async_page_max = (pages / 4) * 3;
++ }
++ sbi->ll_ra_info.ra_max_pages = min(pages / 32,
++ SBI_DEFAULT_READAHEAD_MAX);
++ sbi->ll_ra_info.ra_max_read_ahead_whole_pages =
++ SBI_DEFAULT_READAHEAD_WHOLE_MAX;
++ sbi->ll_contention_time = SBI_DEFAULT_CONTENTION_SECONDS;
++ sbi->ll_lockless_truncate_enable = SBI_DEFAULT_LOCKLESS_TRUNCATE_ENABLE;
++ INIT_LIST_HEAD(&sbi->ll_conn_chain);
++ INIT_LIST_HEAD(&sbi->ll_orphan_dentry_list);
++
++ ll_generate_random_uuid(uuid);
++ class_uuid_unparse(uuid, &sbi->ll_sb_uuid);
++ CDEBUG(D_CONFIG, "generated uuid: %s\n", sbi->ll_sb_uuid.uuid);
++
++ spin_lock(&ll_sb_lock);
++ list_add_tail(&sbi->ll_list, &ll_super_blocks);
++ spin_unlock(&ll_sb_lock);
++
++#ifdef ENABLE_CHECKSUM
++ sbi->ll_flags |= LL_SBI_DATA_CHECKSUM;
++#endif
++#ifdef ENABLE_LLITE_CHECKSUM
++ sbi->ll_flags |= LL_SBI_LLITE_CHECKSUM;
++#endif
++
++#ifdef HAVE_LRU_RESIZE_SUPPORT
++ sbi->ll_flags |= LL_SBI_LRU_RESIZE;
++#endif
++
++#ifdef HAVE_EXPORT___IGET
++ INIT_LIST_HEAD(&sbi->ll_deathrow);
++ spin_lock_init(&sbi->ll_deathrow_lock);
++#endif
++ for (i = 0; i <= LL_PROCESS_HIST_MAX; i++) {
++ spin_lock_init(&sbi->ll_rw_extents_info.pp_extents[i].pp_r_hist.oh_lock);
++ spin_lock_init(&sbi->ll_rw_extents_info.pp_extents[i].pp_w_hist.oh_lock);
++ }
++
++ /* metadata statahead is enabled by default */
++ sbi->ll_sa_max = LL_SA_RPC_DEF;
++
++ RETURN(sbi);
++}
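
The cache-sizing heuristic in ll_init_sbi() above can be checked with a standalone demo. The page shift and memory size below are made-up example values; hosts with less than 512 MiB of low memory cap the async page cache at half of it (a quarter with BGL support), while larger hosts use three quarters:

#include <stdio.h>

int main(void)
{
        const int page_shift = 12;             /* assume 4 KiB pages */
        unsigned long pages = 65536;           /* 256 MiB of low memory */
        unsigned long max;

        if (pages >> (20 - page_shift) < 512)  /* low memory in MiB < 512? */
                max = pages / 2;               /* small host: half */
        else
                max = (pages / 4) * 3;         /* big host: 3/4 */

        printf("async_page_max = %lu pages\n", max); /* 32768 */
        return 0;
}
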
++
++void ll_free_sbi(struct super_block *sb)
++{
++ struct ll_sb_info *sbi = ll_s2sbi(sb);
++ ENTRY;
++
++ if (sbi != NULL) {
++ spin_lock(&ll_sb_lock);
++ list_del(&sbi->ll_list);
++ spin_unlock(&ll_sb_lock);
++ OBD_FREE(sbi, sizeof(*sbi));
++ }
++ EXIT;
++}
++
++static struct dentry_operations ll_d_root_ops = {
++#ifdef DCACHE_LUSTRE_INVALID
++ .d_compare = ll_dcompare,
++#endif
++};
++
++static int client_common_fill_super(struct super_block *sb,
++ char *mdc, char *osc)
++{
++ struct inode *root = 0;
++ struct ll_sb_info *sbi = ll_s2sbi(sb);
++ struct obd_device *obd;
++ struct ll_fid rootfid;
++ struct obd_statfs osfs;
++ struct ptlrpc_request *request = NULL;
++ struct lustre_handle osc_conn = {0, };
++ struct lustre_handle mdc_conn = {0, };
++ struct lustre_md md;
++ struct obd_connect_data *data = NULL;
++ int err, checksum;
++ ENTRY;
++
++ obd = class_name2obd(mdc);
++ if (!obd) {
++ CERROR("MDC %s: not setup or attached\n", mdc);
++ RETURN(-EINVAL);
++ }
++
++ OBD_ALLOC(data, sizeof(*data));
++ if (data == NULL)
++ RETURN(-ENOMEM);
++
++ if (proc_lustre_fs_root) {
++ err = lprocfs_register_mountpoint(proc_lustre_fs_root, sb,
++ osc, mdc);
++ if (err < 0)
++ CERROR("could not register mount in /proc/fs/lustre\n");
++ }
++
++ /* indicate the features supported by this client */
++ data->ocd_connect_flags = OBD_CONNECT_VERSION | OBD_CONNECT_IBITS |
++ OBD_CONNECT_JOIN | OBD_CONNECT_ATTRFID | OBD_CONNECT_NODEVOH |
++ OBD_CONNECT_CANCELSET | OBD_CONNECT_AT;
++#ifdef HAVE_LRU_RESIZE_SUPPORT
++ if (sbi->ll_flags & LL_SBI_LRU_RESIZE)
++ data->ocd_connect_flags |= OBD_CONNECT_LRU_RESIZE;
++#endif
++#ifdef CONFIG_FS_POSIX_ACL
++ data->ocd_connect_flags |= OBD_CONNECT_ACL;
++#endif
++ data->ocd_ibits_known = MDS_INODELOCK_FULL;
++ data->ocd_version = LUSTRE_VERSION_CODE;
++
++ if (sb->s_flags & MS_RDONLY)
++ data->ocd_connect_flags |= OBD_CONNECT_RDONLY;
++ if (sbi->ll_flags & LL_SBI_USER_XATTR)
++ data->ocd_connect_flags |= OBD_CONNECT_XATTR;
++
++#ifdef HAVE_MS_FLOCK_LOCK
++ /* force vfs to use lustre handler for flock() calls - bug 10743 */
++ sb->s_flags |= MS_FLOCK_LOCK;
++#endif
++
++ if (sbi->ll_flags & LL_SBI_FLOCK)
++ sbi->ll_fop = &ll_file_operations_flock;
++ else if (sbi->ll_flags & LL_SBI_LOCALFLOCK)
++ sbi->ll_fop = &ll_file_operations;
++ else
++ sbi->ll_fop = &ll_file_operations_noflock;
++
++
++ err = obd_connect(&mdc_conn, obd, &sbi->ll_sb_uuid, data, &sbi->ll_mdc_exp);
++ if (err == -EBUSY) {
++ LCONSOLE_ERROR_MSG(0x14f, "An MDT (mdc %s) is performing "
++ "recovery, of which this client is not a "
++ "part. Please wait for recovery to complete,"
++ " abort, or time out.\n", mdc);
++ GOTO(out, err);
++ } else if (err) {
++ CERROR("cannot connect to %s: rc = %d\n", mdc, err);
++ GOTO(out, err);
++ }
++
++ err = obd_statfs(obd, &osfs, cfs_time_current_64() - HZ, 0);
++ if (err)
++ GOTO(out_mdc, err);
++
++ /* MDC connect is surely finished by now because we actually sent
++ * a statfs RPC, otherwise obd_connect() is asynchronous. */
++ *data = class_exp2cliimp(sbi->ll_mdc_exp)->imp_connect_data;
++
++ LASSERT(osfs.os_bsize);
++ sb->s_blocksize = osfs.os_bsize;
++ sb->s_blocksize_bits = log2(osfs.os_bsize);
++ sb->s_magic = LL_SUPER_MAGIC;
++
++ /* for bug 11559. in $LINUX/fs/read_write.c, function do_sendfile():
++ * retval = in_file->f_op->sendfile(...);
++ * if (*ppos > max)
++ * retval = -EOVERFLOW;
++ *
++ * it will check if *ppos is greater than max. However, max equals
++ * s_maxbytes, which is a negative integer on an x86_64 box since loff_t
++ * has been defined as a signed long long integer in the linux kernel. */
++#if BITS_PER_LONG == 64
++ sb->s_maxbytes = PAGE_CACHE_MAXBYTES >> 1;
++#else
++ sb->s_maxbytes = PAGE_CACHE_MAXBYTES;
++#endif
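
A quick demonstration of why the 64-bit halving above is needed. The constant here is a stand-in for PAGE_CACHE_MAXBYTES with the sign bit set, not the real kernel definition; once the top bit is occupied, a signed loff_t comparison such as "*ppos > max" sees a negative max, and shifting right by one keeps the value positive:

#include <stdio.h>

int main(void)
{
        unsigned long long raw = ~0ULL;            /* all 64 bits set */
        long long as_loff = (long long)raw;        /* negative! */
        long long halved = (long long)(raw >> 1);  /* positive */

        printf("as_loff = %lld\n", as_loff);  /* -1 */
        printf("halved  = %lld\n", halved);   /* 9223372036854775807 */
        return 0;
}
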
++ sbi->ll_namelen = osfs.os_namelen;
++ sbi->ll_max_rw_chunk = LL_DEFAULT_MAX_RW_CHUNK;
++
++ if ((sbi->ll_flags & LL_SBI_USER_XATTR) &&
++ !(data->ocd_connect_flags & OBD_CONNECT_XATTR)) {
++ LCONSOLE_INFO("Disabling user_xattr feature because "
++ "it is not supported on the server\n");
++ sbi->ll_flags &= ~LL_SBI_USER_XATTR;
++ }
++
++ if (data->ocd_connect_flags & OBD_CONNECT_ACL) {
++#ifdef MS_POSIXACL
++ sb->s_flags |= MS_POSIXACL;
++#endif
++ sbi->ll_flags |= LL_SBI_ACL;
++ } else
++ sbi->ll_flags &= ~LL_SBI_ACL;
++
++ if (data->ocd_connect_flags & OBD_CONNECT_JOIN)
++ sbi->ll_flags |= LL_SBI_JOIN;
++
++ obd = class_name2obd(osc);
++ if (!obd) {
++ CERROR("OSC %s: not setup or attached\n", osc);
++ GOTO(out_mdc, err = -ENODEV);
++ }
++
++ data->ocd_connect_flags = OBD_CONNECT_VERSION | OBD_CONNECT_GRANT |
++ OBD_CONNECT_REQPORTAL | OBD_CONNECT_BRW_SIZE |
++ OBD_CONNECT_SRVLOCK | OBD_CONNECT_CANCELSET | OBD_CONNECT_AT |
++ OBD_CONNECT_TRUNCLOCK;
++
++ if (!OBD_FAIL_CHECK(OBD_FAIL_OSC_CONNECT_CKSUM)) {
++ /* OBD_CONNECT_CKSUM should always be set, even if checksums are
++ * disabled by default, because it can still be enabled on the
++ * fly via /proc. As a consequence, we still need to come to an
++ * agreement on the supported algorithms at connect time */
++ data->ocd_connect_flags |= OBD_CONNECT_CKSUM;
++
++ if (OBD_FAIL_CHECK(OBD_FAIL_OSC_CKSUM_ADLER_ONLY))
++ data->ocd_cksum_types = OBD_CKSUM_ADLER;
++ else
++ /* send the list of supported checksum types */
++ data->ocd_cksum_types = OBD_CKSUM_ALL;
++ }
++
++#ifdef HAVE_LRU_RESIZE_SUPPORT
++ if (sbi->ll_flags & LL_SBI_LRU_RESIZE)
++ data->ocd_connect_flags |= OBD_CONNECT_LRU_RESIZE;
++#endif
++
++ CDEBUG(D_RPCTRACE, "ocd_connect_flags: "LPX64" ocd_version: %d "
++ "ocd_grant: %d\n", data->ocd_connect_flags,
++ data->ocd_version, data->ocd_grant);
++
++ obd->obd_upcall.onu_owner = &sbi->ll_lco;
++ obd->obd_upcall.onu_upcall = ll_ocd_update;
++ data->ocd_brw_size = PTLRPC_MAX_BRW_PAGES << CFS_PAGE_SHIFT;
++
++ obd_register_lock_cancel_cb(obd, ll_extent_lock_cancel_cb);
++ obd_register_page_removal_cb(obd, ll_page_removal_cb, ll_pin_extent_cb);
++
++
++ err = obd_connect(&osc_conn, obd, &sbi->ll_sb_uuid, data, &sbi->ll_osc_exp);
++ if (err == -EBUSY) {
++ LCONSOLE_ERROR_MSG(0x150, "An OST (osc %s) is performing "
++ "recovery, of which this client is not a "
++ "part. Please wait for recovery to "
++ "complete, abort, or time out.\n", osc);
++ GOTO(out, err); // need clear cb?
++ } else if (err) {
++ CERROR("cannot connect to %s: rc = %d\n", osc, err);
++ GOTO(out_cb, err);
++ }
++ spin_lock(&sbi->ll_lco.lco_lock);
++ sbi->ll_lco.lco_flags = data->ocd_connect_flags;
++ sbi->ll_lco.lco_mdc_exp = sbi->ll_mdc_exp;
++ sbi->ll_lco.lco_osc_exp = sbi->ll_osc_exp;
++ spin_unlock(&sbi->ll_lco.lco_lock);
++
++ err = mdc_init_ea_size(sbi->ll_mdc_exp, sbi->ll_osc_exp);
++ if (err) {
++ CERROR("cannot set max EA and cookie sizes: rc = %d\n", err);
++ GOTO(out_osc, err);
++ }
++
++ err = obd_prep_async_page(sbi->ll_osc_exp, NULL, NULL, NULL,
++ 0, NULL, NULL, NULL, 0, NULL);
++ if (err < 0) {
++ LCONSOLE_ERROR_MSG(0x151, "There are no OSTs in this "
++ "filesystem. There must be at least one "
++ "active OST for a client to start.\n");
++ GOTO(out_osc, err);
++ }
++
++ if (!ll_async_page_slab) {
++ ll_async_page_slab_size =
++ size_round(sizeof(struct ll_async_page)) + err;
++ ll_async_page_slab = cfs_mem_cache_create("ll_async_page",
++ ll_async_page_slab_size,
++ 0, 0);
++ if (!ll_async_page_slab)
++ GOTO(out_osc, err = -ENOMEM);
++ }
++
++ err = mdc_getstatus(sbi->ll_mdc_exp, &rootfid);
++ if (err) {
++ CERROR("cannot mds_connect: rc = %d\n", err);
++ GOTO(out_osc, err);
++ }
++ CDEBUG(D_SUPER, "rootfid "LPU64"\n", rootfid.id);
++ sbi->ll_rootino = rootfid.id;
++
++ sb->s_op = &lustre_super_operations;
++#if THREAD_SIZE >= 8192
++ /* Disable the NFS export because of stack overflow
++ * when THREAD_SIZE < 8192. Please refer to 17630. */
++ sb->s_export_op = &lustre_export_operations;
++#endif
++
++ /* make root inode
++ * XXX: move this to after cbd setup? */
++ err = mdc_getattr(sbi->ll_mdc_exp, &rootfid,
++ OBD_MD_FLGETATTR | OBD_MD_FLBLOCKS |
++ (sbi->ll_flags & LL_SBI_ACL ? OBD_MD_FLACL : 0),
++ 0, &request);
++ if (err) {
++ CERROR("mdc_getattr failed for root: rc = %d\n", err);
++ GOTO(out_osc, err);
++ }
++
++ err = mdc_req2lustre_md(request, REPLY_REC_OFF, sbi->ll_osc_exp, &md);
++ if (err) {
++ CERROR("failed to understand root inode md: rc = %d\n",err);
++ ptlrpc_req_finished (request);
++ GOTO(out_osc, err);
++ }
++
++ LASSERT(sbi->ll_rootino != 0);
++ root = ll_iget(sb, sbi->ll_rootino, &md);
++
++ ptlrpc_req_finished(request);
++
++ if (root == NULL || is_bad_inode(root)) {
++ mdc_free_lustre_md(sbi->ll_osc_exp, &md);
++ CERROR("lustre_lite: bad iget4 for root\n");
++ GOTO(out_root, err = -EBADF);
++ }
++
++ err = ll_close_thread_start(&sbi->ll_lcq);
++ if (err) {
++ CERROR("cannot start close thread: rc %d\n", err);
++ GOTO(out_root, err);
++ }
++
++ checksum = sbi->ll_flags & LL_SBI_DATA_CHECKSUM;
++ err = obd_set_info_async(sbi->ll_osc_exp, sizeof(KEY_CHECKSUM),
++ KEY_CHECKSUM, sizeof(checksum),
++ &checksum, NULL);
++
++ /* making vm readahead 0 for 2.4.x. In the case of 2.6.x,
++ backing dev info assigned to inode mapping is used for
++ determining maximal readahead. */
++#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)) && \
++ !defined(KERNEL_HAS_AS_MAX_READAHEAD)
++ /* bug 2805 - set VM readahead to zero */
++ vm_max_readahead = vm_min_readahead = 0;
++#endif
++
++ sb->s_root = d_alloc_root(root);
++ if (data != NULL)
++ OBD_FREE(data, sizeof(*data));
++ sb->s_root->d_op = &ll_d_root_ops;
++
++ sbi->ll_sdev_orig = sb->s_dev;
++ /* We set sb->s_dev equal on all lustre clients in order to support
++ * NFS export clustering. NFSD requires that the FSID be the same
++ * on all clients. */
++ /* s_dev is also used in lt_compare() to compare two fs, but that is
++ * only a node-local comparison. */
++ sb->s_dev = get_uuid2int(sbi2mdc(sbi)->cl_target_uuid.uuid,
++ strlen(sbi2mdc(sbi)->cl_target_uuid.uuid));
++
++ RETURN(err);
++
++out_root:
++ if (root)
++ iput(root);
++out_osc:
++ obd_disconnect(sbi->ll_osc_exp);
++ sbi->ll_osc_exp = NULL;
++out_cb:
++ obd = class_name2obd(osc);
++ obd_unregister_lock_cancel_cb(obd, ll_extent_lock_cancel_cb);
++ obd_unregister_page_removal_cb(obd, ll_page_removal_cb);
++out_mdc:
++ obd_disconnect(sbi->ll_mdc_exp);
++ sbi->ll_mdc_exp = NULL;
++out:
++ if (data != NULL)
++ OBD_FREE(data, sizeof(*data));
++ lprocfs_unregister_mountpoint(sbi);
++ RETURN(err);
++}
++
++int ll_get_max_mdsize(struct ll_sb_info *sbi, int *lmmsize)
++{
++ int size, rc;
++
++ *lmmsize = obd_size_diskmd(sbi->ll_osc_exp, NULL);
++ size = sizeof(int);
++ rc = obd_get_info(sbi->ll_mdc_exp, sizeof(KEY_MAX_EASIZE),
++ KEY_MAX_EASIZE, &size, lmmsize, NULL);
++ if (rc)
++ CERROR("Get max mdsize error rc %d \n", rc);
++
++ RETURN(rc);
++}
++
++void ll_dump_inode(struct inode *inode)
++{
++ struct list_head *tmp;
++ int dentry_count = 0;
++
++ LASSERT(inode != NULL);
++
++ list_for_each(tmp, &inode->i_dentry)
++ dentry_count++;
++
++ CERROR("inode %p dump: dev=%s ino=%lu mode=%o count=%u, %d dentries\n",
++ inode, ll_i2mdcexp(inode)->exp_obd->obd_name, inode->i_ino,
++ inode->i_mode, atomic_read(&inode->i_count), dentry_count);
++}
++
++void lustre_dump_dentry(struct dentry *dentry, int recur)
++{
++ struct list_head *tmp;
++ int subdirs = 0;
++
++ LASSERT(dentry != NULL);
++
++ list_for_each(tmp, &dentry->d_subdirs)
++ subdirs++;
++
++ CERROR("dentry %p dump: name=%.*s parent=%.*s (%p), inode=%p, count=%u,"
++ " flags=0x%x, fsdata=%p, %d subdirs\n", dentry,
++ dentry->d_name.len, dentry->d_name.name,
++ dentry->d_parent->d_name.len, dentry->d_parent->d_name.name,
++ dentry->d_parent, dentry->d_inode, atomic_read(&dentry->d_count),
++ dentry->d_flags, dentry->d_fsdata, subdirs);
++ if (dentry->d_inode != NULL)
++ ll_dump_inode(dentry->d_inode);
++
++ if (recur == 0)
++ return;
++
++ list_for_each(tmp, &dentry->d_subdirs) {
++ struct dentry *d = list_entry(tmp, struct dentry, d_child);
++ lustre_dump_dentry(d, recur - 1);
++ }
++}
++
++#ifdef HAVE_EXPORT___IGET
++static void prune_dir_dentries(struct inode *inode)
++{
++ struct dentry *dentry, *prev = NULL;
++
++ /* due to lustre-specific logic, a directory
++ * can have several dentries - a bug from the VFS POV */
++restart:
++ spin_lock(&dcache_lock);
++ if (!list_empty(&inode->i_dentry)) {
++ dentry = list_entry(inode->i_dentry.prev,
++ struct dentry, d_alias);
++ /* in order to prevent infinite loops we
++ * break if previous dentry is busy */
++ if (dentry != prev) {
++ prev = dentry;
++ dget_locked(dentry);
++ spin_unlock(&dcache_lock);
++
++ /* try to kill all child dentries */
++ shrink_dcache_parent(dentry);
++ dput(dentry);
++
++ /* now try to get rid of current dentry */
++ d_prune_aliases(inode);
++ goto restart;
++ }
++ }
++ spin_unlock(&dcache_lock);
++}
++
++static void prune_deathrow_one(struct ll_inode_info *lli)
++{
++ struct inode *inode = ll_info2i(lli);
++
++ /* first, try to drop any dentries - they hold a ref on the inode */
++ if (S_ISDIR(inode->i_mode))
++ prune_dir_dentries(inode);
++ else
++ d_prune_aliases(inode);
++
++
++ /* if somebody still uses it, leave it */
++ LASSERT(atomic_read(&inode->i_count) > 0);
++ if (atomic_read(&inode->i_count) > 1)
++ goto out;
++
++ CDEBUG(D_INODE, "inode %lu/%u(%d) looks like a good candidate for prune\n",
++ inode->i_ino,inode->i_generation, atomic_read(&inode->i_count));
++
++ /* seems nobody uses it anymore */
++ inode->i_nlink = 0;
++
++out:
++ iput(inode);
++ return;
++}
++
++static void prune_deathrow(struct ll_sb_info *sbi, int try)
++{
++ struct ll_inode_info *lli;
++ int empty;
++
++ do {
++ if (need_resched() && try)
++ break;
++
++ if (try) {
++ if (!spin_trylock(&sbi->ll_deathrow_lock))
++ break;
++ } else {
++ spin_lock(&sbi->ll_deathrow_lock);
++ }
++
++ empty = 1;
++ lli = NULL;
++ if (!list_empty(&sbi->ll_deathrow)) {
++ lli = list_entry(sbi->ll_deathrow.next,
++ struct ll_inode_info,
++ lli_dead_list);
++ list_del_init(&lli->lli_dead_list);
++ if (!list_empty(&sbi->ll_deathrow))
++ empty = 0;
++ }
++ spin_unlock(&sbi->ll_deathrow_lock);
++
++ if (lli)
++ prune_deathrow_one(lli);
++
++ } while (empty == 0);
++}
++#else /* !HAVE_EXPORT___IGET */
++#define prune_deathrow(sbi, try) do {} while (0)
++#endif /* HAVE_EXPORT___IGET */
++
++void client_common_put_super(struct super_block *sb)
++{
++ struct ll_sb_info *sbi = ll_s2sbi(sb);
++ ENTRY;
++
++ ll_close_thread_shutdown(sbi->ll_lcq);
++
++ lprocfs_unregister_mountpoint(sbi);
++
++ /* destroy inodes in deathrow */
++ prune_deathrow(sbi, 0);
++
++ list_del(&sbi->ll_conn_chain);
++
++ /* callbacks are cleared after disconnecting each target */
++ obd_disconnect(sbi->ll_osc_exp);
++ sbi->ll_osc_exp = NULL;
++
++ obd_disconnect(sbi->ll_mdc_exp);
++ sbi->ll_mdc_exp = NULL;
++
++ EXIT;
++}
++
++void ll_kill_super(struct super_block *sb)
++{
++ struct ll_sb_info *sbi;
++
++ ENTRY;
++
++ /* sb not initialized? */
++ if (!(sb->s_flags & MS_ACTIVE))
++ return;
++
++ sbi = ll_s2sbi(sb);
++ /* we need to restore the s_dev that was changed for clustered NFS before
++ * put_super, because new kernels cache s_dev; changing sb->s_dev in
++ * put_super does not affect the real device removal */
++ if (sbi)
++ sb->s_dev = sbi->ll_sdev_orig;
++ EXIT;
++}
++
++char *ll_read_opt(const char *opt, char *data)
++{
++ char *value;
++ char *retval;
++ ENTRY;
++
++ CDEBUG(D_SUPER, "option: %s, data %s\n", opt, data);
++ if (strncmp(opt, data, strlen(opt)))
++ RETURN(NULL);
++ if ((value = strchr(data, '=')) == NULL)
++ RETURN(NULL);
++
++ value++;
++ OBD_ALLOC(retval, strlen(value) + 1);
++ if (!retval) {
++ CERROR("out of memory!\n");
++ RETURN(NULL);
++ }
++
++ memcpy(retval, value, strlen(value)+1);
++ CDEBUG(D_SUPER, "Assigned option: %s, value %s\n", opt, retval);
++ RETURN(retval);
++}
++
++static inline int ll_set_opt(const char *opt, char *data, int fl)
++{
++ if (strncmp(opt, data, strlen(opt)) != 0)
++ return(0);
++ else
++ return(fl);
++}
++
++/* non-client-specific mount options are parsed in lmd_parse */
++static int ll_options(char *options, int *flags)
++{
++ int tmp;
++ char *s1 = options, *s2;
++ ENTRY;
++
++ if (!options)
++ RETURN(0);
++
++ CDEBUG(D_CONFIG, "Parsing opts %s\n", options);
++
++ while (*s1) {
++ CDEBUG(D_SUPER, "next opt=%s\n", s1);
++ tmp = ll_set_opt("nolock", s1, LL_SBI_NOLCK);
++ if (tmp) {
++ *flags |= tmp;
++ goto next;
++ }
++ tmp = ll_set_opt("flock", s1, LL_SBI_FLOCK);
++ if (tmp) {
++ *flags |= tmp;
++ goto next;
++ }
++ tmp = ll_set_opt("localflock", s1, LL_SBI_LOCALFLOCK);
++ if (tmp) {
++ *flags |= tmp;
++ goto next;
++ }
++ tmp = ll_set_opt("noflock", s1, LL_SBI_FLOCK|LL_SBI_LOCALFLOCK);
++ if (tmp) {
++ *flags &= ~tmp;
++ goto next;
++ }
++ tmp = ll_set_opt("user_xattr", s1, LL_SBI_USER_XATTR);
++ if (tmp) {
++ *flags |= tmp;
++ goto next;
++ }
++ tmp = ll_set_opt("nouser_xattr", s1, LL_SBI_USER_XATTR);
++ if (tmp) {
++ *flags &= ~tmp;
++ goto next;
++ }
++ tmp = ll_set_opt("acl", s1, LL_SBI_ACL);
++ if (tmp) {
++ /* Ignore deprecated mount option. The client will
++ * always try to mount with ACL support; whether it
++ * is used depends on whether the server supports it. */
++ LCONSOLE_ERROR_MSG(0x152, "Ignoring deprecated "
++ "mount option 'acl'.\n");
++ goto next;
++ }
++ tmp = ll_set_opt("noacl", s1, LL_SBI_ACL);
++ if (tmp) {
++ LCONSOLE_ERROR_MSG(0x152, "Ignoring deprecated "
++ "mount option 'noacl'.\n");
++ goto next;
++ }
++
++ tmp = ll_set_opt("checksum", s1, LL_SBI_DATA_CHECKSUM);
++ if (tmp) {
++ *flags |= tmp;
++ goto next;
++ }
++ tmp = ll_set_opt("nochecksum", s1, LL_SBI_DATA_CHECKSUM);
++ if (tmp) {
++ *flags &= ~tmp;
++ goto next;
++ }
++
++ tmp = ll_set_opt("lruresize", s1, LL_SBI_LRU_RESIZE);
++ if (tmp) {
++ *flags |= tmp;
++ goto next;
++ }
++ tmp = ll_set_opt("nolruresize", s1, LL_SBI_LRU_RESIZE);
++ if (tmp) {
++ *flags &= ~tmp;
++ goto next;
++ }
++ LCONSOLE_ERROR_MSG(0x152, "Unknown option '%s', won't mount.\n",
++ s1);
++ RETURN(-EINVAL);
++
++next:
++ /* Find next opt */
++ s2 = strchr(s1, ',');
++ if (s2 == NULL)
++ break;
++ s1 = s2 + 1;
++ }
++ RETURN(0);
++}
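
The prefix-matching in ll_set_opt()/ll_options() above can be modeled in user space. The flag values below are illustrative (not the real LL_SBI_* constants) and the parser is simplified to strtok() over comma-separated tokens, but the set/clear behaviour for "no"-prefixed variants matches the patch:

#include <stdio.h>
#include <string.h>

#define SBI_FLOCK      0x01   /* illustrative values, not the real ones */
#define SBI_USER_XATTR 0x02

static int set_opt(const char *opt, const char *data, int fl)
{
        return strncmp(opt, data, strlen(opt)) ? 0 : fl;
}

static int parse(char *options, int *flags)
{
        for (char *s = strtok(options, ","); s; s = strtok(NULL, ",")) {
                int tmp;
                if ((tmp = set_opt("flock", s, SBI_FLOCK)))
                        *flags |= tmp;
                else if ((tmp = set_opt("nouser_xattr", s, SBI_USER_XATTR)))
                        *flags &= ~tmp;   /* "no" variant clears the flag */
                else if ((tmp = set_opt("user_xattr", s, SBI_USER_XATTR)))
                        *flags |= tmp;
                else
                        return -22;       /* -EINVAL: unknown option */
        }
        return 0;
}

int main(void)
{
        char opts[] = "flock,nouser_xattr";
        int flags = SBI_USER_XATTR;       /* pretend it was on by default */

        if (parse(opts, &flags) == 0)
                printf("flags = 0x%x\n", flags); /* 0x1: flock on, xattr off */
        return 0;
}
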
++
++void ll_lli_init(struct ll_inode_info *lli)
++{
++ lli->lli_inode_magic = LLI_INODE_MAGIC;
++ sema_init(&lli->lli_size_sem, 1);
++ sema_init(&lli->lli_write_sem, 1);
++ lli->lli_flags = 0;
++ lli->lli_maxbytes = PAGE_CACHE_MAXBYTES;
++ spin_lock_init(&lli->lli_lock);
++ sema_init(&lli->lli_och_sem, 1);
++ lli->lli_mds_read_och = lli->lli_mds_write_och = NULL;
++ lli->lli_mds_exec_och = NULL;
++ lli->lli_open_fd_read_count = lli->lli_open_fd_write_count = 0;
++ lli->lli_open_fd_exec_count = 0;
++ INIT_LIST_HEAD(&lli->lli_dead_list);
++#ifdef HAVE_CLOSE_THREAD
++ INIT_LIST_HEAD(&lli->lli_pending_write_llaps);
++#endif
++}
++
++/* COMPAT_146 */
++#define MDCDEV "mdc_dev"
++static int old_lustre_process_log(struct super_block *sb, char *newprofile,
++ struct config_llog_instance *cfg)
++{
++ struct lustre_sb_info *lsi = s2lsi(sb);
++ struct obd_device *obd;
++ struct lustre_handle mdc_conn = {0, };
++ struct obd_export *exp;
++ char *ptr, *mdt, *profile;
++ char niduuid[10] = "mdtnid0";
++ class_uuid_t uuid;
++ struct obd_uuid mdc_uuid;
++ struct llog_ctxt *ctxt;
++ struct obd_connect_data ocd = { 0 };
++ lnet_nid_t nid;
++ int i, rc = 0, recov_bk = 1, failnodes = 0;
++ ENTRY;
++
++ ll_generate_random_uuid(uuid);
++ class_uuid_unparse(uuid, &mdc_uuid);
++ CDEBUG(D_HA, "generated uuid: %s\n", mdc_uuid.uuid);
++
++ /* Figure out the old mdt and profile name from new-style profile
++ ("lustre" from "mds/lustre-client") */
++ mdt = newprofile;
++ profile = strchr(mdt, '/');
++ if (profile == NULL) {
++ CDEBUG(D_CONFIG, "Can't find MDT name in %s\n", newprofile);
++ GOTO(out, rc = -EINVAL);
++ }
++ *profile = '\0';
++ profile++;
++ ptr = strrchr(profile, '-');
++ if (ptr == NULL) {
++ CDEBUG(D_CONFIG, "Can't find client name in %s\n", newprofile);
++ GOTO(out, rc = -EINVAL);
++ }
++ *ptr = '\0';
++
++ LCONSOLE_WARN("This looks like an old mount command; I will try to "
++ "contact MDT '%s' for profile '%s'\n", mdt, profile);
++
++ /* Use nids from mount line: uml1,1@elan:uml2,2@elan:/lustre */
++ i = 0;
++ ptr = lsi->lsi_lmd->lmd_dev;
++ while (class_parse_nid(ptr, &nid, &ptr) == 0) {
++ rc = do_lcfg(MDCDEV, nid, LCFG_ADD_UUID, niduuid, 0,0,0);
++ i++;
++ /* Stop at the first failover nid */
++ if (*ptr == ':')
++ break;
++ }
++ if (i == 0) {
++ CERROR("No valid MDT nids found.\n");
++ GOTO(out, rc = -EINVAL);
++ }
++ failnodes++;
++
++ rc = do_lcfg(MDCDEV, 0, LCFG_ATTACH, LUSTRE_MDC_NAME,mdc_uuid.uuid,0,0);
++ if (rc < 0)
++ GOTO(out_del_uuid, rc);
++
++ rc = do_lcfg(MDCDEV, 0, LCFG_SETUP, mdt, niduuid, 0, 0);
++ if (rc < 0) {
++ LCONSOLE_ERROR_MSG(0x153, "I couldn't establish a connection "
++ "with the MDT. Check that the MDT host NID "
++ "is correct and the networks are up.\n");
++ GOTO(out_detach, rc);
++ }
++
++ obd = class_name2obd(MDCDEV);
++ if (obd == NULL)
++ GOTO(out_cleanup, rc = -EINVAL);
++
++ /* Add any failover nids */
++ while (*ptr == ':') {
++ /* New failover node */
++ sprintf(niduuid, "mdtnid%d", failnodes);
++ i = 0;
++ while (class_parse_nid(ptr, &nid, &ptr) == 0) {
++ i++;
++ rc = do_lcfg(MDCDEV, nid, LCFG_ADD_UUID, niduuid,0,0,0);
++ if (rc)
++ CERROR("Add uuid for %s failed %d\n",
++ libcfs_nid2str(nid), rc);
++ if (*ptr == ':')
++ break;
++ }
++ if (i > 0) {
++ rc = do_lcfg(MDCDEV, 0, LCFG_ADD_CONN, niduuid, 0, 0,0);
++ if (rc)
++ CERROR("Add conn for %s failed %d\n",
++ libcfs_nid2str(nid), rc);
++ failnodes++;
++ } else {
++ /* at ":/fsname" */
++ break;
++ }
++ }
++
++ /* Try all connections, but only once. */
++ rc = obd_set_info_async(obd->obd_self_export,
++ sizeof(KEY_INIT_RECOV_BACKUP), KEY_INIT_RECOV_BACKUP,
++ sizeof(recov_bk), &recov_bk, NULL);
++ if (rc)
++ GOTO(out_cleanup, rc);
++
++ /* If we don't have this then an ACL MDS will refuse the connection */
++ ocd.ocd_connect_flags = OBD_CONNECT_ACL;
++
++ rc = obd_connect(&mdc_conn, obd, &mdc_uuid, &ocd, &exp);
++ if (rc) {
++ CERROR("cannot connect to %s: rc = %d\n", mdt, rc);
++ GOTO(out_cleanup, rc);
++ }
++
++ ctxt = llog_get_context(exp->exp_obd, LLOG_CONFIG_REPL_CTXT);
++
++ cfg->cfg_flags |= CFG_F_COMPAT146;
++
++#if 1
++ rc = class_config_parse_llog(ctxt, profile, cfg);
++#else
++ /*
++ * For debugging, it's useful to just dump the log
++ */
++ rc = class_config_dump_llog(ctxt, profile, cfg);
++#endif
++ llog_ctxt_put(ctxt);
++ switch (rc) {
++ case 0: {
++ /* Set the caller's profile name to the old-style */
++ memcpy(newprofile, profile, strlen(profile) + 1);
++ break;
++ }
++ case -EINVAL:
++ LCONSOLE_ERROR_MSG(0x154, "%s: The configuration '%s' could not"
++ " be read from the MDT '%s'. Make sure this"
++ " client and the MDT are running compatible "
++ "versions of Lustre.\n",
++ obd->obd_name, profile, mdt);
++ /* fall through */
++ default:
++ LCONSOLE_ERROR_MSG(0x155, "%s: The configuration '%s' could not"
++ " be read from the MDT '%s'. This may be "
++ "the result of communication errors between "
++ "the client and the MDT, or if the MDT is "
++ "not running.\n", obd->obd_name, profile,
++ mdt);
++ break;
++ }
++
++ /* We don't so much care about errors in cleaning up the config llog
++ * connection, as we have already read the config by this point. */
++ obd_disconnect(exp);
++
++out_cleanup:
++ do_lcfg(MDCDEV, 0, LCFG_CLEANUP, 0, 0, 0, 0);
++
++out_detach:
++ do_lcfg(MDCDEV, 0, LCFG_DETACH, 0, 0, 0, 0);
++
++out_del_uuid:
++ /* class_add_uuid adds a nid even if the same uuid exists; we may
++ delete any copy here, so they had all better match. */
++ for (i = 0; i < failnodes; i++) {
++ sprintf(niduuid, "mdtnid%d", i);
++ do_lcfg(MDCDEV, 0, LCFG_DEL_UUID, niduuid, 0, 0, 0);
++ }
++ /* class_import_put will get rid of the additional connections */
++out:
++ RETURN(rc);
++}
++/* end COMPAT_146 */
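
The string surgery at the top of old_lustre_process_log() is easy to miss, so here it is extracted into a runnable user-space demo: a new-style profile like "mds/lustre-client" splits into the old MDT name "mds" and the old-style profile "lustre":

#include <stdio.h>
#include <string.h>

int main(void)
{
        char newprofile[] = "mds/lustre-client";
        char *mdt = newprofile;
        char *profile = strchr(mdt, '/');
        char *ptr;

        if (profile == NULL)
                return 1;           /* no MDT name in the profile */
        *profile++ = '\0';          /* mdt = "mds" */

        ptr = strrchr(profile, '-');
        if (ptr == NULL)
                return 1;           /* no client suffix */
        *ptr = '\0';                /* profile = "lustre" */

        printf("mdt = %s, profile = %s\n", mdt, profile);
        return 0;
}
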
++
++int ll_fill_super(struct super_block *sb)
++{
++ struct lustre_profile *lprof;
++ struct lustre_sb_info *lsi = s2lsi(sb);
++ struct ll_sb_info *sbi;
++ char *osc = NULL, *mdc = NULL;
++ char *profilenm = get_profile_name(sb);
++ struct config_llog_instance cfg = {0, };
++ char ll_instance[sizeof(sb) * 2 + 1];
++ int err;
++ ENTRY;
++
++ CDEBUG(D_VFSTRACE, "VFS Op: sb %p\n", sb);
++
++ cfs_module_get();
++
++ /* client additional sb info */
++ lsi->lsi_llsbi = sbi = ll_init_sbi();
++ if (!sbi) {
++ cfs_module_put();
++ RETURN(-ENOMEM);
++ }
++
++ err = ll_options(lsi->lsi_lmd->lmd_opts, &sbi->ll_flags);
++ if (err)
++ GOTO(out_free, err);
++
++ /* Generate a string unique to this super, in case some joker tries
++ to mount the same fs at two mount points.
++ Use the address of the super itself.*/
++ sprintf(ll_instance, "%p", sb);
++ cfg.cfg_instance = ll_instance;
++ cfg.cfg_uuid = lsi->lsi_llsbi->ll_sb_uuid;
++ cfg.cfg_sb = sb;
++
++ /* set up client obds */
++ if (strchr(profilenm, '/') != NULL) /* COMPAT_146 */
++ err = -EINVAL; /* skip error messages, use old config code */
++ else
++ err = lustre_process_log(sb, profilenm, &cfg);
++ /* COMPAT_146 */
++ if (err < 0) {
++ char *oldname;
++ int rc, oldnamelen;
++ oldnamelen = strlen(profilenm) + 1;
++ /* Temp storage for 1.4.6 profile name */
++ OBD_ALLOC(oldname, oldnamelen);
++ if (oldname) {
++ memcpy(oldname, profilenm, oldnamelen);
++ rc = old_lustre_process_log(sb, oldname, &cfg);
++ if (rc >= 0) {
++ /* That worked - update the profile name
++ permanently */
++ err = rc;
++ OBD_FREE(lsi->lsi_lmd->lmd_profile,
++ strlen(lsi->lsi_lmd->lmd_profile) + 1);
++ OBD_ALLOC(lsi->lsi_lmd->lmd_profile,
++ strlen(oldname) + 1);
++ if (!lsi->lsi_lmd->lmd_profile) {
++ OBD_FREE(oldname, oldnamelen);
++ GOTO(out_free, err = -ENOMEM);
++ }
++ memcpy(lsi->lsi_lmd->lmd_profile, oldname,
++ strlen(oldname) + 1);
++ profilenm = get_profile_name(sb);
++ /* Don't ever try to recover the MGS */
++ rc = ptlrpc_set_import_active(
++ lsi->lsi_mgc->u.cli.cl_import, 0);
++ }
++ OBD_FREE(oldname, oldnamelen);
++ }
++ }
++ /* end COMPAT_146 */
++ if (err < 0) {
++ CERROR("Unable to process log: %d\n", err);
++ GOTO(out_free, err);
++ }
++
++ lprof = class_get_profile(profilenm);
++ if (lprof == NULL) {
++ LCONSOLE_ERROR_MSG(0x156, "The client profile '%s' could not be"
++ " read from the MGS. Does that filesystem "
++ "exist?\n", profilenm);
++ GOTO(out_free, err = -EINVAL);
++ }
++ CDEBUG(D_CONFIG, "Found profile %s: mdc=%s osc=%s\n", profilenm,
++ lprof->lp_mdc, lprof->lp_osc);
++
++ OBD_ALLOC(osc, strlen(lprof->lp_osc) +
++ strlen(ll_instance) + 2);
++ if (!osc)
++ GOTO(out_free, err = -ENOMEM);
++ sprintf(osc, "%s-%s", lprof->lp_osc, ll_instance);
++
++ OBD_ALLOC(mdc, strlen(lprof->lp_mdc) +
++ strlen(ll_instance) + 2);
++ if (!mdc)
++ GOTO(out_free, err = -ENOMEM);
++ sprintf(mdc, "%s-%s", lprof->lp_mdc, ll_instance);
++
++ /* connections, registrations, sb setup */
++ err = client_common_fill_super(sb, mdc, osc);
++
++out_free:
++ if (mdc)
++ OBD_FREE(mdc, strlen(mdc) + 1);
++ if (osc)
++ OBD_FREE(osc, strlen(osc) + 1);
++ if (err)
++ ll_put_super(sb);
++ else
++ LCONSOLE_WARN("Client %s has started\n", profilenm);
++
++ RETURN(err);
++} /* ll_fill_super */
++
++
++void ll_put_super(struct super_block *sb)
++{
++ struct config_llog_instance cfg;
++ char ll_instance[sizeof(sb) * 2 + 1];
++ struct obd_device *obd;
++ struct lustre_sb_info *lsi = s2lsi(sb);
++ struct ll_sb_info *sbi = ll_s2sbi(sb);
++ char *profilenm = get_profile_name(sb);
++ int force = 1, next;
++ ENTRY;
++
++ CDEBUG(D_VFSTRACE, "VFS Op: sb %p - %s\n", sb, profilenm);
++
++ sprintf(ll_instance, "%p", sb);
++ cfg.cfg_instance = ll_instance;
++ lustre_end_log(sb, NULL, &cfg);
++
++ if (sbi->ll_mdc_exp) {
++ obd = class_exp2obd(sbi->ll_mdc_exp);
++ if (obd)
++ force = obd->obd_force;
++ }
++
++ /* We need to set force before the lov_disconnect in
++ lustre_common_put_super, since l_d cleans up the OSCs as well. */
++ if (force) {
++ next = 0;
++ while ((obd = class_devices_in_group(&sbi->ll_sb_uuid,
++ &next)) != NULL) {
++ obd->obd_force = force;
++ }
++ }
++
++ if (sbi->ll_lcq) {
++ /* Only if client_common_fill_super succeeded */
++ client_common_put_super(sb);
++ }
++
++ next = 0;
++ while ((obd = class_devices_in_group(&sbi->ll_sb_uuid, &next)) !=NULL) {
++ class_manual_cleanup(obd);
++ }
++
++ if (profilenm)
++ class_del_profile(profilenm);
++
++ ll_free_sbi(sb);
++ lsi->lsi_llsbi = NULL;
++
++ lustre_common_put_super(sb);
++
++ LCONSOLE_WARN("client %s umount complete\n", ll_instance);
++
++ cfs_module_put();
++
++ EXIT;
++} /* client_put_super */
++
++#ifdef HAVE_REGISTER_CACHE
++#include <linux/cache_def.h>
++#ifdef HAVE_CACHE_RETURN_INT
++static int
++#else
++static void
++#endif
++ll_shrink_cache(int priority, unsigned int gfp_mask)
++{
++ struct ll_sb_info *sbi;
++ int count = 0;
++
++ list_for_each_entry(sbi, &ll_super_blocks, ll_list)
++ count += llap_shrink_cache(sbi, priority);
++
++#ifdef HAVE_CACHE_RETURN_INT
++ return count;
++#endif
++}
++
++struct cache_definition ll_cache_definition = {
++ .name = "llap_cache",
++ .shrink = ll_shrink_cache
++};
++#endif /* HAVE_REGISTER_CACHE */
++
++struct inode *ll_inode_from_lock(struct ldlm_lock *lock)
++{
++ struct inode *inode = NULL;
++ /* NOTE: we depend on atomic igrab() -bzzz */
++ lock_res_and_lock(lock);
++ if (lock->l_ast_data) {
++ struct ll_inode_info *lli = ll_i2info(lock->l_ast_data);
++ if (lli->lli_inode_magic == LLI_INODE_MAGIC) {
++ inode = igrab(lock->l_ast_data);
++ } else {
++ inode = lock->l_ast_data;
++ ldlm_lock_debug(NULL, inode->i_state & I_FREEING ?
++ D_INFO : D_WARNING,
++ lock, __FILE__, __func__, __LINE__,
++ "l_ast_data %p is bogus: magic %08x",
++ lock->l_ast_data, lli->lli_inode_magic);
++ inode = NULL;
++ }
++ }
++ unlock_res_and_lock(lock);
++ return inode;
++}
++
++static int null_if_equal(struct ldlm_lock *lock, void *data)
++{
++ if (data == lock->l_ast_data) {
++ lock->l_ast_data = NULL;
++
++ if (lock->l_req_mode != lock->l_granted_mode)
++ LDLM_ERROR(lock,"clearing inode with ungranted lock");
++ }
++
++ return LDLM_ITER_CONTINUE;
++}
++
++void ll_clear_inode(struct inode *inode)
++{
++ struct ll_fid fid;
++ struct ll_inode_info *lli = ll_i2info(inode);
++ struct ll_sb_info *sbi = ll_i2sbi(inode);
++ ENTRY;
++
++ CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
++ inode->i_generation, inode);
++
++ if (S_ISDIR(inode->i_mode)) {
++ /* these should have been cleared in ll_file_release */
++ LASSERT(lli->lli_sai == NULL);
++ LASSERT(lli->lli_opendir_key == NULL);
++ LASSERT(lli->lli_opendir_pid == 0);
++ }
++
++ ll_inode2fid(&fid, inode);
++ clear_bit(LLI_F_HAVE_MDS_SIZE_LOCK, &lli->lli_flags);
++ mdc_change_cbdata(sbi->ll_mdc_exp, &fid, null_if_equal, inode);
++
++ LASSERT(!lli->lli_open_fd_write_count);
++ LASSERT(!lli->lli_open_fd_read_count);
++ LASSERT(!lli->lli_open_fd_exec_count);
++
++ if (lli->lli_mds_write_och)
++ ll_mdc_real_close(inode, FMODE_WRITE);
++ if (lli->lli_mds_exec_och) {
++ if (!FMODE_EXEC)
++ CERROR("No FMODE exec, bug exec och is present for "
++ "inode %ld\n", inode->i_ino);
++ ll_mdc_real_close(inode, FMODE_EXEC);
++ }
++ if (lli->lli_mds_read_och)
++ ll_mdc_real_close(inode, FMODE_READ);
++
++
++ if (lli->lli_smd) {
++ obd_change_cbdata(sbi->ll_osc_exp, lli->lli_smd,
++ null_if_equal, inode);
++
++ obd_free_memmd(sbi->ll_osc_exp, &lli->lli_smd);
++ lli->lli_smd = NULL;
++ }
++
++ if (lli->lli_symlink_name) {
++ OBD_FREE(lli->lli_symlink_name,
++ strlen(lli->lli_symlink_name) + 1);
++ lli->lli_symlink_name = NULL;
++ }
++
++#ifdef CONFIG_FS_POSIX_ACL
++ if (lli->lli_posix_acl) {
++ LASSERT(atomic_read(&lli->lli_posix_acl->a_refcount) == 1);
++ posix_acl_release(lli->lli_posix_acl);
++ lli->lli_posix_acl = NULL;
++ }
++#endif
++
++ lli->lli_inode_magic = LLI_INODE_DEAD;
++
++#ifdef HAVE_EXPORT___IGET
++ spin_lock(&sbi->ll_deathrow_lock);
++ list_del_init(&lli->lli_dead_list);
++ spin_unlock(&sbi->ll_deathrow_lock);
++#endif
++
++ EXIT;
++}
++static int ll_setattr_do_truncate(struct inode *inode, loff_t new_size)
++{
++ struct ll_sb_info *sbi = ll_i2sbi(inode);
++ struct ll_inode_info *lli = ll_i2info(inode);
++ struct lov_stripe_md *lsm = lli->lli_smd;
++ int rc;
++ ldlm_policy_data_t policy = { .l_extent = {new_size,
++ OBD_OBJECT_EOF } };
++ struct lustre_handle lockh = { 0 };
++ int local_lock = 0; /* 0 - no local lock;
++ * 1 - lock taken by lock_extent;
++ * 2 - by obd_match*/
++ int ast_flags;
++ int err;
++ ENTRY;
++
++ UNLOCK_INODE_MUTEX(inode);
++ UP_WRITE_I_ALLOC_SEM(inode);
++
++ if (sbi->ll_lockless_truncate_enable &&
++ (sbi->ll_lco.lco_flags & OBD_CONNECT_TRUNCLOCK)) {
++ ast_flags = LDLM_FL_BLOCK_GRANTED;
++ rc = obd_match(sbi->ll_osc_exp, lsm, LDLM_EXTENT,
++ &policy, LCK_PW, &ast_flags, inode, &lockh);
++ if (rc > 0) {
++ local_lock = 2;
++ rc = 0;
++ } else if (rc == 0) {
++ rc = ll_file_punch(inode, new_size, 1);
++ }
++ } else {
++ /* XXX when we fix the AST intents to pass the discard-range
++ * XXX extent, make ast_flags always LDLM_AST_DISCARD_DATA
++ * XXX here. */
++ ast_flags = (new_size == 0) ? LDLM_AST_DISCARD_DATA : 0;
++ rc = ll_extent_lock(NULL, inode, lsm, LCK_PW, &policy,
++ &lockh, ast_flags);
++ if (likely(rc == 0))
++ local_lock = 1;
++ }
++
++ LOCK_INODE_MUTEX(inode);
++ DOWN_WRITE_I_ALLOC_SEM(inode);
++ if (likely(rc == 0)) {
++ /* Only ll_inode_size_lock is taken at this level.
++ * lov_stripe_lock() is grabbed by ll_truncate() only over
++ * call to obd_adjust_kms(). If vmtruncate returns 0, then
++ * ll_truncate dropped ll_inode_size_lock() */
++ ll_inode_size_lock(inode, 0);
++ if (!local_lock)
++ set_bit(LLI_F_SRVLOCK, &lli->lli_flags);
++ rc = vmtruncate(inode, new_size);
++ clear_bit(LLI_F_SRVLOCK, &lli->lli_flags);
++ if (rc != 0) {
++ LASSERT(atomic_read(&lli->lli_size_sem.count) <= 0);
++ ll_inode_size_unlock(inode, 0);
++ }
++ }
++ if (local_lock) {
++ if (local_lock == 2)
++ err = obd_cancel(sbi->ll_osc_exp, lsm, LCK_PW, &lockh);
++ else
++ err = ll_extent_unlock(NULL, inode, lsm, LCK_PW, &lockh);
++ if (unlikely(err != 0)){
++ CERROR("extent unlock failed: err=%d,"
++ " unlock method =%d\n", err, local_lock);
++ if (rc == 0)
++ rc = err;
++ }
++ }
++ RETURN(rc);
++}
++
++/* If this inode has objects allocated to it (lsm != NULL), then the OST
++ * object(s) determine the file size and mtime. Otherwise, the MDS will
++ * keep these values until such a time that objects are allocated for it.
++ * We do the MDS operations first, as it is checking permissions for us.
++ * We don't do the MDS RPC if there is nothing that we want to store there;
++ * otherwise there is no harm in updating mtime/atime on the MDS if we are
++ * going to do an RPC anyway.
++ *
++ * If we are doing a truncate, we will send the mtime and ctime updates
++ * to the OST with the punch RPC, otherwise we do an explicit setattr RPC.
++ * I don't believe it is possible to get e.g. ATTR_MTIME_SET and ATTR_SIZE
++ * at the same time.
++ */
++int ll_setattr_raw(struct inode *inode, struct iattr *attr)
++{
++ struct ll_inode_info *lli = ll_i2info(inode);
++ struct lov_stripe_md *lsm = lli->lli_smd;
++ struct ll_sb_info *sbi = ll_i2sbi(inode);
++ struct ptlrpc_request *request = NULL;
++ struct mdc_op_data op_data;
++ struct lustre_md md;
++ int ia_valid = attr->ia_valid;
++ int rc = 0;
++ ENTRY;
++
++ CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu valid %x\n", inode->i_ino,
++ attr->ia_valid);
++ ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_SETATTR, 1);
++
++ if (ia_valid & ATTR_SIZE) {
++ if (attr->ia_size > ll_file_maxbytes(inode)) {
++ CDEBUG(D_INODE, "file too large %llu > "LPU64"\n",
++ attr->ia_size, ll_file_maxbytes(inode));
++ RETURN(-EFBIG);
++ }
++
++ attr->ia_valid |= ATTR_MTIME | ATTR_CTIME;
++ }
++
++ /* POSIX: check before ATTR_*TIME_SET set (from inode_change_ok) */
++ if (ia_valid & (ATTR_MTIME_SET | ATTR_ATIME_SET)) {
++ if (current->fsuid != inode->i_uid &&
++ !cfs_capable(CFS_CAP_FOWNER))
++ RETURN(-EPERM);
++ }
++
++ /* We mark all of the fields "set" so MDS/OST does not re-set them */
++ if (attr->ia_valid & ATTR_CTIME) {
++ attr->ia_ctime = CURRENT_TIME;
++ attr->ia_valid |= ATTR_CTIME_SET;
++ }
++ if (!(ia_valid & ATTR_ATIME_SET) && (attr->ia_valid & ATTR_ATIME)) {
++ attr->ia_atime = CURRENT_TIME;
++ attr->ia_valid |= ATTR_ATIME_SET;
++ }
++ if (!(ia_valid & ATTR_MTIME_SET) && (attr->ia_valid & ATTR_MTIME)) {
++ attr->ia_mtime = CURRENT_TIME;
++ attr->ia_valid |= ATTR_MTIME_SET;
++ }
++ if ((attr->ia_valid & ATTR_CTIME) && !(attr->ia_valid & ATTR_MTIME)) {
++ /* To avoid a stale mtime on the MDS, obtain it from the OST and
++ send it to the MDS. */
++ rc = ll_glimpse_size(inode, 0);
++ if (rc)
++ RETURN(rc);
++
++ attr->ia_valid |= ATTR_MTIME_SET | ATTR_MTIME;
++ attr->ia_mtime = inode->i_mtime;
++ }
++
++ if (attr->ia_valid & (ATTR_MTIME | ATTR_CTIME))
++ CDEBUG(D_INODE, "setting mtime %lu, ctime %lu, now = %lu\n",
++ LTIME_S(attr->ia_mtime), LTIME_S(attr->ia_ctime),
++ CURRENT_SECONDS);
++
++ /* NB: ATTR_SIZE will only be set after this point if the size
++ * resides on the MDS, ie, this file has no objects. */
++ if (lsm)
++ attr->ia_valid &= ~ATTR_SIZE;
++
++ /* We always do an MDS RPC, even if we're only changing the size;
++ * only the MDS knows whether truncate() should fail with -ETXTBUSY */
++ ll_prepare_mdc_op_data(&op_data, inode, NULL, NULL, 0, 0, NULL);
++
++ rc = mdc_setattr(sbi->ll_mdc_exp, &op_data,
++ attr, NULL, 0, NULL, 0, &request);
++
++ if (rc) {
++ ptlrpc_req_finished(request);
++ if (rc == -ENOENT) {
++ inode->i_nlink = 0;
++ /* Unlinked special device node? Or just a race?
++ * Pretend we have done everything. */
++ if (!S_ISREG(inode->i_mode) &&
++ !S_ISDIR(inode->i_mode))
++ rc = inode_setattr(inode, attr);
++ } else if (rc != -EPERM && rc != -EACCES && rc != -ETXTBSY)
++ CERROR("mdc_setattr fails: rc = %d\n", rc);
++ RETURN(rc);
++ }
++
++ rc = mdc_req2lustre_md(request, REPLY_REC_OFF, sbi->ll_osc_exp, &md);
++ if (rc) {
++ ptlrpc_req_finished(request);
++ RETURN(rc);
++ }
++
++ /* We call inode_setattr to adjust timestamps.
++ * If there is at least some data in the file, we cleared ATTR_SIZE above to
++ * avoid invoking vmtruncate, otherwise it is important to call
++ * vmtruncate in inode_setattr to update inode->i_size (bug 6196) */
++ rc = inode_setattr(inode, attr);
++
++ ll_update_inode(inode, &md);
++ ptlrpc_req_finished(request);
++
++ if (!lsm || !S_ISREG(inode->i_mode)) {
++ CDEBUG(D_INODE, "no lsm: not setting attrs on OST\n");
++ RETURN(rc);
++ }
++
++ /* We really need to get our PW lock before we change inode->i_size.
++ * If we don't we can race with other i_size updaters on our node, like
++ * ll_file_read. We can also race with i_size propagation to other
++ * nodes through dirtying and writeback of final cached pages. This
++ * last one is especially bad for racing o_append users on other
++ * nodes. */
++ if (ia_valid & ATTR_SIZE) {
++ rc = ll_setattr_do_truncate(inode, attr->ia_size);
++ } else if (ia_valid & (ATTR_MTIME | ATTR_MTIME_SET)) {
++ obd_flag flags;
++ struct obd_info oinfo = { { { 0 } } };
++ struct obdo *oa;
++ OBDO_ALLOC(oa);
++
++ CDEBUG(D_INODE, "set mtime on OST inode %lu to %lu\n",
++ inode->i_ino, LTIME_S(attr->ia_mtime));
++
++ if (oa) {
++ oa->o_id = lsm->lsm_object_id;
++ oa->o_valid = OBD_MD_FLID;
++
++ flags = OBD_MD_FLTYPE | OBD_MD_FLATIME |
++ OBD_MD_FLMTIME | OBD_MD_FLCTIME |
++ OBD_MD_FLFID | OBD_MD_FLGENER;
++
++ obdo_from_inode(oa, inode, flags);
++
++ oinfo.oi_oa = oa;
++ oinfo.oi_md = lsm;
++
++ rc = obd_setattr_rqset(sbi->ll_osc_exp, &oinfo, NULL);
++ if (rc)
++ CERROR("obd_setattr_async fails: rc=%d\n", rc);
++ OBDO_FREE(oa);
++ } else {
++ rc = -ENOMEM;
++ }
++ }
++ RETURN(rc);
++}
++
++int ll_setattr(struct dentry *de, struct iattr *attr)
++{
++ int mode;
++
++ if ((attr->ia_valid & (ATTR_CTIME|ATTR_SIZE|ATTR_MODE)) ==
++ (ATTR_CTIME|ATTR_SIZE|ATTR_MODE))
++ attr->ia_valid |= MDS_OPEN_OWNEROVERRIDE;
++ if ((attr->ia_valid & (ATTR_MODE|ATTR_FORCE|ATTR_SIZE)) ==
++ (ATTR_SIZE|ATTR_MODE)) {
++ mode = de->d_inode->i_mode;
++ if (((mode & S_ISUID) && (!(attr->ia_mode & S_ISUID))) ||
++ ((mode & S_ISGID) && (mode & S_IXGRP) &&
++ (!(attr->ia_mode & S_ISGID))))
++ attr->ia_valid |= ATTR_FORCE;
++ }
++
++ return ll_setattr_raw(de->d_inode, attr);
++}
++
++int ll_statfs_internal(struct super_block *sb, struct obd_statfs *osfs,
++ __u64 max_age, __u32 flags)
++{
++ struct ll_sb_info *sbi = ll_s2sbi(sb);
++ struct obd_statfs obd_osfs;
++ int rc;
++ ENTRY;
++
++ rc = obd_statfs(class_exp2obd(sbi->ll_mdc_exp), osfs, max_age, flags);
++ if (rc) {
++ CERROR("mdc_statfs fails: rc = %d\n", rc);
++ RETURN(rc);
++ }
++
++ osfs->os_type = sb->s_magic;
++
++ CDEBUG(D_SUPER, "MDC blocks "LPU64"/"LPU64" objects "LPU64"/"LPU64"\n",
++ osfs->os_bavail, osfs->os_blocks, osfs->os_ffree,osfs->os_files);
++
++ rc = obd_statfs_rqset(class_exp2obd(sbi->ll_osc_exp),
++ &obd_osfs, max_age, flags);
++ if (rc) {
++ CERROR("obd_statfs fails: rc = %d\n", rc);
++ RETURN(rc);
++ }
++
++ CDEBUG(D_SUPER, "OSC blocks "LPU64"/"LPU64" objects "LPU64"/"LPU64"\n",
++ obd_osfs.os_bavail, obd_osfs.os_blocks, obd_osfs.os_ffree,
++ obd_osfs.os_files);
++
++ osfs->os_bsize = obd_osfs.os_bsize;
++ osfs->os_blocks = obd_osfs.os_blocks;
++ osfs->os_bfree = obd_osfs.os_bfree;
++ osfs->os_bavail = obd_osfs.os_bavail;
++
++ /* If we don't have as many objects free on the OST as inodes
++ * on the MDS, we reduce the total number of inodes to
++ * compensate, so that the "inodes in use" number is correct.
++ */
++ if (obd_osfs.os_ffree < osfs->os_ffree) {
++ osfs->os_files = (osfs->os_files - osfs->os_ffree) +
++ obd_osfs.os_ffree;
++ osfs->os_ffree = obd_osfs.os_ffree;
++ }
++
++ RETURN(rc);
++}
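
The inode-count compensation at the end of ll_statfs_internal() keeps "files in use" (files - ffree) stable when the OSTs have fewer free objects than the MDS has free inodes. A standalone demo with made-up numbers:

#include <stdio.h>

int main(void)
{
        unsigned long long os_files = 1000, os_ffree = 900; /* MDS view */
        unsigned long long ost_ffree = 50;                  /* OST view */

        if (ost_ffree < os_ffree) {
                os_files = (os_files - os_ffree) + ost_ffree; /* 150 */
                os_ffree = ost_ffree;                         /* 50  */
        }
        printf("files=%llu ffree=%llu in-use=%llu\n",
               os_files, os_ffree, os_files - os_ffree);      /* 100 */
        return 0;
}
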
++#ifndef HAVE_STATFS_DENTRY_PARAM
++int ll_statfs(struct super_block *sb, struct kstatfs *sfs)
++{
++#else
++int ll_statfs(struct dentry *de, struct kstatfs *sfs)
++{
++ struct super_block *sb = de->d_sb;
++#endif
++ struct obd_statfs osfs;
++ int rc;
++
++ CDEBUG(D_VFSTRACE, "VFS Op: at "LPU64" jiffies\n", get_jiffies_64());
++ ll_stats_ops_tally(ll_s2sbi(sb), LPROC_LL_STAFS, 1);
++
++ /* For now we will always get up-to-date statfs values, but in the
++ * future we may allow some amount of caching on the client (e.g.
++ * from QOS or lprocfs updates). */
++ rc = ll_statfs_internal(sb, &osfs, cfs_time_current_64() - 1, 0);
++ if (rc)
++ return rc;
++
++ statfs_unpack(sfs, &osfs);
++
++ /* We need to downshift for all 32-bit kernels, because we can't
++ * tell if the kernel is being called via sys_statfs64() or not.
++ * Stop before overflowing f_bsize - in which case it is better
++ * to just risk EOVERFLOW if caller is using old sys_statfs(). */
++ if (sizeof(long) < 8) {
++ while (osfs.os_blocks > ~0UL && sfs->f_bsize < 0x40000000) {
++ sfs->f_bsize <<= 1;
++
++ osfs.os_blocks >>= 1;
++ osfs.os_bfree >>= 1;
++ osfs.os_bavail >>= 1;
++ }
++ }
++
++ sfs->f_blocks = osfs.os_blocks;
++ sfs->f_bfree = osfs.os_bfree;
++ sfs->f_bavail = osfs.os_bavail;
++
++ return 0;
++}
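
The 32-bit downshift loop in ll_statfs() trades block size for block count: doubling f_bsize while halving the counts keeps f_bsize * f_blocks constant until the counts fit in an unsigned long. Standalone demo (a fixed 32-bit limit emulates ~0UL on a 32-bit kernel; the sizes are made up):

#include <stdio.h>

int main(void)
{
        unsigned long long blocks = 0x300000000ULL; /* 12 G blocks, > 32 bits */
        unsigned long bsize = 4096;
        unsigned long max = 0xffffffffUL;           /* 32-bit ~0UL */

        while (blocks > max && bsize < 0x40000000) {
                bsize <<= 1;     /* larger block size ...        */
                blocks >>= 1;    /* ... fewer blocks, same bytes */
        }
        printf("bsize=%lu blocks=%llu\n", bsize, blocks);
        return 0;
}
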
++
++void ll_inode_size_lock(struct inode *inode, int lock_lsm)
++{
++ struct ll_inode_info *lli;
++ struct lov_stripe_md *lsm;
++
++ lli = ll_i2info(inode);
++ LASSERT(lli->lli_size_sem_owner != current);
++ down(&lli->lli_size_sem);
++ LASSERT(lli->lli_size_sem_owner == NULL);
++ lli->lli_size_sem_owner = current;
++ lsm = lli->lli_smd;
++ LASSERTF(lsm != NULL || lock_lsm == 0, "lsm %p, lock_lsm %d\n",
++ lsm, lock_lsm);
++ if (lock_lsm)
++ lov_stripe_lock(lsm);
++}
++
++void ll_inode_size_unlock(struct inode *inode, int unlock_lsm)
++{
++ struct ll_inode_info *lli;
++ struct lov_stripe_md *lsm;
++
++ lli = ll_i2info(inode);
++ lsm = lli->lli_smd;
++ LASSERTF(lsm != NULL || unlock_lsm == 0, "lsm %p, lock_lsm %d\n",
++ lsm, unlock_lsm);
++ if (unlock_lsm)
++ lov_stripe_unlock(lsm);
++ LASSERT(lli->lli_size_sem_owner == current);
++ lli->lli_size_sem_owner = NULL;
++ up(&lli->lli_size_sem);
++}
++
++static void ll_replace_lsm(struct inode *inode, struct lov_stripe_md *lsm)
++{
++ struct ll_inode_info *lli = ll_i2info(inode);
++
++ dump_lsm(D_INODE, lsm);
++ dump_lsm(D_INODE, lli->lli_smd);
++ LASSERTF(lsm->lsm_magic == LOV_MAGIC_JOIN,
++ "lsm must be joined lsm %p\n", lsm);
++ obd_free_memmd(ll_i2obdexp(inode), &lli->lli_smd);
++ CDEBUG(D_INODE, "replace lsm %p to lli_smd %p for inode %lu%u(%p)\n",
++ lsm, lli->lli_smd, inode->i_ino, inode->i_generation, inode);
++ lli->lli_smd = lsm;
++ lli->lli_maxbytes = lsm->lsm_maxbytes;
++ if (lli->lli_maxbytes > PAGE_CACHE_MAXBYTES)
++ lli->lli_maxbytes = PAGE_CACHE_MAXBYTES;
++}
++
++void ll_update_inode(struct inode *inode, struct lustre_md *md)
++{
++ struct ll_inode_info *lli = ll_i2info(inode);
++ struct mds_body *body = md->body;
++ struct lov_stripe_md *lsm = md->lsm;
++
++ LASSERT ((lsm != NULL) == ((body->valid & OBD_MD_FLEASIZE) != 0));
++ if (lsm != NULL) {
++ if (lli->lli_smd == NULL) {
++ if (lsm->lsm_magic != LOV_MAGIC &&
++ lsm->lsm_magic != LOV_MAGIC_JOIN) {
++ dump_lsm(D_ERROR, lsm);
++ LBUG();
++ }
++ CDEBUG(D_INODE, "adding lsm %p to inode %lu/%u(%p)\n",
++ lsm, inode->i_ino, inode->i_generation, inode);
++ /* ll_inode_size_lock() requires it is only called
++ * with lli_smd != NULL or lock_lsm == 0 or we can
++ * race between lock/unlock. bug 9547 */
++ lli->lli_smd = lsm;
++ lli->lli_maxbytes = lsm->lsm_maxbytes;
++ if (lli->lli_maxbytes > PAGE_CACHE_MAXBYTES)
++ lli->lli_maxbytes = PAGE_CACHE_MAXBYTES;
++ } else {
++ if (lli->lli_smd->lsm_magic == lsm->lsm_magic &&
++ lli->lli_smd->lsm_stripe_count ==
++ lsm->lsm_stripe_count) {
++ if (lov_stripe_md_cmp(lli->lli_smd, lsm)) {
++ CERROR("lsm mismatch for inode %ld\n",
++ inode->i_ino);
++ CERROR("lli_smd:\n");
++ dump_lsm(D_ERROR, lli->lli_smd);
++ CERROR("lsm:\n");
++ dump_lsm(D_ERROR, lsm);
++ LBUG();
++ }
++ } else
++ ll_replace_lsm(inode, lsm);
++ }
++ if (lli->lli_smd != lsm)
++ obd_free_memmd(ll_i2obdexp(inode), &lsm);
++ }
++
++#ifdef CONFIG_FS_POSIX_ACL
++ LASSERT(!md->posix_acl || (body->valid & OBD_MD_FLACL));
++ if (body->valid & OBD_MD_FLACL) {
++ spin_lock(&lli->lli_lock);
++ if (lli->lli_posix_acl)
++ posix_acl_release(lli->lli_posix_acl);
++ lli->lli_posix_acl = md->posix_acl;
++ spin_unlock(&lli->lli_lock);
++ }
++#endif
++
++ if (body->valid & OBD_MD_FLID)
++ inode->i_ino = body->ino;
++ if (body->valid & OBD_MD_FLATIME &&
++ body->atime > LTIME_S(inode->i_atime))
++ LTIME_S(inode->i_atime) = body->atime;
++
++ /* mtime is always updated with ctime, but it can be set in the past.
++ Since write and utime(2) may happen within one second, and utime's
++ mtime takes priority over write's, take mtime from the MDS when the
++ ctimes are equal. */
++ if (body->valid & OBD_MD_FLCTIME &&
++ body->ctime >= LTIME_S(inode->i_ctime)) {
++ LTIME_S(inode->i_ctime) = body->ctime;
++ if (body->valid & OBD_MD_FLMTIME) {
++ CDEBUG(D_INODE, "setting ino %lu mtime "
++ "from %lu to "LPU64"\n", inode->i_ino,
++ LTIME_S(inode->i_mtime), body->mtime);
++ LTIME_S(inode->i_mtime) = body->mtime;
++ }
++ }
++ if (body->valid & OBD_MD_FLMODE)
++ inode->i_mode = (inode->i_mode & S_IFMT)|(body->mode & ~S_IFMT);
++ if (body->valid & OBD_MD_FLTYPE)
++ inode->i_mode = (inode->i_mode & ~S_IFMT)|(body->mode & S_IFMT);
++ if (S_ISREG(inode->i_mode)) {
++ inode->i_blkbits = min(PTLRPC_MAX_BRW_BITS+1, LL_MAX_BLKSIZE_BITS);
++ } else {
++ inode->i_blkbits = inode->i_sb->s_blocksize_bits;
++ }
++#ifdef HAVE_INODE_BLKSIZE
++ inode->i_blksize = 1<<inode->i_blkbits;
++#endif
++ if (body->valid & OBD_MD_FLUID)
++ inode->i_uid = body->uid;
++ if (body->valid & OBD_MD_FLGID)
++ inode->i_gid = body->gid;
++ if (body->valid & OBD_MD_FLFLAGS)
++ inode->i_flags = ll_ext_to_inode_flags(body->flags);
++
++ if (body->valid & OBD_MD_FLNLINK)
++ inode->i_nlink = body->nlink;
++ if (body->valid & OBD_MD_FLGENER)
++ inode->i_generation = body->generation;
++ if (body->valid & OBD_MD_FLRDEV)
++ inode->i_rdev = old_decode_dev(body->rdev);
++ if (body->valid & OBD_MD_FLSIZE) {
++#if 0 /* Can't block ll_test_inode->ll_update_inode, b=14326*/
++ ll_inode_size_lock(inode, 0);
++ i_size_write(inode, body->size);
++ ll_inode_size_unlock(inode, 0);
++#else
++ inode->i_size = body->size;
++#endif
++ }
++ if (body->valid & OBD_MD_FLBLOCKS)
++ inode->i_blocks = body->blocks;
++
++ if (body->valid & OBD_MD_FLSIZE)
++ set_bit(LLI_F_HAVE_MDS_SIZE_LOCK, &lli->lli_flags);
++}
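
The mode merging in ll_update_inode() above splits i_mode into file-type bits (S_IFMT) and permission bits: OBD_MD_FLMODE replaces only the permissions, OBD_MD_FLTYPE only the type. Shown standalone with example values:

#include <stdio.h>
#include <sys/stat.h>

int main(void)
{
        unsigned int i_mode = S_IFREG | 0644;  /* regular file, rw-r--r-- */
        unsigned int body_mode = 0755;         /* new permissions from MDS */

        /* OBD_MD_FLMODE case: keep the type, take the permissions */
        i_mode = (i_mode & S_IFMT) | (body_mode & ~S_IFMT);
        printf("mode = %o\n", i_mode);         /* 100755 */
        return 0;
}
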
++
++static struct backing_dev_info ll_backing_dev_info = {
++ .ra_pages = 0, /* No readahead */
++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,12))
++ .capabilities = 0, /* Does contribute to dirty memory */
++#else
++ .memory_backed = 0, /* Does contribute to dirty memory */
++#endif
++};
++
++void ll_read_inode2(struct inode *inode, void *opaque)
++{
++ struct lustre_md *md = opaque;
++ struct ll_inode_info *lli = ll_i2info(inode);
++ ENTRY;
++
++ CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
++ inode->i_generation, inode);
++
++ ll_lli_init(lli);
++
++ LASSERT(!lli->lli_smd);
++
++ /* Core attributes from the MDS first. This is a new inode, and
++ * the VFS doesn't zero times in the core inode so we have to do
++ * it ourselves. They will be overwritten by either MDS or OST
++ * attributes - we just need to make sure they aren't newer. */
++ LTIME_S(inode->i_mtime) = 0;
++ LTIME_S(inode->i_atime) = 0;
++ LTIME_S(inode->i_ctime) = 0;
++ inode->i_rdev = 0;
++ ll_update_inode(inode, md);
++
++ /* OIDEBUG(inode); */
++
++ if (S_ISREG(inode->i_mode)) {
++ struct ll_sb_info *sbi = ll_i2sbi(inode);
++ inode->i_op = &ll_file_inode_operations;
++ inode->i_fop = sbi->ll_fop;
++ inode->i_mapping->a_ops = &ll_aops;
++ EXIT;
++ } else if (S_ISDIR(inode->i_mode)) {
++ inode->i_op = &ll_dir_inode_operations;
++ inode->i_fop = &ll_dir_operations;
++ inode->i_mapping->a_ops = &ll_dir_aops;
++ EXIT;
++ } else if (S_ISLNK(inode->i_mode)) {
++ inode->i_op = &ll_fast_symlink_inode_operations;
++ EXIT;
++ } else {
++ inode->i_op = &ll_special_inode_operations;
++ init_special_inode(inode, inode->i_mode,
++ kdev_t_to_nr(inode->i_rdev));
++ /* initializing backing dev info. */
++ inode->i_mapping->backing_dev_info = &ll_backing_dev_info;
++ EXIT;
++ }
++}
++
++int ll_iocontrol(struct inode *inode, struct file *file,
++ unsigned int cmd, unsigned long arg)
++{
++ struct ll_sb_info *sbi = ll_i2sbi(inode);
++ struct ptlrpc_request *req = NULL;
++ int rc, flags = 0;
++ ENTRY;
++
++ switch(cmd) {
++ case EXT3_IOC_GETFLAGS: {
++ struct ll_fid fid;
++ struct mds_body *body;
++
++ ll_inode2fid(&fid, inode);
++ rc = mdc_getattr(sbi->ll_mdc_exp, &fid, OBD_MD_FLFLAGS,0,&req);
++ if (rc) {
++ CERROR("failure %d inode %lu\n", rc, inode->i_ino);
++ RETURN(-abs(rc));
++ }
++
++ body = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF,
++ sizeof(*body));
++
++ /* We want to return EXT3_*_FL flags to the caller via this
++ * ioctl. An older MDS may be sending S_* flags, fix it up. */
++ flags = ll_inode_to_ext_flags(body->flags,
++ body->flags &MDS_BFLAG_EXT_FLAGS);
++ ptlrpc_req_finished (req);
++
++ RETURN(put_user(flags, (int *)arg));
++ }
++ case EXT3_IOC_SETFLAGS: {
++ struct mdc_op_data op_data;
++ struct ll_iattr_struct attr;
++ struct obd_info oinfo = { { { 0 } } };
++ struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
++
++ if (get_user(flags, (int *)arg))
++ RETURN(-EFAULT);
++
++ oinfo.oi_md = lsm;
++ OBDO_ALLOC(oinfo.oi_oa);
++ if (!oinfo.oi_oa)
++ RETURN(-ENOMEM);
++
++ ll_prepare_mdc_op_data(&op_data, inode, NULL, NULL, 0, 0, NULL);
++
++ memset(&attr, 0, sizeof(attr));
++ attr.ia_attr_flags = flags;
++ ((struct iattr *)&attr)->ia_valid |= ATTR_ATTR_FLAG;
++
++ rc = mdc_setattr(sbi->ll_mdc_exp, &op_data,
++ (struct iattr *)&attr, NULL, 0, NULL, 0, &req);
++ ptlrpc_req_finished(req);
++ if (rc || lsm == NULL) {
++ OBDO_FREE(oinfo.oi_oa);
++ RETURN(rc);
++ }
++
++ oinfo.oi_oa->o_id = lsm->lsm_object_id;
++ oinfo.oi_oa->o_flags = flags;
++ oinfo.oi_oa->o_valid = OBD_MD_FLID | OBD_MD_FLFLAGS;
++
++ obdo_from_inode(oinfo.oi_oa, inode,
++ OBD_MD_FLFID | OBD_MD_FLGENER);
++ rc = obd_setattr_rqset(sbi->ll_osc_exp, &oinfo, NULL);
++ OBDO_FREE(oinfo.oi_oa);
++ if (rc) {
++ if (rc != -EPERM && rc != -EACCES)
++ CERROR("mdc_setattr_async fails: rc = %d\n", rc);
++ RETURN(rc);
++ }
++
++ inode->i_flags = ll_ext_to_inode_flags(flags |
++ MDS_BFLAG_EXT_FLAGS);
++ RETURN(0);
++ }
++ default:
++ RETURN(-ENOSYS);
++ }
++
++ RETURN(0);
++}
++
++/* umount -f client means force down, don't save state */
++#ifdef HAVE_UMOUNTBEGIN_VFSMOUNT
++void ll_umount_begin(struct vfsmount *vfsmnt, int flags)
++{
++ struct super_block *sb = vfsmnt->mnt_sb;
++#else
++void ll_umount_begin(struct super_block *sb)
++{
++#endif
++ struct lustre_sb_info *lsi = s2lsi(sb);
++ struct ll_sb_info *sbi = ll_s2sbi(sb);
++ struct obd_device *obd;
++ struct obd_ioctl_data ioc_data = { 0 };
++ ENTRY;
++
++#ifdef HAVE_UMOUNTBEGIN_VFSMOUNT
++ if (!(flags & MNT_FORCE)) {
++ EXIT;
++ return;
++ }
++#endif
++
++ /* Tell the MGC we got umount -f */
++ lsi->lsi_flags |= LSI_UMOUNT_FORCE;
++
++ CDEBUG(D_VFSTRACE, "VFS Op: superblock %p count %d active %d\n", sb,
++ sb->s_count, atomic_read(&sb->s_active));
++
++ obd = class_exp2obd(sbi->ll_mdc_exp);
++ if (obd == NULL) {
++ CERROR("Invalid MDC connection handle "LPX64"\n",
++ sbi->ll_mdc_exp->exp_handle.h_cookie);
++ EXIT;
++ return;
++ }
++ obd->obd_force = 1;
++ obd_iocontrol(IOC_OSC_SET_ACTIVE, sbi->ll_mdc_exp, sizeof ioc_data,
++ &ioc_data, NULL);
++
++ obd = class_exp2obd(sbi->ll_osc_exp);
++ if (obd == NULL) {
++ CERROR("Invalid LOV connection handle "LPX64"\n",
++ sbi->ll_osc_exp->exp_handle.h_cookie);
++ EXIT;
++ return;
++ }
++
++ obd->obd_force = 1;
++ obd_iocontrol(IOC_OSC_SET_ACTIVE, sbi->ll_osc_exp, sizeof ioc_data,
++ &ioc_data, NULL);
++
++ /* Really, we'd like to wait until there are no requests outstanding,
++ * and then continue. For now, we just invalidate the requests,
++ * schedule() and sleep one second if needed, and hope.
++ */
++ schedule();
++#ifdef HAVE_UMOUNTBEGIN_VFSMOUNT
++ if (atomic_read(&vfsmnt->mnt_count) > 2) {
++ cfs_schedule_timeout(CFS_TASK_INTERRUPTIBLE,
++ cfs_time_seconds(1));
++ if (atomic_read(&vfsmnt->mnt_count) > 2)
++ LCONSOLE_WARN("Mount still busy with %d refs! You "
++ "may try to umount it a bit later\n",
++ atomic_read(&vfsmnt->mnt_count));
++ }
++#endif
++
++ EXIT;
++}
++
++int ll_remount_fs(struct super_block *sb, int *flags, char *data)
++{
++ struct ll_sb_info *sbi = ll_s2sbi(sb);
++ int err;
++ __u32 read_only;
++
++ if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY)) {
++ read_only = *flags & MS_RDONLY;
++ err = obd_set_info_async(sbi->ll_mdc_exp, sizeof(KEY_READONLY),
++ KEY_READONLY, sizeof(read_only),
++ &read_only, NULL);
++
++ /* MDS might have expected a different ro key value, b=17493 */
++ if (err == -EINVAL) {
++ CDEBUG(D_CONFIG, "Retrying remount with 1.6.6 ro key\n");
++ err = obd_set_info_async(sbi->ll_mdc_exp,
++ sizeof(KEY_READONLY_166COMPAT),
++ KEY_READONLY_166COMPAT,
++ sizeof(read_only),
++ &read_only, NULL);
++ }
++
++ if (err) {
++ CERROR("Failed to change the read-only flag during "
++ "remount: %d\n", err);
++ return err;
++ }
++
++ if (read_only)
++ sb->s_flags |= MS_RDONLY;
++ else
++ sb->s_flags &= ~MS_RDONLY;
++ }
++ return 0;
++}
++
++int ll_prep_inode(struct obd_export *exp, struct inode **inode,
++ struct ptlrpc_request *req, int offset,struct super_block *sb)
++{
++ struct lustre_md md;
++ struct ll_sb_info *sbi = NULL;
++ int rc = 0;
++ ENTRY;
++
++ LASSERT(*inode || sb);
++ sbi = sb ? ll_s2sbi(sb) : ll_i2sbi(*inode);
++ prune_deathrow(sbi, 1);
++
++ rc = mdc_req2lustre_md(req, offset, exp, &md);
++ if (rc)
++ RETURN(rc);
++
++ if (*inode) {
++ ll_update_inode(*inode, &md);
++ } else {
++ LASSERT(sb);
++ *inode = ll_iget(sb, md.body->ino, &md);
++ if (*inode == NULL || is_bad_inode(*inode)) {
++ mdc_free_lustre_md(exp, &md);
++ rc = -ENOMEM;
++ CERROR("new_inode -fatal: rc %d\n", rc);
++ GOTO(out, rc);
++ }
++ }
++
++ rc = obd_checkmd(exp, ll_i2mdcexp(*inode),
++ ll_i2info(*inode)->lli_smd);
++out:
++ RETURN(rc);
++}
++
++char *llap_origins[] = {
++ [LLAP_ORIGIN_UNKNOWN] = "--",
++ [LLAP_ORIGIN_READPAGE] = "rp",
++ [LLAP_ORIGIN_READAHEAD] = "ra",
++ [LLAP_ORIGIN_COMMIT_WRITE] = "cw",
++ [LLAP_ORIGIN_WRITEPAGE] = "wp",
++ [LLAP_ORIGIN_LOCKLESS_IO] = "ls"
++};
++
++struct ll_async_page *llite_pglist_next_llap(struct ll_sb_info *sbi,
++ struct list_head *list)
++{
++ struct ll_async_page *llap;
++ struct list_head *pos;
++
++ list_for_each(pos, list) {
++ if (pos == &sbi->ll_pglist)
++ return NULL;
++ llap = list_entry(pos, struct ll_async_page, llap_pglist_item);
++ if (llap->llap_page == NULL)
++ continue;
++ return llap;
++ }
++ LBUG();
++ return NULL;
++}
++
++int ll_obd_statfs(struct inode *inode, void *arg)
++{
++ struct ll_sb_info *sbi = NULL;
++ struct obd_device *client_obd = NULL, *lov_obd = NULL;
++ struct lov_obd *lov = NULL;
++ struct obd_statfs stat_buf = {0};
++ char *buf = NULL;
++ struct obd_ioctl_data *data = NULL;
++ __u32 type, index;
++ int len = 0, rc;
++
++ if (!inode || !(sbi = ll_i2sbi(inode)))
++ GOTO(out_statfs, rc = -EINVAL);
++
++ rc = obd_ioctl_getdata(&buf, &len, arg);
++ if (rc)
++ GOTO(out_statfs, rc);
++
++ data = (void*)buf;
++ if (!data->ioc_inlbuf1 || !data->ioc_inlbuf2 ||
++ !data->ioc_pbuf1 || !data->ioc_pbuf2)
++ GOTO(out_statfs, rc = -EINVAL);
++
++ memcpy(&type, data->ioc_inlbuf1, sizeof(__u32));
++ memcpy(&index, data->ioc_inlbuf2, sizeof(__u32));
++
++ if (type == LL_STATFS_MDC) {
++ if (index > 0)
++ GOTO(out_statfs, rc = -ENODEV);
++ client_obd = class_exp2obd(sbi->ll_mdc_exp);
++ } else if (type == LL_STATFS_LOV) {
++ lov_obd = class_exp2obd(sbi->ll_osc_exp);
++ lov = &lov_obd->u.lov;
++
++ if (index >= lov->desc.ld_tgt_count)
++ GOTO(out_statfs, rc = -ENODEV);
++
++ if (!lov->lov_tgts[index])
++ /* Try again with the next index */
++ GOTO(out_statfs, rc = -EAGAIN);
++
++ client_obd = class_exp2obd(lov->lov_tgts[index]->ltd_exp);
++ if (!lov->lov_tgts[index]->ltd_active)
++ GOTO(out_uuid, rc = -ENODATA);
++ }
++
++ if (!client_obd)
++ GOTO(out_statfs, rc = -EINVAL);
++
++ rc = obd_statfs(client_obd, &stat_buf, cfs_time_current_64() - HZ, 1);
++ if (rc)
++ GOTO(out_statfs, rc);
++
++ if (copy_to_user(data->ioc_pbuf1, &stat_buf, data->ioc_plen1))
++ GOTO(out_statfs, rc = -EFAULT);
++
++out_uuid:
++ if (copy_to_user(data->ioc_pbuf2, obd2cli_tgt(client_obd),
++ data->ioc_plen2))
++ rc = -EFAULT;
++
++out_statfs:
++ if (buf)
++ obd_ioctl_freedata(buf, len);
++ return rc;
++}
++
++int ll_process_config(struct lustre_cfg *lcfg)
++{
++ char *ptr;
++ void *sb;
++ struct lprocfs_static_vars lvars;
++ unsigned long x;
++ int rc = 0;
++
++ lprocfs_llite_init_vars(&lvars);
++
++ /* The instance name contains the sb: lustre-client-aacfe000 */
++ ptr = strrchr(lustre_cfg_string(lcfg, 0), '-');
++ if (!ptr || !*(++ptr))
++ return -EINVAL;
++ if (sscanf(ptr, "%lx", &x) != 1)
++ return -EINVAL;
++ sb = (void *)x;
++ /* This better be a real Lustre superblock! */
++ LASSERT(s2lsi((struct super_block *)sb)->lsi_lmd->lmd_magic == LMD_MAGIC);
++
++ /* Note we have not called client_common_fill_super yet, so
++ proc fns must be able to handle that! */
++ rc = class_process_proc_param(PARAM_LLITE, lvars.obd_vars,
++ lcfg, sb);
++ return(rc);
++}
++
++int ll_show_options(struct seq_file *seq, struct vfsmount *vfs)
++{
++ struct ll_sb_info *sbi;
++
++ LASSERT((seq != NULL) && (vfs != NULL));
++ sbi = ll_s2sbi(vfs->mnt_sb);
++
++ if (sbi->ll_flags & LL_SBI_NOLCK)
++ seq_puts(seq, ",nolock");
++
++ if (sbi->ll_flags & LL_SBI_FLOCK)
++ seq_puts(seq, ",flock");
++
++ if (sbi->ll_flags & LL_SBI_LOCALFLOCK)
++ seq_puts(seq, ",localflock");
++
++ if (sbi->ll_flags & LL_SBI_USER_XATTR)
++ seq_puts(seq, ",user_xattr");
++
++ if (sbi->ll_flags & LL_SBI_ACL)
++ seq_puts(seq, ",acl");
++
++ RETURN(0);
++}
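
For readers following along: the EXT3_IOC_GETFLAGS case in ll_iocontrol() above is reachable from ordinary userspace, because that ioctl number is shared with the generic FS_IOC_GETFLAGS. A minimal userspace sketch (the mount-point path is an assumption for illustration, not anything from this patch):

#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/fs.h>           /* FS_IOC_GETFLAGS */

int main(void)
{
        int flags = 0;
        int fd = open("/mnt/lustre/somefile", O_RDONLY);

        if (fd < 0) {
                perror("open");
                return 1;
        }
        /* on a Lustre client this is served by ll_iocontrol(), which
         * fetches OBD_MD_FLFLAGS from the MDS and converts any old-style
         * S_* flags to EXT3_*_FL before copying them back */
        if (ioctl(fd, FS_IOC_GETFLAGS, &flags) == 0)
                printf("inode flags: %#x\n", flags);
        else
                perror("FS_IOC_GETFLAGS");
        close(fd);
        return 0;
}

Built with any C compiler, this prints the EXT3_*_FL bits that the GETFLAGS path assembles from the MDS reply.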
+diff -urNad lustre~/lustre/llite/llite_mmap.c lustre/lustre/llite/llite_mmap.c
+--- lustre~/lustre/llite/llite_mmap.c 2009-03-12 10:32:27.000000000 +0100
++++ lustre/lustre/llite/llite_mmap.c 2009-03-12 11:02:51.000000000 +0100
+@@ -81,8 +81,7 @@
+ int lt_get_mmap_locks(struct ll_lock_tree *tree,
+ unsigned long addr, size_t count);
+
+-struct page *ll_nopage(struct vm_area_struct *vma, unsigned long address,
+- int *type);
++static struct vm_operations_struct ll_file_vm_ops;
+
+ struct ll_lock_tree_node * ll_node_from_inode(struct inode *inode, __u64 start,
+ __u64 end, ldlm_mode_t mode)
+@@ -285,9 +284,19 @@
+ return LCK_PR;
+ }
+
++static void policy_from_vma_pgoff(ldlm_policy_data_t *policy,
++ struct vm_area_struct *vma,
++ __u64 pgoff, size_t count)
++{
++ policy->l_extent.start = pgoff << CFS_PAGE_SHIFT;
++ policy->l_extent.end = (policy->l_extent.start + count - 1) |
++ ~CFS_PAGE_MASK;
++}
++
+ static void policy_from_vma(ldlm_policy_data_t *policy,
+ struct vm_area_struct *vma, unsigned long addr,
+ size_t count)
++
+ {
+ policy->l_extent.start = ((addr - vma->vm_start) & CFS_PAGE_MASK) +
+ ((__u64)vma->vm_pgoff << CFS_PAGE_SHIFT);
+@@ -308,7 +317,7 @@
+ spin_lock(&mm->page_table_lock);
+ for(vma = find_vma(mm, addr);
+ vma != NULL && vma->vm_start < (addr + count); vma = vma->vm_next) {
+- if (vma->vm_ops && vma->vm_ops->nopage == ll_nopage &&
++ if (vma->vm_ops && vma->vm_ops == &ll_file_vm_ops &&
+ vma->vm_flags & VM_SHARED) {
+ ret = vma;
+ break;
+@@ -360,44 +369,30 @@
+ }
+ RETURN(0);
+ }
+-/**
+- * Page fault handler.
+- *
+- * \param vma - is virtiual area struct related to page fault
+- * \param address - address when hit fault
+- * \param type - of fault
+- *
+- * \return allocated and filled page for address
+- * \retval NOPAGE_SIGBUS if page not exist on this address
+- * \retval NOPAGE_OOM not have memory for allocate new page
+- */
+-struct page *ll_nopage(struct vm_area_struct *vma, unsigned long address,
+- int *type)
++
++static int ll_get_extent_lock(struct vm_area_struct *vma, unsigned long pgoff,
++ int *save_flags, struct lustre_handle *lockh)
+ {
+ struct file *filp = vma->vm_file;
+ struct ll_file_data *fd = LUSTRE_FPRIVATE(filp);
+ struct inode *inode = filp->f_dentry->d_inode;
+- struct lustre_handle lockh = { 0 };
+ ldlm_policy_data_t policy;
+ ldlm_mode_t mode;
+- struct page *page = NULL;
+ struct ll_inode_info *lli = ll_i2info(inode);
+- struct lov_stripe_md *lsm;
+ struct ost_lvb lvb;
+ __u64 kms, old_mtime;
+- unsigned long pgoff, size, rand_read, seq_read;
+- int rc = 0;
++ unsigned long size;
+ ENTRY;
+
+ if (lli->lli_smd == NULL) {
+ CERROR("No lsm on fault?\n");
+- RETURN(NOPAGE_SIGBUS);
++ RETURN(0);
+ }
+
+ ll_clear_file_contended(inode);
+
+ /* start and end the lock on the first and last bytes in the page */
+- policy_from_vma(&policy, vma, address, CFS_PAGE_SIZE);
++ policy_from_vma_pgoff(&policy, vma, pgoff, CFS_PAGE_SIZE);
+
+ CDEBUG(D_MMAP, "nopage vma %p inode %lu, locking ["LPU64", "LPU64"]\n",
+ vma, inode->i_ino, policy.l_extent.start, policy.l_extent.end);
+@@ -405,26 +400,28 @@
+ mode = mode_from_vma(vma);
+ old_mtime = LTIME_S(inode->i_mtime);
+
+- lsm = lli->lli_smd;
+- rc = ll_extent_lock(fd, inode, lsm, mode, &policy,
+- &lockh, LDLM_FL_CBPENDING | LDLM_FL_NO_LRU);
+- if (rc != 0)
+- RETURN(NOPAGE_SIGBUS);
++ if (ll_extent_lock(fd, inode, lli->lli_smd, mode, &policy,
++ lockh, LDLM_FL_CBPENDING | LDLM_FL_NO_LRU) != 0)
++ RETURN(0);
+
+ if (vma->vm_flags & VM_EXEC && LTIME_S(inode->i_mtime) != old_mtime)
+ CWARN("binary changed. inode %lu\n", inode->i_ino);
+
+- lov_stripe_lock(lsm);
++ lov_stripe_lock(lli->lli_smd);
+ inode_init_lvb(inode, &lvb);
+- obd_merge_lvb(ll_i2obdexp(inode), lsm, &lvb, 1);
++ if (obd_merge_lvb(ll_i2obdexp(inode), lli->lli_smd, &lvb, 1)) {
++ lov_stripe_unlock(lli->lli_smd);
++ RETURN(0);
++ }
+ kms = lvb.lvb_size;
+
+- pgoff = ((address - vma->vm_start) >> CFS_PAGE_SHIFT) + vma->vm_pgoff;
+ size = (kms + CFS_PAGE_SIZE - 1) >> CFS_PAGE_SHIFT;
++ CDEBUG(D_INFO, "Kms %lu - %lu\n", size, pgoff);
+
+ if (pgoff >= size) {
+- lov_stripe_unlock(lsm);
++ lov_stripe_unlock(lli->lli_smd);
+ ll_glimpse_size(inode, LDLM_FL_BLOCK_GRANTED);
++ lov_stripe_lock(lli->lli_smd);
+ } else {
+ /* XXX change inode size without ll_inode_size_lock() held!
+ * there is a race condition with truncate path. (see
+@@ -446,29 +443,69 @@
+ CDEBUG(D_INODE, "ino=%lu, updating i_size %llu\n",
+ inode->i_ino, i_size_read(inode));
+ }
+- lov_stripe_unlock(lsm);
+ }
+
+ /* If mapping is writeable, adjust kms to cover this page,
+ * but do not extend kms beyond actual file size.
+ * policy.l_extent.end is set to the end of the page by policy_from_vma
+ * bug 10919 */
+- lov_stripe_lock(lsm);
+ if (mode == LCK_PW)
+- obd_adjust_kms(ll_i2obdexp(inode), lsm,
++ obd_adjust_kms(ll_i2obdexp(inode), lli->lli_smd,
+ min_t(loff_t, policy.l_extent.end + 1,
+ i_size_read(inode)), 0);
+- lov_stripe_unlock(lsm);
++ lov_stripe_unlock(lli->lli_smd);
+
+ /* disable VM_SEQ_READ and use VM_RAND_READ to make sure that
+ * the kernel will not read other pages not covered by ldlm in
+ * filemap_nopage. we do our readahead in ll_readpage.
+ */
+- rand_read = vma->vm_flags & VM_RAND_READ;
+- seq_read = vma->vm_flags & VM_SEQ_READ;
++ *save_flags = vma->vm_flags & (VM_RAND_READ | VM_SEQ_READ);
+ vma->vm_flags &= ~ VM_SEQ_READ;
+ vma->vm_flags |= VM_RAND_READ;
+
++ return 1;
++}
++
++static void ll_put_extent_lock(struct vm_area_struct *vma, int save_flags,
++ struct lustre_handle *lockh)
++{
++ struct file *filp = vma->vm_file;
++ struct ll_file_data *fd = LUSTRE_FPRIVATE(filp);
++ struct inode *inode = filp->f_dentry->d_inode;
++ ldlm_mode_t mode;
++
++ mode = mode_from_vma(vma);
++ vma->vm_flags &= ~(VM_RAND_READ | VM_SEQ_READ);
++ vma->vm_flags |= save_flags;
++
++ ll_extent_unlock(fd, inode, ll_i2info(inode)->lli_smd, mode, lockh);
++}
++
++#ifndef HAVE_VM_OP_FAULT
++/**
++ * Page fault handler (old ->nopage API).
++ *
++ * \param vma - virtual memory area the fault occurred in
++ * \param address - virtual address that triggered the fault
++ * \param type - type of the fault
++ *
++ * \return allocated and filled page for the address
++ * \retval NOPAGE_SIGBUS if no page exists at this address
++ * \retval NOPAGE_OOM if there is no memory to allocate a new page
++ */
++struct page *ll_nopage(struct vm_area_struct *vma, unsigned long address,
++ int *type)
++{
++ struct lustre_handle lockh = { 0 };
++ int save_flags = 0;
++ unsigned long pgoff;
++ struct page *page;
++ ENTRY;
++
++ pgoff = ((address - vma->vm_start) >> CFS_PAGE_SHIFT) + vma->vm_pgoff;
++ if (!ll_get_extent_lock(vma, pgoff, &save_flags, &lockh))
++ RETURN(NOPAGE_SIGBUS);
++
+ page = filemap_nopage(vma, address, type);
+ if (page != NOPAGE_SIGBUS && page != NOPAGE_OOM)
+ LL_CDEBUG_PAGE(D_PAGE, page, "got addr %lu type %lx\n", address,
+@@ -477,13 +514,48 @@
+ CDEBUG(D_PAGE, "got addr %lu type %lx - SIGBUS\n", address,
+ (long)type);
+
+- vma->vm_flags &= ~VM_RAND_READ;
+- vma->vm_flags |= (rand_read | seq_read);
++ ll_put_extent_lock(vma, save_flags, &lockh);
+
+- ll_extent_unlock(fd, inode, ll_i2info(inode)->lli_smd, mode, &lockh);
+ RETURN(page);
+ }
+
++#else
++/* New fault() API */
++/**
++ * Page fault handler (new ->fault API).
++ *
++ * \param vma - virtual memory area the fault occurred in
++ * \param vmf - fault descriptor; carries the file pgoff in and, on
++ * success, the faulted page out
++ *
++ * \return VM_FAULT_* status as returned by filemap_fault()
++ * \retval VM_FAULT_SIGBUS if the extent lock could not be taken
++ * \retval VM_FAULT_OOM if no page could be allocated
++ */
++int ll_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
++{
++ struct lustre_handle lockh = { 0 };
++ int save_flags = 0;
++ int rc;
++ ENTRY;
++
++ if (!ll_get_extent_lock(vma, vmf->pgoff, &save_flags, &lockh))
++ RETURN(VM_FAULT_SIGBUS);
++
++ rc = filemap_fault(vma, vmf);
++ if (vmf->page)
++ LL_CDEBUG_PAGE(D_PAGE, vmf->page, "got addr %p type NOPAGE\n",
++ vmf->virtual_address);
++ else
++ CDEBUG(D_PAGE, "got addr %p - SIGBUS\n",
++ vmf->virtual_address);
++
++ ll_put_extent_lock(vma, save_flags, &lockh);
++
++ RETURN(rc);
++}
++#endif
++
+ /* To avoid cancel the locks covering mmapped region for lock cache pressure,
+ * we track the mapped vma count by lli_mmap_cnt.
+ * ll_vm_open(): when first vma is linked, split locks from lru.
+@@ -548,6 +620,7 @@
+ }
+ }
+
++#ifndef HAVE_VM_OP_FAULT
+ #ifndef HAVE_FILEMAP_POPULATE
+ static int (*filemap_populate)(struct vm_area_struct * area, unsigned long address, unsigned long len, pgprot_t prot, unsigned long pgoff, int nonblock);
+ #endif
+@@ -562,6 +635,7 @@
+ rc = filemap_populate(area, address, len, prot, pgoff, 1);
+ RETURN(rc);
+ }
++#endif
+
+ /* return the user space pointer that maps to a file offset via a vma */
+ static inline unsigned long file_to_user(struct vm_area_struct *vma, __u64 byte)
+@@ -588,10 +662,14 @@
+ }
+
+ static struct vm_operations_struct ll_file_vm_ops = {
+- .nopage = ll_nopage,
+ .open = ll_vm_open,
+ .close = ll_vm_close,
++#ifdef HAVE_VM_OP_FAULT
++ .fault = ll_fault,
++#else
++ .nopage = ll_nopage,
+ .populate = ll_populate,
++#endif
+ };
+
+ int ll_file_mmap(struct file * file, struct vm_area_struct * vma)
+@@ -602,7 +680,7 @@
+ ll_stats_ops_tally(ll_i2sbi(file->f_dentry->d_inode), LPROC_LL_MAP, 1);
+ rc = generic_file_mmap(file, vma);
+ if (rc == 0) {
+-#ifndef HAVE_FILEMAP_POPULATE
++#if !defined(HAVE_FILEMAP_POPULATE) && !defined(HAVE_VM_OP_FAULT)
+ if (!filemap_populate)
+ filemap_populate = vma->vm_ops->populate;
+ #endif
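
The mmap rework above splits the old ll_nopage() body into ll_get_extent_lock()/ll_put_extent_lock(), shared by the legacy ->nopage and the new ->fault entry points; the only per-API difference left is how the file page offset is obtained. A standalone sketch of that arithmetic (CFS_PAGE_SHIFT assumed to be 12, i.e. 4 KiB pages):

#include <stdio.h>

#define CFS_PAGE_SHIFT 12       /* assumed: 4 KiB pages */

/* same derivation ll_nopage() performs before calling
 * ll_get_extent_lock(): map a faulting virtual address back to the
 * file-relative page index */
static unsigned long addr_to_pgoff(unsigned long address,
                                   unsigned long vm_start,
                                   unsigned long vm_pgoff)
{
        return ((address - vm_start) >> CFS_PAGE_SHIFT) + vm_pgoff;
}

int main(void)
{
        /* a mapping starting at 0x7f0000000000 that covers the file
         * from page 16 onward; a fault 3 pages into the mapping
         * therefore lands on file page 19 */
        printf("file pgoff = %lu\n",
               addr_to_pgoff(0x7f0000003000UL, 0x7f0000000000UL, 16));
        return 0;
}

With the fault API the kernel hands this value in directly as vmf->pgoff, which is why ll_fault() needs no address arithmetic at all.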
+diff -urNad lustre~/lustre/llite/llite_nfs.c lustre/lustre/llite/llite_nfs.c
+--- lustre~/lustre/llite/llite_nfs.c 2009-03-12 10:32:27.000000000 +0100
++++ lustre/lustre/llite/llite_nfs.c 2009-03-12 11:02:51.000000000 +0100
+@@ -68,36 +68,30 @@
+ }
+
+ static struct inode * search_inode_for_lustre(struct super_block *sb,
+- unsigned long ino,
+- unsigned long generation,
+- int mode)
++ struct ll_fid *iid)
+ {
+ struct ptlrpc_request *req = NULL;
+ struct ll_sb_info *sbi = ll_s2sbi(sb);
+- struct ll_fid fid;
+ unsigned long valid = 0;
+ int eadatalen = 0, rc;
+ struct inode *inode = NULL;
+- struct ll_fid iid = { .id = ino, .generation = generation };
+ ENTRY;
+
+- inode = ILOOKUP(sb, ino, ll_nfs_test_inode, &iid);
++ inode = ILOOKUP(sb, iid->id, ll_nfs_test_inode, iid);
+
+ if (inode)
+ RETURN(inode);
+- if (S_ISREG(mode)) {
+- rc = ll_get_max_mdsize(sbi, &eadatalen);
+- if (rc)
+- RETURN(ERR_PTR(rc));
+- valid |= OBD_MD_FLEASIZE;
+- }
+- fid.id = (__u64)ino;
+- fid.generation = generation;
+- fid.f_type = mode;
+
+- rc = mdc_getattr(sbi->ll_mdc_exp, &fid, valid, eadatalen, &req);
++ rc = ll_get_max_mdsize(sbi, &eadatalen);
++ if (rc)
++ RETURN(ERR_PTR(rc));
++
++ valid |= OBD_MD_FLEASIZE;
++
++ /* mds_fid2dentry ignores f_type */
++ rc = mdc_getattr(sbi->ll_mdc_exp, iid, valid, eadatalen, &req);
+ if (rc) {
+- CERROR("failure %d inode %lu\n", rc, ino);
++ CERROR("failure %d inode "LPU64"\n", rc, iid->id);
+ RETURN(ERR_PTR(rc));
+ }
+
+@@ -111,27 +105,27 @@
+ RETURN(inode);
+ }
+
+-static struct dentry *ll_iget_for_nfs(struct super_block *sb, unsigned long ino,
+- __u32 generation, umode_t mode)
++static struct dentry *ll_iget_for_nfs(struct super_block *sb,
++ struct ll_fid *iid)
+ {
+ struct inode *inode;
+ struct dentry *result;
+ ENTRY;
+
+- if (ino == 0)
++ if (iid->id == 0)
+ RETURN(ERR_PTR(-ESTALE));
+
+- inode = search_inode_for_lustre(sb, ino, generation, mode);
+- if (IS_ERR(inode)) {
++ inode = search_inode_for_lustre(sb, iid);
++ if (IS_ERR(inode))
+ RETURN(ERR_PTR(PTR_ERR(inode)));
+- }
++
+ if (is_bad_inode(inode) ||
+- (generation && inode->i_generation != generation)){
++ (iid->generation && inode->i_generation != iid->generation)) {
+ /* we didn't find the right inode.. */
+ CERROR("Inode %lu, Bad count: %lu %d or version %u %u\n",
+ inode->i_ino, (unsigned long)inode->i_nlink,
+ atomic_read(&inode->i_count), inode->i_generation,
+- generation);
++ iid->generation);
+ iput(inode);
+ RETURN(ERR_PTR(-ESTALE));
+ }
+@@ -146,57 +140,102 @@
+ RETURN(result);
+ }
+
+-struct dentry *ll_fh_to_dentry(struct super_block *sb, __u32 *data, int len,
+- int fhtype, int parent)
++#define LUSTRE_NFS_FID 0x94
++
++struct lustre_nfs_fid {
++ struct ll_fid child;
++ struct ll_fid parent;
++ umode_t mode;
++};
++
++/* The return value is the file handle type:
++ * LUSTRE_NFS_FID -- the handle contains both the child and the
++ * parent file handle (a struct lustre_nfs_fid);
++ * 255 -- error: the caller's buffer is too small.
++ */
++static int ll_encode_fh(struct dentry *de, __u32 *fh, int *plen,
++ int connectable)
+ {
+- switch (fhtype) {
+- case 2:
+- if (len < 5)
+- break;
+- if (parent)
+- return ll_iget_for_nfs(sb, data[3], 0, data[4]);
+- case 1:
+- if (len < 3)
+- break;
+- if (parent)
+- break;
+- return ll_iget_for_nfs(sb, data[0], data[1], data[2]);
+- default: break;
+- }
+- return ERR_PTR(-EINVAL);
++ struct inode *inode = de->d_inode;
++ struct inode *parent = de->d_parent->d_inode;
++ struct lustre_nfs_fid *nfs_fid = (void *)fh;
++ ENTRY;
++
++ CDEBUG(D_INFO, "encoding for (%lu) maxlen=%d minlen=%lu\n",
++ inode->i_ino, *plen,
++ sizeof(struct lustre_nfs_fid));
++
++ if (*plen < sizeof(struct lustre_nfs_fid))
++ RETURN(255);
++
++ ll_inode2fid(&nfs_fid->child, inode);
++ ll_inode2fid(&nfs_fid->parent, parent);
++
++ nfs_fid->mode = (S_IFMT & inode->i_mode);
++ *plen = sizeof(struct lustre_nfs_fid);
++
++ RETURN(LUSTRE_NFS_FID);
+ }
+
+-int ll_dentry_to_fh(struct dentry *dentry, __u32 *datap, int *lenp,
+- int need_parent)
++#ifdef HAVE_FH_TO_DENTRY
++static struct dentry *ll_fh_to_dentry(struct super_block *sb, struct fid *fid,
++ int fh_len, int fh_type)
+ {
+- if (*lenp < 3)
+- return 255;
+- *datap++ = dentry->d_inode->i_ino;
+- *datap++ = dentry->d_inode->i_generation;
+- *datap++ = (__u32)(S_IFMT & dentry->d_inode->i_mode);
++ struct lustre_nfs_fid *nfs_fid = (struct lustre_nfs_fid *)fid;
+
+- if (*lenp == 3 || S_ISDIR(dentry->d_inode->i_mode)) {
+- *lenp = 3;
+- return 1;
+- }
+- if (dentry->d_parent) {
+- *datap++ = dentry->d_parent->d_inode->i_ino;
+- *datap++ = (__u32)(S_IFMT & dentry->d_parent->d_inode->i_mode);
++ if (fh_type != LUSTRE_NFS_FID)
++ RETURN(ERR_PTR(-EINVAL));
+
+- *lenp = 5;
+- return 2;
+- }
+- *lenp = 3;
+- return 1;
++ RETURN(ll_iget_for_nfs(sb, &nfs_fid->child));
++}
++static struct dentry *ll_fh_to_parent(struct super_block *sb, struct fid *fid,
++ int fh_len, int fh_type)
++{
++ struct lustre_nfs_fid *nfs_fid = (struct lustre_nfs_fid *)fid;
++
++ if (fh_type != LUSTRE_NFS_FID)
++ RETURN(ERR_PTR(-EINVAL));
++ RETURN(ll_iget_for_nfs(sb, &nfs_fid->parent));
+ }
+
+-#if THREAD_SIZE >= 8192
++#else
++/*
++ * fh_len is counted in __u32 units; the handle itself is
++ * composed of a fid pair and a mode (struct lustre_nfs_fid).
++ */
++static struct dentry *ll_decode_fh(struct super_block *sb, __u32 *fh, int fh_len,
++ int fh_type,
++ int (*acceptable)(void *, struct dentry *),
++ void *context)
++{
++ struct lustre_nfs_fid *nfs_fid = (void *)fh;
++ struct dentry *entry;
++ ENTRY;
++
++ CDEBUG(D_INFO, "decoding for "LPU64" fh_len=%d fh_type=%x\n",
++ nfs_fid->child.id, fh_len, fh_type);
++
++ if (fh_type != LUSTRE_NFS_FID)
++ RETURN(ERR_PTR(-ESTALE));
++
++ entry = sb->s_export_op->find_exported_dentry(sb, &nfs_fid->child,
++ &nfs_fid->parent,
++ acceptable, context);
++ RETURN(entry);
++}
++
++
+ struct dentry *ll_get_dentry(struct super_block *sb, void *data)
+ {
+- __u32 *inump = (__u32*)data;
+- return ll_iget_for_nfs(sb, inump[0], inump[1], S_IFREG);
++ struct lustre_nfs_fid *fid = data;
++ ENTRY;
++
++ RETURN(ll_iget_for_nfs(sb, &fid->child));
++
+ }
+
++#endif
++
+ struct dentry *ll_get_parent(struct dentry *dchild)
+ {
+ struct ptlrpc_request *req = NULL;
+@@ -208,11 +247,11 @@
+ char dotdot[] = "..";
+ int rc = 0;
+ ENTRY;
+-
++
+ LASSERT(dir && S_ISDIR(dir->i_mode));
+-
+- sbi = ll_s2sbi(dir->i_sb);
+-
++
++ sbi = ll_s2sbi(dir->i_sb);
++
+ fid.id = (__u64)dir->i_ino;
+ fid.generation = dir->i_generation;
+ fid.f_type = S_IFDIR;
+@@ -223,11 +262,12 @@
+ CERROR("failure %d inode %lu get parent\n", rc, dir->i_ino);
+ return ERR_PTR(rc);
+ }
+- body = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF, sizeof (*body));
+-
++ body = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF, sizeof (*body));
++
+ LASSERT((body->valid & OBD_MD_FLGENER) && (body->valid & OBD_MD_FLID));
+-
+- result = ll_iget_for_nfs(dir->i_sb, body->ino, body->generation, S_IFDIR);
++ fid.id = body->ino;
++ fid.generation = body->generation;
++ result = ll_iget_for_nfs(dir->i_sb, &fid);
+
+ if (IS_ERR(result))
+ rc = PTR_ERR(result);
+@@ -236,10 +276,18 @@
+ if (rc)
+ return ERR_PTR(rc);
+ RETURN(result);
+-}
++}
+
++
++#if THREAD_SIZE >= 8192
+ struct export_operations lustre_export_operations = {
+- .get_parent = ll_get_parent,
+- .get_dentry = ll_get_dentry,
++ .encode_fh = ll_encode_fh,
++#ifdef HAVE_FH_TO_DENTRY
++ .fh_to_dentry = ll_fh_to_dentry,
++ .fh_to_parent = ll_fh_to_parent,
++#else
++ .get_dentry = ll_get_dentry,
++ .decode_fh = ll_decode_fh,
++#endif
+ };
+ #endif
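
The NFS export rework above replaces the loose ino/generation/mode triple with a fixed struct lustre_nfs_fid handle. A standalone sketch of the encode side's layout and size check (types are re-declared locally for illustration; the real ll_fid comes from the Lustre headers, and the length convention of __u32 units follows the kernel's encode_fh interface):

#include <stdio.h>
#include <sys/stat.h>

typedef unsigned long long __u64;
typedef unsigned int __u32;

struct ll_fid { __u64 id; __u32 generation; __u32 f_type; };

struct lustre_nfs_fid {
        struct ll_fid child;
        struct ll_fid parent;
        unsigned int  mode;
};

#define LUSTRE_NFS_FID 0x94

/* mirrors ll_encode_fh(): refuse with 255 if the buffer is too small,
 * otherwise store child fid, parent fid and the S_IFMT bits and report
 * the Lustre handle type; plen counts __u32 slots */
static int encode_fh_sketch(__u32 *fh, int *plen, struct ll_fid child,
                            struct ll_fid parent, unsigned int mode)
{
        struct lustre_nfs_fid *nfs_fid = (struct lustre_nfs_fid *)fh;

        if (*plen * sizeof(__u32) < sizeof(*nfs_fid))
                return 255;
        nfs_fid->child = child;
        nfs_fid->parent = parent;
        nfs_fid->mode = mode & S_IFMT;
        *plen = sizeof(*nfs_fid) / sizeof(__u32);
        return LUSTRE_NFS_FID;
}

int main(void)
{
        __u32 fh[16];
        int len = 16;
        struct ll_fid child = { 42, 7, 0 }, parent = { 41, 3, 0 };
        int type = encode_fh_sketch(fh, &len, child, parent, S_IFREG);

        printf("handle type %#x, %d __u32 slots used\n", type, len);
        return 0;
}

Packing the parent fid into every handle is what lets ll_fh_to_parent() (and the older find_exported_dentry() path) work without a second MDS round trip.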
+diff -urNad lustre~/lustre/llite/lloop.c lustre/lustre/llite/lloop.c
+--- lustre~/lustre/llite/lloop.c 2009-03-12 10:32:27.000000000 +0100
++++ lustre/lustre/llite/lloop.c 2009-03-12 11:04:30.000000000 +0100
+@@ -152,7 +152,7 @@
+ struct semaphore lo_bh_mutex;
+ atomic_t lo_pending;
+
+- request_queue_t *lo_queue;
++ struct request_queue *lo_queue;
+
+ /* data to handle bio for lustre. */
+ struct lo_request_data {
+@@ -283,7 +283,7 @@
+ return bio;
+ }
+
+-static int loop_make_request(request_queue_t *q, struct bio *old_bio)
++static int loop_make_request(struct request_queue *q, struct bio *old_bio)
+ {
+ struct lloop_device *lo = q->queuedata;
+ int rw = bio_rw(old_bio);
+@@ -312,7 +312,7 @@
+ if (atomic_dec_and_test(&lo->lo_pending))
+ up(&lo->lo_bh_mutex);
+ out:
+- bio_io_error(old_bio, old_bio->bi_size);
++ cfs_bio_io_error(old_bio, old_bio->bi_size);
+ return 0;
+ inactive:
+ spin_unlock_irq(&lo->lo_lock);
+@@ -322,7 +322,7 @@
+ /*
+ * kick off io on the underlying address space
+ */
+-static void loop_unplug(request_queue_t *q)
++static void loop_unplug(struct request_queue *q)
+ {
+ struct lloop_device *lo = q->queuedata;
+
+@@ -736,7 +736,7 @@
+
+ out_mem4:
+ while (i--)
+- blk_put_queue(loop_dev[i].lo_queue);
++ blk_cleanup_queue(loop_dev[i].lo_queue);
+ i = max_loop;
+ out_mem3:
+ while (i--)
+@@ -758,7 +758,7 @@
+ ll_iocontrol_unregister(ll_iocontrol_magic);
+ for (i = 0; i < max_loop; i++) {
+ del_gendisk(disks[i]);
+- blk_put_queue(loop_dev[i].lo_queue);
++ blk_cleanup_queue(loop_dev[i].lo_queue);
+ put_disk(disks[i]);
+ }
+ if (ll_unregister_blkdev(lloop_major, "lloop"))
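
For context on the lloop hunks: requests never block in loop_make_request(); they are queued on a singly linked FIFO and drained by the loop thread. A standalone sketch of that FIFO discipline, without the spinlock/irq protection the real driver takes (loop_add_bio()/loop_get_bio() appear in full in the listing below):

#include <stdio.h>

struct bio_like { int id; struct bio_like *bi_next; };
struct lo_like  { struct bio_like *lo_bio, *lo_biotail; };

/* append at the tail, as loop_add_bio() does */
static void add_bio(struct lo_like *lo, struct bio_like *bio)
{
        bio->bi_next = NULL;
        if (lo->lo_biotail) {
                lo->lo_biotail->bi_next = bio;
                lo->lo_biotail = bio;
        } else {
                lo->lo_bio = lo->lo_biotail = bio;
        }
}

/* pop from the head, as loop_get_bio() does */
static struct bio_like *get_bio(struct lo_like *lo)
{
        struct bio_like *bio = lo->lo_bio;

        if (bio) {
                if (bio == lo->lo_biotail)
                        lo->lo_biotail = NULL;
                lo->lo_bio = bio->bi_next;
                bio->bi_next = NULL;
        }
        return bio;
}

int main(void)
{
        struct lo_like lo = { NULL, NULL };
        struct bio_like a = { 1, NULL }, b = { 2, NULL };
        struct bio_like *bio;

        add_bio(&lo, &a);
        add_bio(&lo, &b);
        while ((bio = get_bio(&lo)))
                printf("bio %d\n", bio->id);    /* prints 1 then 2 */
        return 0;
}

The tail pointer keeps both operations O(1), so bios are serviced strictly in arrival order.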
+diff -urNad lustre~/lustre/llite/lloop.c.orig lustre/lustre/llite/lloop.c.orig
+--- lustre~/lustre/llite/lloop.c.orig 1970-01-01 00:00:00.000000000 +0000
++++ lustre/lustre/llite/lloop.c.orig 2009-03-12 10:32:27.000000000 +0100
+@@ -0,0 +1,777 @@
++/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
++ * vim:expandtab:shiftwidth=8:tabstop=8:
++ *
++ * GPL HEADER START
++ *
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License version 2 only,
++ * as published by the Free Software Foundation.
++ *
++ * This program is distributed in the hope that it will be useful, but
++ * WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ * General Public License version 2 for more details (a copy is included
++ * in the LICENSE file that accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License
++ * version 2 along with this program; If not, see
++ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
++ *
++ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
++ * CA 95054 USA or visit www.sun.com if you need additional information or
++ * have any questions.
++ *
++ * GPL HEADER END
++ */
++/*
++ * Copyright 2008 Sun Microsystems, Inc. All rights reserved
++ * Use is subject to license terms.
++ */
++/*
++ * This file is part of Lustre, http://www.lustre.org/
++ * Lustre is a trademark of Sun Microsystems, Inc.
++ */
++
++/*
++ * linux/drivers/block/loop.c
++ *
++ * Written by Theodore Ts'o, 3/29/93
++ *
++ * Copyright 1993 by Theodore Ts'o. Redistribution of this file is
++ * permitted under the GNU General Public License.
++ *
++ * DES encryption plus some minor changes by Werner Almesberger, 30-MAY-1993
++ * more DES encryption plus IDEA encryption by Nicholas J. Leon, June 20, 1996
++ *
++ * Modularized and updated for 1.1.16 kernel - Mitch Dsouza 28th May 1994
++ * Adapted for 1.3.59 kernel - Andries Brouwer, 1 Feb 1996
++ *
++ * Fixed do_loop_request() re-entrancy - Vincent.Renardias at waw.com Mar 20, 1997
++ *
++ * Added devfs support - Richard Gooch <rgooch at atnf.csiro.au> 16-Jan-1998
++ *
++ * Handle sparse backing files correctly - Kenn Humborg, Jun 28, 1998
++ *
++ * Loadable modules and other fixes by AK, 1998
++ *
++ * Make real block number available to downstream transfer functions, enables
++ * CBC (and relatives) mode encryption requiring unique IVs per data block.
++ * Reed H. Petty, rhp at draper.net
++ *
++ * Maximum number of loop devices now dynamic via max_loop module parameter.
++ * Russell Kroll <rkroll at exploits.org> 19990701
++ *
++ * Maximum number of loop devices when compiled-in now selectable by passing
++ * max_loop=<1-255> to the kernel on boot.
++ * Erik I. Bols?, <eriki at himolde.no>, Oct 31, 1999
++ *
++ * Completely rewrite request handling to be make_request_fn style and
++ * non blocking, pushing work to a helper thread. Lots of fixes from
++ * Al Viro too.
++ * Jens Axboe <axboe at suse.de>, Nov 2000
++ *
++ * Support up to 256 loop devices
++ * Heinz Mauelshagen <mge at sistina.com>, Feb 2002
++ *
++ * Support for falling back on the write file operation when the address space
++ * operations prepare_write and/or commit_write are not available on the
++ * backing filesystem.
++ * Anton Altaparmakov, 16 Feb 2005
++ *
++ * Still To Fix:
++ * - Advisory locking is ignored here.
++ * - Should use an own CAP_* category instead of CAP_SYS_ADMIN
++ *
++ */
++
++#ifndef AUTOCONF_INCLUDED
++#include <linux/config.h>
++#endif
++#include <linux/module.h>
++
++#include <linux/sched.h>
++#include <linux/fs.h>
++#include <linux/file.h>
++#include <linux/stat.h>
++#include <linux/errno.h>
++#include <linux/major.h>
++#include <linux/wait.h>
++#include <linux/blkdev.h>
++#include <linux/blkpg.h>
++#include <linux/init.h>
++#include <linux/smp_lock.h>
++#include <linux/swap.h>
++#include <linux/slab.h>
++#include <linux/suspend.h>
++#include <linux/writeback.h>
++#include <linux/buffer_head.h> /* for invalidate_bdev() */
++#include <linux/completion.h>
++#include <linux/highmem.h>
++#include <linux/gfp.h>
++#include <linux/swap.h>
++#include <linux/pagevec.h>
++
++#include <asm/uaccess.h>
++
++#include <lustre_lib.h>
++#include <lustre_lite.h>
++#include "llite_internal.h"
++
++#define LLOOP_MAX_SEGMENTS PTLRPC_MAX_BRW_PAGES
++
++/* Possible states of device */
++enum {
++ LLOOP_UNBOUND,
++ LLOOP_BOUND,
++ LLOOP_RUNDOWN,
++};
++
++struct lloop_device {
++ int lo_number;
++ int lo_refcnt;
++ loff_t lo_offset;
++ loff_t lo_sizelimit;
++ int lo_flags;
++ int (*ioctl)(struct lloop_device *, int cmd,
++ unsigned long arg);
++
++ struct file * lo_backing_file;
++ struct block_device *lo_device;
++ unsigned lo_blocksize;
++
++ int old_gfp_mask;
++
++ spinlock_t lo_lock;
++ struct bio *lo_bio;
++ struct bio *lo_biotail;
++ int lo_state;
++ struct semaphore lo_sem;
++ struct semaphore lo_ctl_mutex;
++ struct semaphore lo_bh_mutex;
++ atomic_t lo_pending;
++
++ request_queue_t *lo_queue;
++
++ /* data to handle bio for lustre. */
++ struct lo_request_data {
++ struct brw_page lrd_pages[LLOOP_MAX_SEGMENTS];
++ struct obdo lrd_oa;
++ } lo_requests[1];
++
++};
++
++/*
++ * Loop flags
++ */
++enum {
++ LO_FLAGS_READ_ONLY = 1,
++};
++
++static int lloop_major;
++static int max_loop = 8;
++static struct lloop_device *loop_dev;
++static struct gendisk **disks;
++static struct semaphore lloop_mutex;
++static void *ll_iocontrol_magic = NULL;
++
++static loff_t get_loop_size(struct lloop_device *lo, struct file *file)
++{
++ loff_t size, offset, loopsize;
++
++ /* Compute loopsize in bytes */
++ size = i_size_read(file->f_mapping->host);
++ offset = lo->lo_offset;
++ loopsize = size - offset;
++ if (lo->lo_sizelimit > 0 && lo->lo_sizelimit < loopsize)
++ loopsize = lo->lo_sizelimit;
++
++ /*
++ * Unfortunately, if we want to do I/O on the device,
++ * the number of 512-byte sectors has to fit into a sector_t.
++ */
++ return loopsize >> 9;
++}
++
++static int do_bio_filebacked(struct lloop_device *lo, struct bio *bio)
++{
++ struct inode *inode = lo->lo_backing_file->f_dentry->d_inode;
++ struct ll_inode_info *lli = ll_i2info(inode);
++ struct lov_stripe_md *lsm = lli->lli_smd;
++ struct obd_info oinfo = {{{0}}};
++ struct brw_page *pg = lo->lo_requests[0].lrd_pages;
++ struct obdo *oa = &lo->lo_requests[0].lrd_oa;
++ pgoff_t offset;
++ int ret, cmd, i;
++ struct bio_vec *bvec;
++
++ BUG_ON(bio->bi_hw_segments > LLOOP_MAX_SEGMENTS);
++
++ offset = (pgoff_t)(bio->bi_sector << 9) + lo->lo_offset;
++ bio_for_each_segment(bvec, bio, i) {
++ BUG_ON(bvec->bv_offset != 0);
++ BUG_ON(bvec->bv_len != CFS_PAGE_SIZE);
++
++ pg->pg = bvec->bv_page;
++ pg->off = offset;
++ pg->count = bvec->bv_len;
++ pg->flag = OBD_BRW_SRVLOCK;
++
++ pg++;
++ offset += bvec->bv_len;
++ }
++
++ oa->o_mode = inode->i_mode;
++ oa->o_id = lsm->lsm_object_id;
++ oa->o_valid = OBD_MD_FLID | OBD_MD_FLMODE | OBD_MD_FLTYPE;
++ obdo_from_inode(oa, inode, OBD_MD_FLFID | OBD_MD_FLGENER);
++
++ cmd = OBD_BRW_READ;
++ if (bio_rw(bio) == WRITE)
++ cmd = OBD_BRW_WRITE;
++
++ if (cmd == OBD_BRW_WRITE)
++ ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_BRW_WRITE, bio->bi_size);
++ else
++ ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_BRW_READ, bio->bi_size);
++ oinfo.oi_oa = oa;
++ oinfo.oi_md = lsm;
++ ret = obd_brw(cmd, ll_i2obdexp(inode), &oinfo,
++ (obd_count)(i - bio->bi_idx),
++ lo->lo_requests[0].lrd_pages, NULL);
++ if (ret == 0)
++ obdo_to_inode(inode, oa, OBD_MD_FLBLOCKS);
++ return ret;
++}
++
++
++/*
++ * Add bio to back of pending list
++ */
++static void loop_add_bio(struct lloop_device *lo, struct bio *bio)
++{
++ unsigned long flags;
++
++ spin_lock_irqsave(&lo->lo_lock, flags);
++ if (lo->lo_biotail) {
++ lo->lo_biotail->bi_next = bio;
++ lo->lo_biotail = bio;
++ } else
++ lo->lo_bio = lo->lo_biotail = bio;
++ spin_unlock_irqrestore(&lo->lo_lock, flags);
++
++ up(&lo->lo_bh_mutex);
++}
++
++/*
++ * Grab first pending buffer
++ */
++static struct bio *loop_get_bio(struct lloop_device *lo)
++{
++ struct bio *bio;
++
++ spin_lock_irq(&lo->lo_lock);
++ if ((bio = lo->lo_bio)) {
++ if (bio == lo->lo_biotail)
++ lo->lo_biotail = NULL;
++ lo->lo_bio = bio->bi_next;
++ bio->bi_next = NULL;
++ }
++ spin_unlock_irq(&lo->lo_lock);
++
++ return bio;
++}
++
++static int loop_make_request(request_queue_t *q, struct bio *old_bio)
++{
++ struct lloop_device *lo = q->queuedata;
++ int rw = bio_rw(old_bio);
++
++ if (!lo)
++ goto out;
++
++ spin_lock_irq(&lo->lo_lock);
++ if (lo->lo_state != LLOOP_BOUND)
++ goto inactive;
++ atomic_inc(&lo->lo_pending);
++ spin_unlock_irq(&lo->lo_lock);
++
++ if (rw == WRITE) {
++ if (lo->lo_flags & LO_FLAGS_READ_ONLY)
++ goto err;
++ } else if (rw == READA) {
++ rw = READ;
++ } else if (rw != READ) {
++ CERROR("lloop: unknown command (%x)\n", rw);
++ goto err;
++ }
++ loop_add_bio(lo, old_bio);
++ return 0;
++err:
++ if (atomic_dec_and_test(&lo->lo_pending))
++ up(&lo->lo_bh_mutex);
++out:
++ bio_io_error(old_bio, old_bio->bi_size);
++ return 0;
++inactive:
++ spin_unlock_irq(&lo->lo_lock);
++ goto out;
++}
++
++/*
++ * kick off io on the underlying address space
++ */
++static void loop_unplug(request_queue_t *q)
++{
++ struct lloop_device *lo = q->queuedata;
++
++ clear_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags);
++ blk_run_address_space(lo->lo_backing_file->f_mapping);
++}
++
++static inline void loop_handle_bio(struct lloop_device *lo, struct bio *bio)
++{
++ int ret;
++ ret = do_bio_filebacked(lo, bio);
++ bio_endio(bio, bio->bi_size, ret);
++}
++
++/*
++ * worker thread that handles reads/writes to file backed loop devices,
++ * to avoid blocking in our make_request_fn. it also does loop decrypting
++ * on reads for block backed loop, as that is too heavy to do from
++ * b_end_io context where irqs may be disabled.
++ */
++static int loop_thread(void *data)
++{
++ struct lloop_device *lo = data;
++ struct bio *bio;
++
++ daemonize("lloop%d", lo->lo_number);
++
++ set_user_nice(current, -20);
++
++ lo->lo_state = LLOOP_BOUND;
++ atomic_inc(&lo->lo_pending);
++
++ /*
++ * up sem, we are running
++ */
++ up(&lo->lo_sem);
++
++ for (;;) {
++ down_interruptible(&lo->lo_bh_mutex);
++ /*
++ * could be upped because of tear-down, not because of
++ * pending work
++ */
++ if (!atomic_read(&lo->lo_pending))
++ break;
++
++ bio = loop_get_bio(lo);
++ if (!bio) {
++ CWARN("lloop(minor: %d): missing bio\n", lo->lo_number);
++ continue;
++ }
++ loop_handle_bio(lo, bio);
++
++ /*
++ * upped both for pending work and tear-down, lo_pending
++ * will hit zero then
++ */
++ if (atomic_dec_and_test(&lo->lo_pending))
++ break;
++ }
++
++ up(&lo->lo_sem);
++ return 0;
++}
++
++static int loop_set_fd(struct lloop_device *lo, struct file *unused,
++ struct block_device *bdev, struct file *file)
++{
++ struct inode *inode;
++ struct address_space *mapping;
++ int lo_flags = 0;
++ int error;
++ loff_t size;
++
++ if (!try_module_get(THIS_MODULE))
++ return -ENODEV;
++
++ error = -EBUSY;
++ if (lo->lo_state != LLOOP_UNBOUND)
++ goto out;
++
++ mapping = file->f_mapping;
++ inode = mapping->host;
++
++ error = -EINVAL;
++ if (!S_ISREG(inode->i_mode) || inode->i_sb->s_magic != LL_SUPER_MAGIC)
++ goto out;
++
++ if (!(file->f_mode & FMODE_WRITE))
++ lo_flags |= LO_FLAGS_READ_ONLY;
++
++ size = get_loop_size(lo, file);
++
++ if ((loff_t)(sector_t)size != size) {
++ error = -EFBIG;
++ goto out;
++ }
++
++ /* remove all pages from the page cache so no dirty pages remain. */
++ truncate_inode_pages(mapping, 0);
++
++ set_device_ro(bdev, (lo_flags & LO_FLAGS_READ_ONLY) != 0);
++
++ lo->lo_blocksize = CFS_PAGE_SIZE;
++ lo->lo_device = bdev;
++ lo->lo_flags = lo_flags;
++ lo->lo_backing_file = file;
++ lo->ioctl = NULL;
++ lo->lo_sizelimit = 0;
++ lo->old_gfp_mask = mapping_gfp_mask(mapping);
++ mapping_set_gfp_mask(mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS));
++
++ lo->lo_bio = lo->lo_biotail = NULL;
++
++ /*
++ * set queue make_request_fn, and add limits based on lower level
++ * device
++ */
++ blk_queue_make_request(lo->lo_queue, loop_make_request);
++ lo->lo_queue->queuedata = lo;
++ lo->lo_queue->unplug_fn = loop_unplug;
++
++ /* queue parameters */
++ blk_queue_hardsect_size(lo->lo_queue, CFS_PAGE_SIZE);
++ blk_queue_max_sectors(lo->lo_queue, LLOOP_MAX_SEGMENTS);
++ blk_queue_max_phys_segments(lo->lo_queue, LLOOP_MAX_SEGMENTS);
++
++ set_capacity(disks[lo->lo_number], size);
++ bd_set_size(bdev, size << 9);
++
++ set_blocksize(bdev, lo->lo_blocksize);
++
++ kernel_thread(loop_thread, lo, CLONE_KERNEL);
++ down(&lo->lo_sem);
++ return 0;
++
++ out:
++ /* This is safe: open() is still holding a reference. */
++ module_put(THIS_MODULE);
++ return error;
++}
++
++static int loop_clr_fd(struct lloop_device *lo, struct block_device *bdev,
++ int count)
++{
++ struct file *filp = lo->lo_backing_file;
++ int gfp = lo->old_gfp_mask;
++
++ if (lo->lo_state != LLOOP_BOUND)
++ return -ENXIO;
++
++ if (lo->lo_refcnt > count) /* we needed one fd for the ioctl */
++ return -EBUSY;
++
++ if (filp == NULL)
++ return -EINVAL;
++
++ spin_lock_irq(&lo->lo_lock);
++ lo->lo_state = LLOOP_RUNDOWN;
++ if (atomic_dec_and_test(&lo->lo_pending))
++ up(&lo->lo_bh_mutex);
++ spin_unlock_irq(&lo->lo_lock);
++
++ down(&lo->lo_sem);
++ lo->lo_backing_file = NULL;
++ lo->ioctl = NULL;
++ lo->lo_device = NULL;
++ lo->lo_offset = 0;
++ lo->lo_sizelimit = 0;
++ lo->lo_flags = 0;
++ ll_invalidate_bdev(bdev, 0);
++ set_capacity(disks[lo->lo_number], 0);
++ bd_set_size(bdev, 0);
++ mapping_set_gfp_mask(filp->f_mapping, gfp);
++ lo->lo_state = LLOOP_UNBOUND;
++ fput(filp);
++ /* This is safe: open() is still holding a reference. */
++ module_put(THIS_MODULE);
++ return 0;
++}
++
++static int lo_open(struct inode *inode, struct file *file)
++{
++ struct lloop_device *lo = inode->i_bdev->bd_disk->private_data;
++
++ down(&lo->lo_ctl_mutex);
++ lo->lo_refcnt++;
++ up(&lo->lo_ctl_mutex);
++
++ return 0;
++}
++
++static int lo_release(struct inode *inode, struct file *file)
++{
++ struct lloop_device *lo = inode->i_bdev->bd_disk->private_data;
++
++ down(&lo->lo_ctl_mutex);
++ --lo->lo_refcnt;
++ up(&lo->lo_ctl_mutex);
++
++ return 0;
++}
++
++/* lloop device node's ioctl function. */
++static int lo_ioctl(struct inode *inode, struct file *unused,
++ unsigned int cmd, unsigned long arg)
++{
++ struct lloop_device *lo = inode->i_bdev->bd_disk->private_data;
++ struct block_device *bdev = inode->i_bdev;
++ int err = 0;
++
++ down(&lloop_mutex);
++ switch (cmd) {
++ case LL_IOC_LLOOP_DETACH: {
++ err = loop_clr_fd(lo, bdev, 2);
++ if (err == 0)
++ blkdev_put(bdev); /* grabbed in LLOOP_ATTACH */
++ break;
++ }
++
++ case LL_IOC_LLOOP_INFO: {
++ __u64 ino = 0;
++
++ if (lo->lo_state == LLOOP_BOUND)
++ ino = lo->lo_backing_file->f_dentry->d_inode->i_ino;
++
++ if (put_user(ino, (__u64 *)arg))
++ err = -EFAULT;
++ break;
++ }
++
++ default:
++ err = -EINVAL;
++ break;
++ }
++ up(&lloop_mutex);
++
++ return err;
++}
++
++static struct block_device_operations lo_fops = {
++ .owner = THIS_MODULE,
++ .open = lo_open,
++ .release = lo_release,
++ .ioctl = lo_ioctl,
++};
++
++/* dynamic iocontrol callback.
++ * This callback is registered in lloop_init and will be called by
++ * ll_iocontrol_call.
++ * This is a llite regular-file ioctl handler. It is responsible for
++ * attaching a file to, and detaching a file from, an lloop device number.
++ */
++static enum llioc_iter lloop_ioctl(struct inode *unused, struct file *file,
++ unsigned int cmd, unsigned long arg,
++ void *magic, int *rcp)
++{
++ struct lloop_device *lo = NULL;
++ struct block_device *bdev = NULL;
++ int err = 0;
++ dev_t dev;
++
++ if (magic != ll_iocontrol_magic)
++ return LLIOC_CONT;
++
++ if (disks == NULL)
++ GOTO(out1, err = -ENODEV);
++
++ down(&lloop_mutex);
++ switch (cmd) {
++ case LL_IOC_LLOOP_ATTACH: {
++ struct lloop_device *lo_free = NULL;
++ int i;
++
++ for (i = 0; i < max_loop; i++, lo = NULL) {
++ lo = &loop_dev[i];
++ if (lo->lo_state == LLOOP_UNBOUND) {
++ if (!lo_free)
++ lo_free = lo;
++ continue;
++ }
++ if (lo->lo_backing_file->f_dentry->d_inode ==
++ file->f_dentry->d_inode)
++ break;
++ }
++ if (lo || !lo_free)
++ GOTO(out, err = -EBUSY);
++
++ lo = lo_free;
++ dev = MKDEV(lloop_major, lo->lo_number);
++
++ /* bail out if the user-supplied result pointer is not writable */
++ if (put_user((long)old_encode_dev(dev), (long*)arg))
++ GOTO(out, err = -EFAULT);
++
++ bdev = open_by_devnum(dev, file->f_mode);
++ if (IS_ERR(bdev))
++ GOTO(out, err = PTR_ERR(bdev));
++
++ get_file(file);
++ err = loop_set_fd(lo, NULL, bdev, file);
++ if (err) {
++ fput(file);
++ blkdev_put(bdev);
++ }
++
++ break;
++ }
++
++ case LL_IOC_LLOOP_DETACH_BYDEV: {
++ int minor;
++
++ dev = old_decode_dev(arg);
++ if (MAJOR(dev) != lloop_major)
++ GOTO(out, err = -EINVAL);
++
++ minor = MINOR(dev);
++ if (minor > max_loop - 1)
++ GOTO(out, err = -EINVAL);
++
++ lo = &loop_dev[minor];
++ if (lo->lo_state != LLOOP_BOUND)
++ GOTO(out, err = -EINVAL);
++
++ bdev = lo->lo_device;
++ err = loop_clr_fd(lo, bdev, 1);
++ if (err == 0)
++ blkdev_put(bdev); /* grabbed in LLOOP_ATTACH */
++
++ break;
++ }
++
++ default:
++ err = -EINVAL;
++ break;
++ }
++
++out:
++ up(&lloop_mutex);
++out1:
++ if (rcp)
++ *rcp = err;
++ return LLIOC_STOP;
++}
++
++static int __init lloop_init(void)
++{
++ int i;
++ unsigned int cmdlist[] = {
++ LL_IOC_LLOOP_ATTACH,
++ LL_IOC_LLOOP_DETACH_BYDEV,
++ };
++
++ if (max_loop < 1 || max_loop > 256) {
++ CWARN("lloop: invalid max_loop (must be between"
++ " 1 and 256), using default (8)\n");
++ max_loop = 8;
++ }
++
++ lloop_major = register_blkdev(0, "lloop");
++ if (lloop_major < 0)
++ return -EIO;
++
++ ll_iocontrol_magic = ll_iocontrol_register(lloop_ioctl, 2, cmdlist);
++ if (ll_iocontrol_magic == NULL)
++ goto out_mem1;
++
++ loop_dev = kmalloc(max_loop * sizeof(struct lloop_device), GFP_KERNEL);
++ if (!loop_dev)
++ goto out_mem1;
++ memset(loop_dev, 0, max_loop * sizeof(struct lloop_device));
++
++ disks = kmalloc(max_loop * sizeof(struct gendisk *), GFP_KERNEL);
++ if (!disks)
++ goto out_mem2;
++
++ for (i = 0; i < max_loop; i++) {
++ disks[i] = alloc_disk(1);
++ if (!disks[i])
++ goto out_mem3;
++ }
++
++ init_MUTEX(&lloop_mutex);
++
++ for (i = 0; i < max_loop; i++) {
++ struct lloop_device *lo = &loop_dev[i];
++ struct gendisk *disk = disks[i];
++
++ memset(lo, 0, sizeof(*lo));
++ lo->lo_queue = blk_alloc_queue(GFP_KERNEL);
++ if (!lo->lo_queue)
++ goto out_mem4;
++
++ init_MUTEX(&lo->lo_ctl_mutex);
++ init_MUTEX_LOCKED(&lo->lo_sem);
++ init_MUTEX_LOCKED(&lo->lo_bh_mutex);
++ lo->lo_number = i;
++ spin_lock_init(&lo->lo_lock);
++ disk->major = lloop_major;
++ disk->first_minor = i;
++ disk->fops = &lo_fops;
++ sprintf(disk->disk_name, "lloop%d", i);
++ disk->private_data = lo;
++ disk->queue = lo->lo_queue;
++ }
++
++ /* We cannot fail after we call this, so another loop! */
++ for (i = 0; i < max_loop; i++)
++ add_disk(disks[i]);
++ return 0;
++
++out_mem4:
++ while (i--)
++ blk_put_queue(loop_dev[i].lo_queue);
++ i = max_loop;
++out_mem3:
++ while (i--)
++ put_disk(disks[i]);
++ kfree(disks);
++out_mem2:
++ kfree(loop_dev);
++out_mem1:
++ unregister_blkdev(lloop_major, "lloop");
++ ll_iocontrol_unregister(ll_iocontrol_magic);
++ CERROR("lloop: ran out of memory\n");
++ return -ENOMEM;
++}
++
++static void lloop_exit(void)
++{
++ int i;
++
++ ll_iocontrol_unregister(ll_iocontrol_magic);
++ for (i = 0; i < max_loop; i++) {
++ del_gendisk(disks[i]);
++ blk_put_queue(loop_dev[i].lo_queue);
++ put_disk(disks[i]);
++ }
++ if (ll_unregister_blkdev(lloop_major, "lloop"))
++ CWARN("lloop: cannot unregister blkdev\n");
++
++ kfree(disks);
++ kfree(loop_dev);
++}
++
++module_init(lloop_init);
++module_exit(lloop_exit);
++
++CFS_MODULE_PARM(max_loop, "i", int, 0444, "maximum number of lloop devices");
++MODULE_AUTHOR("Sun Microsystems, Inc. <http://www.lustre.org/>");
++MODULE_DESCRIPTION("Lustre virtual block device");
++MODULE_LICENSE("GPL");
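
One detail worth calling out from the listing above: get_loop_size() exports the backing file, minus lo_offset and clamped by lo_sizelimit, as a count of 512-byte sectors, because that is the unit set_capacity() expects. A standalone sketch of the arithmetic with made-up values:

#include <stdio.h>

typedef long long loff_t_sketch;        /* stand-in for the kernel loff_t */

static loff_t_sketch get_loop_size_sketch(loff_t_sketch file_size,
                                          loff_t_sketch offset,
                                          loff_t_sketch sizelimit)
{
        loff_t_sketch loopsize = file_size - offset;

        if (sizelimit > 0 && sizelimit < loopsize)
                loopsize = sizelimit;
        /* the block layer wants the capacity in 512-byte sectors */
        return loopsize >> 9;
}

int main(void)
{
        /* a 1 MiB backing file with the first 4 KiB skipped: 2040 sectors */
        printf("%lld sectors\n",
               get_loop_size_sketch(1 << 20, 4096, 0));
        return 0;
}

The driver then rejects any result that does not survive the round trip through sector_t, which is the -EFBIG check in loop_set_fd().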
+diff -urNad lustre~/lustre/llite/rw.c lustre/lustre/llite/rw.c
+--- lustre~/lustre/llite/rw.c 2009-03-12 10:32:27.000000000 +0100
++++ lustre/lustre/llite/rw.c 2009-03-12 11:02:51.000000000 +0100
+@@ -61,6 +61,8 @@
+
+ #define DEBUG_SUBSYSTEM S_LLITE
+
++#include <linux/page-flags.h>
++
+ #include <lustre_lite.h>
+ #include "llite_internal.h"
+ #include <linux/lustre_compat25.h>
+@@ -186,7 +188,7 @@
+ GOTO(out_unlock, 0);
+ }
+
+- LASSERT(atomic_read(&lli->lli_size_sem.count) <= 0);
++ LASSERT(SEM_COUNT(&lli->lli_size_sem) <= 0);
+
+ if (!srvlock) {
+ struct ost_lvb lvb;
+@@ -2122,7 +2124,7 @@
+ rc = generic_write_checks(file, ppos, &count, 0);
+ if (rc)
+ GOTO(out, rc);
+- rc = ll_remove_suid(file->f_dentry, file->f_vfsmnt);
++ rc = ll_remove_suid(file, file->f_vfsmnt);
+ if (rc)
+ GOTO(out, rc);
+ }
+diff -urNad lustre~/lustre/llite/rw.c.orig lustre/lustre/llite/rw.c.orig
+--- lustre~/lustre/llite/rw.c.orig 1970-01-01 00:00:00.000000000 +0000
++++ lustre/lustre/llite/rw.c.orig 2009-03-12 10:32:27.000000000 +0100
+@@ -0,0 +1,2215 @@
++/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
++ * vim:expandtab:shiftwidth=8:tabstop=8:
++ *
++ * GPL HEADER START
++ *
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License version 2 only,
++ * as published by the Free Software Foundation.
++ *
++ * This program is distributed in the hope that it will be useful, but
++ * WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ * General Public License version 2 for more details (a copy is included
++ * in the LICENSE file that accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License
++ * version 2 along with this program; If not, see
++ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
++ *
++ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
++ * CA 95054 USA or visit www.sun.com if you need additional information or
++ * have any questions.
++ *
++ * GPL HEADER END
++ */
++/*
++ * Copyright 2008 Sun Microsystems, Inc. All rights reserved
++ * Use is subject to license terms.
++ */
++/*
++ * This file is part of Lustre, http://www.lustre.org/
++ * Lustre is a trademark of Sun Microsystems, Inc.
++ *
++ * lustre/llite/rw.c
++ *
++ * Lustre Lite I/O page cache routines shared by different kernel revs
++ */
++
++#ifndef AUTOCONF_INCLUDED
++#include <linux/config.h>
++#endif
++#include <linux/kernel.h>
++#include <linux/mm.h>
++#include <linux/string.h>
++#include <linux/stat.h>
++#include <linux/errno.h>
++#include <linux/smp_lock.h>
++#include <linux/unistd.h>
++#include <linux/version.h>
++#include <asm/system.h>
++#include <asm/uaccess.h>
++
++#include <linux/fs.h>
++#include <linux/stat.h>
++#include <asm/uaccess.h>
++#include <linux/mm.h>
++#include <linux/pagemap.h>
++#include <linux/smp_lock.h>
++
++#define DEBUG_SUBSYSTEM S_LLITE
++
++#include <lustre_lite.h>
++#include "llite_internal.h"
++#include <linux/lustre_compat25.h>
++
++#ifndef list_for_each_prev_safe
++#define list_for_each_prev_safe(pos, n, head) \
++ for (pos = (head)->prev, n = pos->prev; pos != (head); \
++ pos = n, n = pos->prev )
++#endif
++
++cfs_mem_cache_t *ll_async_page_slab = NULL;
++size_t ll_async_page_slab_size = 0;
++
++/* SYNCHRONOUS I/O to object storage for an inode */
++static int ll_brw(int cmd, struct inode *inode, struct obdo *oa,
++ struct page *page, int flags)
++{
++ struct ll_inode_info *lli = ll_i2info(inode);
++ struct lov_stripe_md *lsm = lli->lli_smd;
++ struct obd_info oinfo = { { { 0 } } };
++ struct brw_page pg;
++ int rc;
++ ENTRY;
++
++ pg.pg = page;
++ pg.off = ((obd_off)page->index) << CFS_PAGE_SHIFT;
++
++ if ((cmd & OBD_BRW_WRITE) && (pg.off+CFS_PAGE_SIZE>i_size_read(inode)))
++ pg.count = i_size_read(inode) % CFS_PAGE_SIZE;
++ else
++ pg.count = CFS_PAGE_SIZE;
++
++ LL_CDEBUG_PAGE(D_PAGE, page, "%s %d bytes ino %lu at "LPU64"/"LPX64"\n",
++ cmd & OBD_BRW_WRITE ? "write" : "read", pg.count,
++ inode->i_ino, pg.off, pg.off);
++ if (pg.count == 0) {
++ CERROR("ZERO COUNT: ino %lu: size %p:%Lu(%p:%Lu) idx %lu off "
++ LPU64"\n", inode->i_ino, inode, i_size_read(inode),
++ page->mapping->host, i_size_read(page->mapping->host),
++ page->index, pg.off);
++ }
++
++ pg.flag = flags;
++
++ if (cmd & OBD_BRW_WRITE)
++ ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_BRW_WRITE,
++ pg.count);
++ else
++ ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_BRW_READ,
++ pg.count);
++ oinfo.oi_oa = oa;
++ oinfo.oi_md = lsm;
++ rc = obd_brw(cmd, ll_i2obdexp(inode), &oinfo, 1, &pg, NULL);
++ if (rc == 0)
++ obdo_to_inode(inode, oa, OBD_MD_FLBLOCKS);
++ else if (rc != -EIO)
++ CERROR("error from obd_brw: rc = %d\n", rc);
++ RETURN(rc);
++}
++
++int ll_file_punch(struct inode * inode, loff_t new_size, int srvlock)
++{
++ struct ll_inode_info *lli = ll_i2info(inode);
++ struct obd_info oinfo = { { { 0 } } };
++ struct obdo oa;
++ int rc;
++
++ ENTRY;
++ CDEBUG(D_INFO, "calling punch for "LPX64" (new size %Lu=%#Lx)\n",
++ lli->lli_smd->lsm_object_id, new_size, new_size);
++
++ oinfo.oi_md = lli->lli_smd;
++ oinfo.oi_policy.l_extent.start = new_size;
++ oinfo.oi_policy.l_extent.end = OBD_OBJECT_EOF;
++ oinfo.oi_oa = &oa;
++ oa.o_id = lli->lli_smd->lsm_object_id;
++ oa.o_valid = OBD_MD_FLID;
++ if (srvlock) {
++ /* set OBD_MD_FLFLAGS in o_valid, only if we
++ * set OBD_FL_TRUNCLOCK, otherwise ost_punch
++ * and filter_setattr get confused, see the comment
++ * in ost_punch */
++ oa.o_flags = OBD_FL_TRUNCLOCK;
++ oa.o_valid |= OBD_MD_FLFLAGS;
++ }
++ obdo_from_inode(&oa, inode, OBD_MD_FLTYPE | OBD_MD_FLMODE |OBD_MD_FLFID|
++ OBD_MD_FLATIME | OBD_MD_FLMTIME | OBD_MD_FLCTIME |
++ OBD_MD_FLUID | OBD_MD_FLGID | OBD_MD_FLGENER |
++ OBD_MD_FLBLOCKS);
++ rc = obd_punch_rqset(ll_i2obdexp(inode), &oinfo, NULL);
++ if (rc) {
++ CERROR("obd_truncate fails (%d) ino %lu\n", rc, inode->i_ino);
++ RETURN(rc);
++ }
++ obdo_to_inode(inode, &oa, OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
++ OBD_MD_FLATIME | OBD_MD_FLMTIME | OBD_MD_FLCTIME);
++ RETURN(0);
++}
++/* this isn't where truncate starts. roughly:
++ * sys_truncate->ll_setattr_raw->vmtruncate->ll_truncate. setattr_raw grabs
++ * DLM lock on [size, EOF], i_mutex, ->lli_size_sem, and WRITE_I_ALLOC_SEM to
++ * avoid races.
++ *
++ * must be called under ->lli_size_sem */
++void ll_truncate(struct inode *inode)
++{
++ struct ll_inode_info *lli = ll_i2info(inode);
++ int srvlock = test_bit(LLI_F_SRVLOCK, &lli->lli_flags);
++ loff_t new_size;
++ ENTRY;
++ CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p) to %Lu=%#Lx\n",inode->i_ino,
++ inode->i_generation, inode, i_size_read(inode), i_size_read(inode));
++
++ ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_TRUNC, 1);
++ if (lli->lli_size_sem_owner != current) {
++ EXIT;
++ return;
++ }
++
++ if (!lli->lli_smd) {
++ CDEBUG(D_INODE, "truncate on inode %lu with no objects\n",
++ inode->i_ino);
++ GOTO(out_unlock, 0);
++ }
++
++ LASSERT(atomic_read(&lli->lli_size_sem.count) <= 0);
++
++ if (!srvlock) {
++ struct ost_lvb lvb;
++ int rc;
++
++ /* XXX I'm pretty sure this is a hack to paper over a more fundamental
++ * race condition. */
++ lov_stripe_lock(lli->lli_smd);
++ inode_init_lvb(inode, &lvb);
++ rc = obd_merge_lvb(ll_i2obdexp(inode), lli->lli_smd, &lvb, 0);
++ inode->i_blocks = lvb.lvb_blocks;
++ if (lvb.lvb_size == i_size_read(inode) && rc == 0) {
++ CDEBUG(D_VFSTRACE, "skipping punch for obj "LPX64", %Lu=%#Lx\n",
++ lli->lli_smd->lsm_object_id, i_size_read(inode),
++ i_size_read(inode));
++ lov_stripe_unlock(lli->lli_smd);
++ GOTO(out_unlock, 0);
++ }
++
++ obd_adjust_kms(ll_i2obdexp(inode), lli->lli_smd,
++ i_size_read(inode), 1);
++ lov_stripe_unlock(lli->lli_smd);
++ }
++
++ if (unlikely((ll_i2sbi(inode)->ll_flags & LL_SBI_LLITE_CHECKSUM) &&
++ (i_size_read(inode) & ~CFS_PAGE_MASK))) {
++ /* If the truncate leaves a partial page, update its checksum */
++ struct page *page = find_get_page(inode->i_mapping,
++ i_size_read(inode) >>
++ CFS_PAGE_SHIFT);
++ if (page != NULL) {
++ struct ll_async_page *llap = llap_cast_private(page);
++ if (llap != NULL) {
++ char *kaddr = kmap_atomic(page, KM_USER0);
++ llap->llap_checksum =
++ init_checksum(OSC_DEFAULT_CKSUM);
++ llap->llap_checksum =
++ compute_checksum(llap->llap_checksum,
++ kaddr, CFS_PAGE_SIZE,
++ OSC_DEFAULT_CKSUM);
++ kunmap_atomic(kaddr, KM_USER0);
++ }
++ page_cache_release(page);
++ }
++ }
++
++ new_size = i_size_read(inode);
++ ll_inode_size_unlock(inode, 0);
++ if (!srvlock)
++ ll_file_punch(inode, new_size, 0);
++ else
++ ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_LOCKLESS_TRUNC, 1);
++
++ EXIT;
++ return;
++
++ out_unlock:
++ ll_inode_size_unlock(inode, 0);
++} /* ll_truncate */
++
++int ll_prepare_write(struct file *file, struct page *page, unsigned from,
++ unsigned to)
++{
++ struct inode *inode = page->mapping->host;
++ struct ll_inode_info *lli = ll_i2info(inode);
++ struct lov_stripe_md *lsm = lli->lli_smd;
++ obd_off offset = ((obd_off)page->index) << CFS_PAGE_SHIFT;
++ struct obd_info oinfo = { { { 0 } } };
++ struct brw_page pga;
++ struct obdo oa;
++ struct ost_lvb lvb;
++ int rc = 0;
++ ENTRY;
++
++ LASSERT(PageLocked(page));
++ (void)llap_cast_private(page); /* assertion */
++
++ /* Check to see if we should return -EIO right away */
++ pga.pg = page;
++ pga.off = offset;
++ pga.count = CFS_PAGE_SIZE;
++ pga.flag = 0;
++
++ oa.o_mode = inode->i_mode;
++ oa.o_id = lsm->lsm_object_id;
++ oa.o_valid = OBD_MD_FLID | OBD_MD_FLMODE | OBD_MD_FLTYPE;
++ obdo_from_inode(&oa, inode, OBD_MD_FLFID | OBD_MD_FLGENER);
++
++ oinfo.oi_oa = &oa;
++ oinfo.oi_md = lsm;
++ rc = obd_brw(OBD_BRW_CHECK, ll_i2obdexp(inode), &oinfo, 1, &pga, NULL);
++ if (rc)
++ RETURN(rc);
++
++ if (PageUptodate(page)) {
++ LL_CDEBUG_PAGE(D_PAGE, page, "uptodate\n");
++ RETURN(0);
++ }
++
++ /* We're completely overwriting an existing page, so _don't_ set it up
++ * to date until commit_write */
++ if (from == 0 && to == CFS_PAGE_SIZE) {
++ LL_CDEBUG_PAGE(D_PAGE, page, "full page write\n");
++ POISON_PAGE(page, 0x11);
++ RETURN(0);
++ }
++
++ /* If we are writing to a new page, no need to read old data. The extent
++ * locking will have updated the KMS, and for our purposes here we can
++ * treat it like i_size. */
++ lov_stripe_lock(lsm);
++ inode_init_lvb(inode, &lvb);
++ obd_merge_lvb(ll_i2obdexp(inode), lsm, &lvb, 1);
++ lov_stripe_unlock(lsm);
++ if (lvb.lvb_size <= offset) {
++ char *kaddr = kmap_atomic(page, KM_USER0);
++ LL_CDEBUG_PAGE(D_PAGE, page, "kms "LPU64" <= offset "LPU64"\n",
++ lvb.lvb_size, offset);
++ memset(kaddr, 0, CFS_PAGE_SIZE);
++ kunmap_atomic(kaddr, KM_USER0);
++ GOTO(prepare_done, rc = 0);
++ }
++
++ /* XXX could be an async ocp read.. read-ahead? */
++ rc = ll_brw(OBD_BRW_READ, inode, &oa, page, 0);
++ if (rc == 0) {
++ /* bug 1598: don't clobber blksize */
++ oa.o_valid &= ~(OBD_MD_FLSIZE | OBD_MD_FLBLKSZ);
++ obdo_refresh_inode(inode, &oa, oa.o_valid);
++ }
++
++ EXIT;
++ prepare_done:
++ if (rc == 0)
++ SetPageUptodate(page);
++
++ return rc;
++}
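
ll_prepare_write above has three outcomes before it falls back to a
read-modify-write of the old contents. A compact standalone sketch of
the decision (the enum and 4 KiB page size are assumptions, not Lustre
definitions):

    #include <stdio.h>

    #define PAGE_SIZE 4096U

    enum pw_action { PW_FULL_OVERWRITE, PW_ZERO_FILL, PW_READ_OLD };

    /* full-page writes skip the read; pages at or beyond KMS are
     * zero-filled; everything else needs the old data read in */
    static enum pw_action prepare_action(unsigned from, unsigned to,
                                         unsigned long long kms,
                                         unsigned long long offset)
    {
            if (from == 0 && to == PAGE_SIZE)
                    return PW_FULL_OVERWRITE;
            if (kms <= offset)
                    return PW_ZERO_FILL;
            return PW_READ_OLD;
    }

    int main(void)
    {
            /* 100-byte write into the page at offset 8192 of a
             * 10000-byte file: partial and below KMS, so read first */
            printf("%d\n", prepare_action(0, 100, 10000, 8192));
            return 0;
    }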
++
++/**
++ * Make a page ready for an ASYNC write.
++ * \param data pointer to the llap cookie
++ * \param cmd  one of the OBD_BRW_* macros
++ *
++ * \retval 0       the page was successfully prepared for sending
++ * \retval -EAGAIN the page does not need to be sent
++ */
++static int ll_ap_make_ready(void *data, int cmd)
++{
++ struct ll_async_page *llap;
++ struct page *page;
++ ENTRY;
++
++ llap = LLAP_FROM_COOKIE(data);
++ page = llap->llap_page;
++
++ /* we're trying to write, but the page is locked.. come back later */
++ if (TryLockPage(page))
++ RETURN(-EAGAIN);
++
++ LASSERTF(!(cmd & OBD_BRW_READ) || !PageWriteback(page),
++ "cmd %x page %p ino %lu index %lu fl %lx\n", cmd, page,
++ page->mapping->host->i_ino, page->index, page->flags);
++
++ /* if we left PageDirty we might get another writepage call
++ * in the future. list walkers are bright enough
++ * to check page dirty so we can leave it on whatever list
++ * it's on. XXX also, we're called with the cli lock so if
++ * we got the page cache list we'd create a lock inversion
++ * with the removepage path which gets the page lock then the
++ * cli lock */
++ if(!clear_page_dirty_for_io(page)) {
++ unlock_page(page);
++ RETURN(-EAGAIN);
++ }
++
++ /* This actually clears the dirty bit in the radix tree.*/
++ set_page_writeback(page);
++
++ LL_CDEBUG_PAGE(D_PAGE, page, "made ready\n");
++ page_cache_get(page);
++
++ RETURN(0);
++}
++
++/* We have two reasons for giving llite the opportunity to change the
++ * write length of a given queued page as it builds the RPC containing
++ * the page:
++ *
++ * 1) Further extending writes may have landed in the page cache
++ * since a partial write first queued this page requiring us
++ * to write more from the page cache. (No further races are possible, since
++ * by the time this is called, the page is locked.)
++ * 2) We might have raced with truncate and want to avoid performing
++ * write RPCs that are just going to be thrown away by the
++ * truncate's punch on the storage targets.
++ *
++ * The kms serves these purposes as it is set at both truncate and extending
++ * writes.
++ */
++static int ll_ap_refresh_count(void *data, int cmd)
++{
++ struct ll_inode_info *lli;
++ struct ll_async_page *llap;
++ struct lov_stripe_md *lsm;
++ struct page *page;
++ struct inode *inode;
++ struct ost_lvb lvb;
++ __u64 kms;
++ ENTRY;
++
++ /* readpage queues with _COUNT_STABLE, shouldn't get here. */
++ LASSERT(cmd != OBD_BRW_READ);
++
++ llap = LLAP_FROM_COOKIE(data);
++ page = llap->llap_page;
++ inode = page->mapping->host;
++ lli = ll_i2info(inode);
++ lsm = lli->lli_smd;
++
++ lov_stripe_lock(lsm);
++ inode_init_lvb(inode, &lvb);
++ obd_merge_lvb(ll_i2obdexp(inode), lsm, &lvb, 1);
++ kms = lvb.lvb_size;
++ lov_stripe_unlock(lsm);
++
++ /* catch race with truncate */
++ if (((__u64)page->index << CFS_PAGE_SHIFT) >= kms)
++ return 0;
++
++ /* catch sub-page write at end of file */
++ if (((__u64)page->index << CFS_PAGE_SHIFT) + CFS_PAGE_SIZE > kms)
++ return kms % CFS_PAGE_SIZE;
++
++ return CFS_PAGE_SIZE;
++}
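
The clamp at the end of ll_ap_refresh_count is easy to check in
isolation. A runnable userspace mirror of it (4 KiB pages assumed; the
function name is ours):

    #include <stdio.h>

    #define PAGE_SHIFT 12
    #define PAGE_SIZE  4096UL

    /* clamp a queued page's write count against kms, as above */
    static unsigned refresh_count(unsigned long index, unsigned long long kms)
    {
            unsigned long long start = (unsigned long long)index << PAGE_SHIFT;

            if (start >= kms)
                    return 0;               /* raced with truncate: skip */
            if (start + PAGE_SIZE > kms)
                    return kms % PAGE_SIZE; /* sub-page tail at EOF */
            return PAGE_SIZE;               /* interior page: full page */
    }

    int main(void)
    {
            /* page [8192,12288) against kms 10000 -> send 1808 bytes */
            printf("%u\n", refresh_count(2, 10000));
            return 0;
    }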
++
++void ll_inode_fill_obdo(struct inode *inode, int cmd, struct obdo *oa)
++{
++ struct lov_stripe_md *lsm;
++ obd_flag valid_flags;
++
++ lsm = ll_i2info(inode)->lli_smd;
++
++ oa->o_id = lsm->lsm_object_id;
++ oa->o_valid = OBD_MD_FLID;
++ valid_flags = OBD_MD_FLTYPE | OBD_MD_FLATIME;
++ if (cmd & OBD_BRW_WRITE) {
++ oa->o_valid |= OBD_MD_FLEPOCH;
++ oa->o_easize = ll_i2info(inode)->lli_io_epoch;
++
++ valid_flags |= OBD_MD_FLMTIME | OBD_MD_FLCTIME |
++ OBD_MD_FLUID | OBD_MD_FLGID |
++ OBD_MD_FLFID | OBD_MD_FLGENER;
++ }
++
++ obdo_from_inode(oa, inode, valid_flags);
++}
++
++static void ll_ap_fill_obdo(void *data, int cmd, struct obdo *oa)
++{
++ struct ll_async_page *llap;
++ ENTRY;
++
++ llap = LLAP_FROM_COOKIE(data);
++ ll_inode_fill_obdo(llap->llap_page->mapping->host, cmd, oa);
++
++ EXIT;
++}
++
++static void ll_ap_update_obdo(void *data, int cmd, struct obdo *oa,
++ obd_valid valid)
++{
++ struct ll_async_page *llap;
++ ENTRY;
++
++ llap = LLAP_FROM_COOKIE(data);
++ obdo_from_inode(oa, llap->llap_page->mapping->host, valid);
++
++ EXIT;
++}
++
++static struct obd_async_page_ops ll_async_page_ops = {
++ .ap_make_ready = ll_ap_make_ready,
++ .ap_refresh_count = ll_ap_refresh_count,
++ .ap_fill_obdo = ll_ap_fill_obdo,
++ .ap_update_obdo = ll_ap_update_obdo,
++ .ap_completion = ll_ap_completion,
++};
++
++struct ll_async_page *llap_cast_private(struct page *page)
++{
++ struct ll_async_page *llap = (struct ll_async_page *)page_private(page);
++
++ LASSERTF(llap == NULL || llap->llap_magic == LLAP_MAGIC,
++ "page %p private %lu gave magic %d which != %d\n",
++ page, page_private(page), llap->llap_magic, LLAP_MAGIC);
++
++ return llap;
++}
++
++/* Try to shrink the page cache for the @sbi filesystem by 1/@shrink_fraction.
++ *
++ * There is an llap attached onto every page in lustre, linked off @sbi.
++ * We add a dummy llap to the list so we don't lose our place during list
++ * walking. If llaps in the list are being moved they will only move to
++ * the end of the LRU, and we aren't terribly interested in those pages
++ * here (we start at the beginning of the list where the least-used llaps
++ * are). */
++int llap_shrink_cache(struct ll_sb_info *sbi, int shrink_fraction)
++{
++ struct ll_async_page *llap, dummy_llap = { .llap_magic = 0xd11ad11a };
++ unsigned long total, want, count = 0;
++
++ total = sbi->ll_async_page_count;
++
++ /* There can be a large number of llaps (600k or more in a large
++ * memory machine) so the VM 1/6 shrink ratio is likely too much.
++ * Since we are freeing pages also, we don't necessarily want to
++ * shrink so much. Limit to 40MB of pages + llaps per call. */
++ if (shrink_fraction == 0)
++ want = sbi->ll_async_page_count - sbi->ll_async_page_max + 32;
++ else
++ want = (total + shrink_fraction - 1) / shrink_fraction;
++
++ if (want > 40 << (20 - CFS_PAGE_SHIFT))
++ want = 40 << (20 - CFS_PAGE_SHIFT);
++
++ CDEBUG(D_CACHE, "shrinking %lu of %lu pages (1/%d)\n",
++ want, total, shrink_fraction);
++
++ spin_lock(&sbi->ll_lock);
++ list_add(&dummy_llap.llap_pglist_item, &sbi->ll_pglist);
++
++ while (total-- > 0 && count < want) {
++ struct page *page;
++ int keep;
++
++ if (unlikely(need_resched())) {
++ spin_unlock(&sbi->ll_lock);
++ cond_resched();
++ spin_lock(&sbi->ll_lock);
++ }
++
++ llap = llite_pglist_next_llap(sbi,&dummy_llap.llap_pglist_item);
++ list_del_init(&dummy_llap.llap_pglist_item);
++ if (llap == NULL)
++ break;
++
++ page = llap->llap_page;
++ LASSERT(page != NULL);
++
++ list_add(&dummy_llap.llap_pglist_item, &llap->llap_pglist_item);
++
++ /* Page needs/undergoing IO */
++ if (TryLockPage(page)) {
++ LL_CDEBUG_PAGE(D_PAGE, page, "can't lock\n");
++ continue;
++ }
++
++ keep = (llap->llap_write_queued || PageDirty(page) ||
++ PageWriteback(page) || (!PageUptodate(page) &&
++ llap->llap_origin != LLAP_ORIGIN_READAHEAD));
++
++ LL_CDEBUG_PAGE(D_PAGE, page,"%s LRU page: %s%s%s%s%s origin %s\n",
++ keep ? "keep" : "drop",
++ llap->llap_write_queued ? "wq " : "",
++ PageDirty(page) ? "pd " : "",
++ PageUptodate(page) ? "" : "!pu ",
++ PageWriteback(page) ? "wb" : "",
++ llap->llap_defer_uptodate ? "" : "!du",
++ llap_origins[llap->llap_origin]);
++
++ /* If page is dirty or undergoing IO don't discard it */
++ if (keep) {
++ unlock_page(page);
++ continue;
++ }
++
++ page_cache_get(page);
++ spin_unlock(&sbi->ll_lock);
++
++ if (page->mapping != NULL) {
++ ll_teardown_mmaps(page->mapping,
++ (__u64)page->index << CFS_PAGE_SHIFT,
++ ((__u64)page->index << CFS_PAGE_SHIFT)|
++ ~CFS_PAGE_MASK);
++ if (!PageDirty(page) && !page_mapped(page)) {
++ ll_ra_accounting(llap, page->mapping);
++ ll_truncate_complete_page(page);
++ ++count;
++ } else {
++ LL_CDEBUG_PAGE(D_PAGE, page, "Not dropping page"
++ " because it is "
++ "%s\n",
++ PageDirty(page)?
++ "dirty":"mapped");
++ }
++ }
++ unlock_page(page);
++ page_cache_release(page);
++
++ spin_lock(&sbi->ll_lock);
++ }
++ list_del(&dummy_llap.llap_pglist_item);
++ spin_unlock(&sbi->ll_lock);
++
++ CDEBUG(D_CACHE, "shrank %lu/%lu and left %lu unscanned\n",
++ count, want, total);
++
++ return count;
++}
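
The dummy_llap above is the classic "marker node" trick for walking a
list that may change while the lock is dropped: the sentinel keeps our
position, so concurrent insertion or removal cannot strand the walk. A
self-contained userspace illustration with a Linux-style circular list
(no locking; all names invented):

    #include <stdio.h>

    struct list_head { struct list_head *prev, *next; };

    static void list_init(struct list_head *h) { h->prev = h->next = h; }

    static void list_add(struct list_head *n, struct list_head *head)
    {
            n->next = head->next; n->prev = head;
            head->next->prev = n; head->next = n;
    }

    static void list_del(struct list_head *n)
    {
            n->prev->next = n->next; n->next->prev = n->prev;
            n->prev = n->next = n;
    }

    struct item { struct list_head link; int id; }; /* link must be first */

    int main(void)
    {
            struct list_head lru, marker;
            struct item items[4];
            int i;

            list_init(&lru);
            for (i = 3; i >= 0; i--) {
                    items[i].id = i;
                    list_add(&items[i].link, &lru); /* head insert: 0,1,2,3 */
            }

            list_add(&marker, &lru);                /* start the walk */
            while (marker.next != &lru) {
                    struct item *it = (struct item *)marker.next;

                    list_del(&marker);
                    list_add(&marker, &it->link);   /* re-park after `it` */
                    /* a real walker could drop its lock here and resume */
                    printf("visit %d\n", it->id);
            }
            list_del(&marker);
            return 0;
    }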
++
++static struct ll_async_page *llap_from_page_with_lockh(struct page *page,
++ unsigned origin,
++ struct lustre_handle *lockh)
++{
++ struct ll_async_page *llap;
++ struct obd_export *exp;
++ struct inode *inode = page->mapping->host;
++ struct ll_sb_info *sbi;
++ int rc;
++ ENTRY;
++
++ if (!inode) {
++ static int triggered;
++
++ if (!triggered) {
++ LL_CDEBUG_PAGE(D_ERROR, page, "Bug 10047. Wrong anon "
++ "page received\n");
++ libcfs_debug_dumpstack(NULL);
++ triggered = 1;
++ }
++ RETURN(ERR_PTR(-EINVAL));
++ }
++ sbi = ll_i2sbi(inode);
++ LASSERT(ll_async_page_slab);
++ LASSERTF(origin < LLAP__ORIGIN_MAX, "%u\n", origin);
++
++ llap = llap_cast_private(page);
++ if (llap != NULL) {
++ /* move to end of LRU list, except when page is just about to
++ * die */
++ if (origin != LLAP_ORIGIN_REMOVEPAGE) {
++ spin_lock(&sbi->ll_lock);
++ sbi->ll_pglist_gen++;
++ list_del_init(&llap->llap_pglist_item);
++ list_add_tail(&llap->llap_pglist_item, &sbi->ll_pglist);
++ spin_unlock(&sbi->ll_lock);
++ }
++ GOTO(out, llap);
++ }
++
++ exp = ll_i2obdexp(page->mapping->host);
++ if (exp == NULL)
++ RETURN(ERR_PTR(-EINVAL));
++
++ /* limit the number of lustre-cached pages */
++ if (sbi->ll_async_page_count >= sbi->ll_async_page_max)
++ llap_shrink_cache(sbi, 0);
++
++ OBD_SLAB_ALLOC(llap, ll_async_page_slab, CFS_ALLOC_STD,
++ ll_async_page_slab_size);
++ if (llap == NULL)
++ RETURN(ERR_PTR(-ENOMEM));
++ llap->llap_magic = LLAP_MAGIC;
++ llap->llap_cookie = (void *)llap + size_round(sizeof(*llap));
++
++ /* XXX: for bug 11270 - check for lockless origin here! */
++ if (origin == LLAP_ORIGIN_LOCKLESS_IO)
++ llap->llap_nocache = 1;
++
++ rc = obd_prep_async_page(exp, ll_i2info(inode)->lli_smd, NULL, page,
++ (obd_off)page->index << CFS_PAGE_SHIFT,
++ &ll_async_page_ops, llap, &llap->llap_cookie,
++ llap->llap_nocache, lockh);
++ if (rc) {
++ OBD_SLAB_FREE(llap, ll_async_page_slab,
++ ll_async_page_slab_size);
++ RETURN(ERR_PTR(rc));
++ }
++
++ CDEBUG(D_CACHE, "llap %p page %p cookie %p obj off "LPU64"\n", llap,
++ page, llap->llap_cookie, (obd_off)page->index << CFS_PAGE_SHIFT);
++ /* also zeroing the PRIVBITS low order bitflags */
++ __set_page_ll_data(page, llap);
++ llap->llap_page = page;
++
++ spin_lock(&sbi->ll_lock);
++ sbi->ll_pglist_gen++;
++ sbi->ll_async_page_count++;
++ list_add_tail(&llap->llap_pglist_item, &sbi->ll_pglist);
++ spin_unlock(&sbi->ll_lock);
++
++ out:
++ if (unlikely(sbi->ll_flags & LL_SBI_LLITE_CHECKSUM)) {
++ __u32 csum;
++ char *kaddr = kmap_atomic(page, KM_USER0);
++ csum = init_checksum(OSC_DEFAULT_CKSUM);
++ csum = compute_checksum(csum, kaddr, CFS_PAGE_SIZE,
++ OSC_DEFAULT_CKSUM);
++ kunmap_atomic(kaddr, KM_USER0);
++ if (origin == LLAP_ORIGIN_READAHEAD ||
++ origin == LLAP_ORIGIN_READPAGE ||
++ origin == LLAP_ORIGIN_LOCKLESS_IO) {
++ llap->llap_checksum = 0;
++ } else if (origin == LLAP_ORIGIN_COMMIT_WRITE ||
++ llap->llap_checksum == 0) {
++ llap->llap_checksum = csum;
++ CDEBUG(D_PAGE, "page %p cksum %x\n", page, csum);
++ } else if (llap->llap_checksum == csum) {
++ /* origin == LLAP_ORIGIN_WRITEPAGE */
++ CDEBUG(D_PAGE, "page %p cksum %x confirmed\n",
++ page, csum);
++ } else {
++ /* origin == LLAP_ORIGIN_WRITEPAGE */
++ LL_CDEBUG_PAGE(D_ERROR, page, "old cksum %x != new "
++ "%x!\n", llap->llap_checksum, csum);
++ }
++ }
++
++ llap->llap_origin = origin;
++ RETURN(llap);
++}
++
++static inline struct ll_async_page *llap_from_page(struct page *page,
++ unsigned origin)
++{
++ return llap_from_page_with_lockh(page, origin, NULL);
++}
++
++static int queue_or_sync_write(struct obd_export *exp, struct inode *inode,
++ struct ll_async_page *llap,
++ unsigned to, obd_flag async_flags)
++{
++ unsigned long size_index = i_size_read(inode) >> CFS_PAGE_SHIFT;
++ struct obd_io_group *oig;
++ struct ll_sb_info *sbi = ll_i2sbi(inode);
++ int rc, noquot = llap->llap_ignore_quota ? OBD_BRW_NOQUOTA : 0;
++ ENTRY;
++
++ /* _make_ready only sees llap once we've unlocked the page */
++ llap->llap_write_queued = 1;
++ rc = obd_queue_async_io(exp, ll_i2info(inode)->lli_smd, NULL,
++ llap->llap_cookie, OBD_BRW_WRITE | noquot,
++ 0, 0, 0, async_flags);
++ if (rc == 0) {
++ LL_CDEBUG_PAGE(D_PAGE, llap->llap_page, "write queued\n");
++ llap_write_pending(inode, llap);
++ GOTO(out, 0);
++ }
++
++ llap->llap_write_queued = 0;
++
++ rc = oig_init(&oig);
++ if (rc)
++ GOTO(out, rc);
++
++ /* make full-page requests if we are not at EOF (bug 4410) */
++ if (to != CFS_PAGE_SIZE && llap->llap_page->index < size_index) {
++ LL_CDEBUG_PAGE(D_PAGE, llap->llap_page,
++ "sync write before EOF: size_index %lu, to %d\n",
++ size_index, to);
++ to = CFS_PAGE_SIZE;
++ } else if (to != CFS_PAGE_SIZE && llap->llap_page->index == size_index){
++ int size_to = i_size_read(inode) & ~CFS_PAGE_MASK;
++ LL_CDEBUG_PAGE(D_PAGE, llap->llap_page,
++ "sync write at EOF: size_index %lu, to %d/%d\n",
++ size_index, to, size_to);
++ if (to < size_to)
++ to = size_to;
++ }
++
++ /* compare the checksum once before the page leaves llite */
++ if (unlikely((sbi->ll_flags & LL_SBI_LLITE_CHECKSUM) &&
++ llap->llap_checksum != 0)) {
++ __u32 csum;
++ struct page *page = llap->llap_page;
++ char *kaddr = kmap_atomic(page, KM_USER0);
++ csum = init_checksum(OSC_DEFAULT_CKSUM);
++ csum = compute_checksum(csum, kaddr, CFS_PAGE_SIZE,
++ OSC_DEFAULT_CKSUM);
++ kunmap_atomic(kaddr, KM_USER0);
++ if (llap->llap_checksum == csum) {
++ CDEBUG(D_PAGE, "page %p cksum %x confirmed\n",
++ page, csum);
++ } else {
++ CERROR("page %p old cksum %x != new cksum %x!\n",
++ page, llap->llap_checksum, csum);
++ }
++ }
++
++ rc = obd_queue_group_io(exp, ll_i2info(inode)->lli_smd, NULL, oig,
++ llap->llap_cookie, OBD_BRW_WRITE | noquot,
++ 0, to, 0, ASYNC_READY | ASYNC_URGENT |
++ ASYNC_COUNT_STABLE | ASYNC_GROUP_SYNC);
++ if (rc)
++ GOTO(free_oig, rc);
++
++ rc = obd_trigger_group_io(exp, ll_i2info(inode)->lli_smd, NULL, oig);
++ if (rc)
++ GOTO(free_oig, rc);
++
++ rc = oig_wait(oig);
++
++ if (!rc && async_flags & ASYNC_READY) {
++ unlock_page(llap->llap_page);
++ if (PageWriteback(llap->llap_page))
++ end_page_writeback(llap->llap_page);
++ }
++
++ LL_CDEBUG_PAGE(D_PAGE, llap->llap_page, "sync write returned %d\n", rc);
++
++free_oig:
++ oig_release(oig);
++out:
++ RETURN(rc);
++}
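
The "make full-page requests if we are not at EOF" adjustment above
avoids read-modify-write cycles on the OST. A standalone sketch of just
that arithmetic (4 KiB pages assumed; function name is ours):

    #include <stdio.h>

    #define PAGE_SHIFT 12
    #define PAGE_SIZE  4096U

    /* widen a sub-page write to a full page when the page lies wholly
     * below i_size, or out to i_size on the EOF page */
    static unsigned sync_write_count(unsigned long index, unsigned to,
                                     unsigned long long isize)
    {
            unsigned long size_index = isize >> PAGE_SHIFT;
            unsigned size_to = isize & (PAGE_SIZE - 1);

            if (to != PAGE_SIZE && index < size_index)
                    return PAGE_SIZE;
            if (to != PAGE_SIZE && index == size_index && to < size_to)
                    return size_to;
            return to;
    }

    int main(void)
    {
            printf("%u\n", sync_write_count(0, 100, 10000)); /* 4096 */
            printf("%u\n", sync_write_count(2, 100, 10000)); /* 1808 */
            return 0;
    }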
++
++/* update our write count to account for i_size increases that may have
++ * happened since we've queued the page for io. */
++
++/* be careful not to return success without setting the page Uptodate or
++ * the next pass through prepare_write will read in stale data from disk. */
++int ll_commit_write(struct file *file, struct page *page, unsigned from,
++ unsigned to)
++{
++ struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
++ struct inode *inode = page->mapping->host;
++ struct ll_inode_info *lli = ll_i2info(inode);
++ struct lov_stripe_md *lsm = lli->lli_smd;
++ struct obd_export *exp;
++ struct ll_async_page *llap;
++ loff_t size;
++ struct lustre_handle *lockh = NULL;
++ int rc = 0;
++ ENTRY;
++
++ SIGNAL_MASK_ASSERT(); /* XXX BUG 1511 */
++ LASSERT(inode == file->f_dentry->d_inode);
++ LASSERT(PageLocked(page));
++
++ CDEBUG(D_INODE, "inode %p is writing page %p from %d to %d at %lu\n",
++ inode, page, from, to, page->index);
++
++ if (fd->fd_flags & LL_FILE_GROUP_LOCKED)
++ lockh = &fd->fd_cwlockh;
++
++ llap = llap_from_page_with_lockh(page, LLAP_ORIGIN_COMMIT_WRITE, lockh);
++ if (IS_ERR(llap))
++ RETURN(PTR_ERR(llap));
++
++ exp = ll_i2obdexp(inode);
++ if (exp == NULL)
++ RETURN(-EINVAL);
++
++ llap->llap_ignore_quota = cfs_capable(CFS_CAP_SYS_RESOURCE);
++
++ /* queue a write for some time in the future the first time we
++ * dirty the page */
++ if (!PageDirty(page)) {
++ ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_DIRTY_MISSES, 1);
++
++ rc = queue_or_sync_write(exp, inode, llap, to, 0);
++ if (rc)
++ GOTO(out, rc);
++ } else {
++ ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_DIRTY_HITS, 1);
++ }
++
++ /* put the page in the page cache, from now on ll_removepage is
++ * responsible for cleaning up the llap.
++ * only set the page dirty when it's queued to be written out */
++ if (llap->llap_write_queued)
++ set_page_dirty(page);
++
++out:
++ size = (((obd_off)page->index) << CFS_PAGE_SHIFT) + to;
++ ll_inode_size_lock(inode, 0);
++ if (rc == 0) {
++ lov_stripe_lock(lsm);
++ obd_adjust_kms(exp, lsm, size, 0);
++ lov_stripe_unlock(lsm);
++ if (size > i_size_read(inode))
++ i_size_write(inode, size);
++ SetPageUptodate(page);
++ } else if (size > i_size_read(inode)) {
++ /* this page is beyond the pale of i_size, so it can't be
++ * truncated in ll_p_r_e during lock revoking. we must
++ * tear down our book-keeping here. */
++ ll_removepage(page);
++ }
++ ll_inode_size_unlock(inode, 0);
++ RETURN(rc);
++}
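
A minimal sketch of the size bookkeeping at the end of ll_commit_write:
on success the KMS is raised to the end of the bytes just written, while
i_size only moves if the file actually grew (values invented, 4 KiB
pages assumed):

    #include <stdio.h>

    #define PAGE_SHIFT 12

    int main(void)
    {
            unsigned long index = 2;          /* page just written */
            unsigned to = 1900;               /* valid bytes in that page */
            unsigned long long isize = 10000; /* current i_size */
            unsigned long long size =
                    ((unsigned long long)index << PAGE_SHIFT) + to;

            if (size > isize)                 /* write extended the file */
                    isize = size;
            printf("kms=%llu i_size=%llu\n", size, isize);
            return 0;
    }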
++
++static unsigned long ll_ra_count_get(struct ll_sb_info *sbi, unsigned long len)
++{
++ struct ll_ra_info *ra = &sbi->ll_ra_info;
++ unsigned long ret;
++ ENTRY;
++
++ spin_lock(&sbi->ll_lock);
++ ret = min(ra->ra_max_pages - ra->ra_cur_pages, len);
++ ra->ra_cur_pages += ret;
++ spin_unlock(&sbi->ll_lock);
++
++ RETURN(ret);
++}
++
++static void ll_ra_count_put(struct ll_sb_info *sbi, unsigned long len)
++{
++ struct ll_ra_info *ra = &sbi->ll_ra_info;
++ spin_lock(&sbi->ll_lock);
++ LASSERTF(ra->ra_cur_pages >= len, "r_c_p %lu len %lu\n",
++ ra->ra_cur_pages, len);
++ ra->ra_cur_pages -= len;
++ spin_unlock(&sbi->ll_lock);
++}
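
These two helpers implement a simple global read-ahead budget. A
userspace sketch of the get side (the explicit underflow guard is an
extra safety in the sketch; the code above relies on ll_lock keeping
the counters consistent):

    #include <stdio.h>

    /* grab as much of `len` as the budget allows, never letting
     * ra_cur_pages exceed ra_max_pages */
    static unsigned long ra_count_get(unsigned long max, unsigned long *cur,
                                      unsigned long len)
    {
            unsigned long avail = max > *cur ? max - *cur : 0;
            unsigned long ret = avail < len ? avail : len;

            *cur += ret;
            return ret;
    }

    int main(void)
    {
            unsigned long cur = 1000;

            printf("%lu\n", ra_count_get(1024, &cur, 100)); /* only 24 left */
            printf("cur=%lu\n", cur);                       /* at the cap */
            return 0;
    }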
++
++/* called for each page in a completed rpc.*/
++int ll_ap_completion(void *data, int cmd, struct obdo *oa, int rc)
++{
++ struct ll_async_page *llap;
++ struct page *page;
++ int ret = 0;
++ ENTRY;
++
++ llap = LLAP_FROM_COOKIE(data);
++ page = llap->llap_page;
++ LASSERT(PageLocked(page));
++ LASSERT(CheckWriteback(page,cmd));
++
++ LL_CDEBUG_PAGE(D_PAGE, page, "completing cmd %d with %d\n", cmd, rc);
++
++ if (cmd & OBD_BRW_READ && llap->llap_defer_uptodate)
++ ll_ra_count_put(ll_i2sbi(page->mapping->host), 1);
++
++ if (rc == 0) {
++ if (cmd & OBD_BRW_READ) {
++ if (!llap->llap_defer_uptodate)
++ SetPageUptodate(page);
++ } else {
++ llap->llap_write_queued = 0;
++ }
++ ClearPageError(page);
++ } else {
++ if (cmd & OBD_BRW_READ) {
++ llap->llap_defer_uptodate = 0;
++ }
++ SetPageError(page);
++ if (rc == -ENOSPC)
++ set_bit(AS_ENOSPC, &page->mapping->flags);
++ else
++ set_bit(AS_EIO, &page->mapping->flags);
++ }
++
++ /* be careful about clearing WB:
++ * if WB is cleared after the page lock is released, parallel IO can
++ * be started before ap_make_ready has finished, so we would end up
++ * with a page that has PG_Writeback set from ->writepage() and a
++ * completed READ that clears this flag */
++ if ((cmd & OBD_BRW_WRITE) && PageWriteback(page))
++ end_page_writeback(page);
++
++ unlock_page(page);
++
++ if (cmd & OBD_BRW_WRITE) {
++ llap_write_complete(page->mapping->host, llap);
++ ll_try_done_writing(page->mapping->host);
++ }
++
++ page_cache_release(page);
++
++ RETURN(ret);
++}
++
++static void __ll_put_llap(struct page *page)
++{
++ struct inode *inode = page->mapping->host;
++ struct obd_export *exp;
++ struct ll_async_page *llap;
++ struct ll_sb_info *sbi = ll_i2sbi(inode);
++ int rc;
++ ENTRY;
++
++ exp = ll_i2obdexp(inode);
++ if (exp == NULL) {
++ CERROR("page %p ind %lu gave null export\n", page, page->index);
++ EXIT;
++ return;
++ }
++
++ llap = llap_from_page(page, LLAP_ORIGIN_REMOVEPAGE);
++ if (IS_ERR(llap)) {
++ CERROR("page %p ind %lu couldn't find llap: %ld\n", page,
++ page->index, PTR_ERR(llap));
++ EXIT;
++ return;
++ }
++
++ //llap_write_complete(inode, llap);
++ rc = obd_teardown_async_page(exp, ll_i2info(inode)->lli_smd, NULL,
++ llap->llap_cookie);
++ if (rc != 0)
++ CERROR("page %p ind %lu failed: %d\n", page, page->index, rc);
++
++ /* this unconditional free is only safe because the page lock
++ * is providing exclusivity to memory pressure/truncate/writeback..*/
++ __clear_page_ll_data(page);
++
++ spin_lock(&sbi->ll_lock);
++ if (!list_empty(&llap->llap_pglist_item))
++ list_del_init(&llap->llap_pglist_item);
++ sbi->ll_pglist_gen++;
++ sbi->ll_async_page_count--;
++ spin_unlock(&sbi->ll_lock);
++ OBD_SLAB_FREE(llap, ll_async_page_slab, ll_async_page_slab_size);
++
++ EXIT;
++}
++
++/* the kernel calls us here when a page is unhashed from the page cache.
++ * the page will be locked and the kernel is holding a spinlock, so
++ * we need to be careful. we're just tearing down our book-keeping
++ * here. */
++void ll_removepage(struct page *page)
++{
++ struct ll_async_page *llap = llap_cast_private(page);
++ ENTRY;
++
++ LASSERT(!in_interrupt());
++
++ /* sync pages or failed read pages can leave pages in the page
++ * cache that don't have our data associated with them anymore */
++ if (page_private(page) == 0) {
++ EXIT;
++ return;
++ }
++
++ LASSERT(!llap->llap_lockless_io_page);
++ LASSERT(!llap->llap_nocache);
++
++ LL_CDEBUG_PAGE(D_PAGE, page, "being evicted\n");
++ __ll_put_llap(page);
++
++ EXIT;
++}
++
++static int ll_issue_page_read(struct obd_export *exp,
++ struct ll_async_page *llap,
++ struct obd_io_group *oig, int defer)
++{
++ struct page *page = llap->llap_page;
++ int rc;
++
++ page_cache_get(page);
++ llap->llap_defer_uptodate = defer;
++ llap->llap_ra_used = 0;
++ rc = obd_queue_group_io(exp, ll_i2info(page->mapping->host)->lli_smd,
++ NULL, oig, llap->llap_cookie, OBD_BRW_READ, 0,
++ CFS_PAGE_SIZE, 0, ASYNC_COUNT_STABLE | ASYNC_READY |
++ ASYNC_URGENT);
++ if (rc) {
++ LL_CDEBUG_PAGE(D_ERROR, page, "read queue failed: rc %d\n", rc);
++ page_cache_release(page);
++ }
++ RETURN(rc);
++}
++
++static void ll_ra_stats_inc_unlocked(struct ll_ra_info *ra, enum ra_stat which)
++{
++ LASSERTF(which >= 0 && which < _NR_RA_STAT, "which: %u\n", which);
++ ra->ra_stats[which]++;
++}
++
++static void ll_ra_stats_inc(struct address_space *mapping, enum ra_stat which)
++{
++ struct ll_sb_info *sbi = ll_i2sbi(mapping->host);
++ struct ll_ra_info *ra = &ll_i2sbi(mapping->host)->ll_ra_info;
++
++ spin_lock(&sbi->ll_lock);
++ ll_ra_stats_inc_unlocked(ra, which);
++ spin_unlock(&sbi->ll_lock);
++}
++
++void ll_ra_accounting(struct ll_async_page *llap, struct address_space *mapping)
++{
++ if (!llap->llap_defer_uptodate || llap->llap_ra_used)
++ return;
++
++ ll_ra_stats_inc(mapping, RA_STAT_DISCARDED);
++}
++
++#define RAS_CDEBUG(ras) \
++ CDEBUG(D_READA, \
++ "lrp %lu cr %lu cp %lu ws %lu wl %lu nra %lu r %lu ri %lu" \
++ "csr %lu sf %lu sp %lu sl %lu \n", \
++ ras->ras_last_readpage, ras->ras_consecutive_requests, \
++ ras->ras_consecutive_pages, ras->ras_window_start, \
++ ras->ras_window_len, ras->ras_next_readahead, \
++ ras->ras_requests, ras->ras_request_index, \
++ ras->ras_consecutive_stride_requests, ras->ras_stride_offset, \
++ ras->ras_stride_pages, ras->ras_stride_length)
++
++static int index_in_window(unsigned long index, unsigned long point,
++ unsigned long before, unsigned long after)
++{
++ unsigned long start = point - before, end = point + after;
++
++ if (start > point)
++ start = 0;
++ if (end < point)
++ end = ~0;
++
++ return start <= index && index <= end;
++}
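
The clamping in index_in_window guards against unsigned wrap-around at
both ends of the window. A quick runnable check (same logic as above,
wrapped in a main of our own):

    #include <stdio.h>

    static int index_in_window(unsigned long index, unsigned long point,
                               unsigned long before, unsigned long after)
    {
            unsigned long start = point - before, end = point + after;

            if (start > point)      /* point - before wrapped below zero */
                    start = 0;
            if (end < point)        /* point + after wrapped past max */
                    end = ~0UL;
            return start <= index && index <= end;
    }

    int main(void)
    {
            /* point 5 with before 8 would wrap; start clamps to 0 */
            printf("%d\n", index_in_window(2, 5, 8, 8));  /* 1 */
            printf("%d\n", index_in_window(20, 5, 8, 8)); /* 0 */
            return 0;
    }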
++
++static struct ll_readahead_state *ll_ras_get(struct file *f)
++{
++ struct ll_file_data *fd;
++
++ fd = LUSTRE_FPRIVATE(f);
++ return &fd->fd_ras;
++}
++
++void ll_ra_read_in(struct file *f, struct ll_ra_read *rar)
++{
++ struct ll_readahead_state *ras;
++
++ ras = ll_ras_get(f);
++
++ spin_lock(&ras->ras_lock);
++ ras->ras_requests++;
++ ras->ras_request_index = 0;
++ ras->ras_consecutive_requests++;
++ rar->lrr_reader = current;
++
++ list_add(&rar->lrr_linkage, &ras->ras_read_beads);
++ spin_unlock(&ras->ras_lock);
++}
++
++void ll_ra_read_ex(struct file *f, struct ll_ra_read *rar)
++{
++ struct ll_readahead_state *ras;
++
++ ras = ll_ras_get(f);
++
++ spin_lock(&ras->ras_lock);
++ list_del_init(&rar->lrr_linkage);
++ spin_unlock(&ras->ras_lock);
++}
++
++static struct ll_ra_read *ll_ra_read_get_locked(struct ll_readahead_state *ras)
++{
++ struct ll_ra_read *scan;
++
++ list_for_each_entry(scan, &ras->ras_read_beads, lrr_linkage) {
++ if (scan->lrr_reader == current)
++ return scan;
++ }
++ return NULL;
++}
++
++struct ll_ra_read *ll_ra_read_get(struct file *f)
++{
++ struct ll_readahead_state *ras;
++ struct ll_ra_read *bead;
++
++ ras = ll_ras_get(f);
++
++ spin_lock(&ras->ras_lock);
++ bead = ll_ra_read_get_locked(ras);
++ spin_unlock(&ras->ras_lock);
++ return bead;
++}
++
++static int ll_read_ahead_page(struct obd_export *exp, struct obd_io_group *oig,
++ int index, struct address_space *mapping)
++{
++ struct ll_async_page *llap;
++ struct page *page;
++ unsigned int gfp_mask = 0;
++ int rc = 0;
++
++ gfp_mask = GFP_HIGHUSER & ~__GFP_WAIT;
++#ifdef __GFP_NOWARN
++ gfp_mask |= __GFP_NOWARN;
++#endif
++ page = grab_cache_page_nowait_gfp(mapping, index, gfp_mask);
++ if (page == NULL) {
++ ll_ra_stats_inc(mapping, RA_STAT_FAILED_GRAB_PAGE);
++ CDEBUG(D_READA, "g_c_p_n failed\n");
++ return 0;
++ }
++
++ /* Check if page was truncated or reclaimed */
++ if (page->mapping != mapping) {
++ ll_ra_stats_inc(mapping, RA_STAT_WRONG_GRAB_PAGE);
++ CDEBUG(D_READA, "g_c_p_n returned invalid page\n");
++ GOTO(unlock_page, rc = 0);
++ }
++
++ /* we do this first so that we can see the page in the /proc
++ * accounting */
++ llap = llap_from_page(page, LLAP_ORIGIN_READAHEAD);
++ if (IS_ERR(llap) || llap->llap_defer_uptodate) {
++ if (PTR_ERR(llap) == -ENOLCK) {
++ ll_ra_stats_inc(mapping, RA_STAT_FAILED_MATCH);
++ CDEBUG(D_READA | D_PAGE,
++ "Adding page to cache failed index "
++ "%d\n", index);
++ CDEBUG(D_READA, "nolock page\n");
++ GOTO(unlock_page, rc = -ENOLCK);
++ }
++ CDEBUG(D_READA, "read-ahead page\n");
++ GOTO(unlock_page, rc = 0);
++ }
++
++ /* skip completed pages */
++ if (Page_Uptodate(page))
++ GOTO(unlock_page, rc = 0);
++
++ /* bail out when we hit the end of the lock. */
++ rc = ll_issue_page_read(exp, llap, oig, 1);
++ if (rc == 0) {
++ LL_CDEBUG_PAGE(D_READA | D_PAGE, page, "started read-ahead\n");
++ rc = 1;
++ } else {
++unlock_page:
++ unlock_page(page);
++ LL_CDEBUG_PAGE(D_READA | D_PAGE, page, "skipping read-ahead\n");
++ }
++ page_cache_release(page);
++ return rc;
++}
++
++/* ra_io_arg is filled in at the beginning of ll_readahead, under
++ * ras_lock; the following ll_read_ahead_pages then reads RA
++ * pages according to this arg. All the items in this structure are
++ * counted in units of page index.
++ */
++struct ra_io_arg {
++ unsigned long ria_start; /* start offset of read-ahead*/
++ unsigned long ria_end; /* end offset of read-ahead*/
++ /* If a stride read pattern is detected, ria_stoff is where the
++ * stride read starts. Note: for normal read-ahead, the
++ * value here is meaningless and is never accessed */
++ pgoff_t ria_stoff;
++ /* ria_length and ria_pages are the stride length and the number of
++ * data pages per stride in stride I/O mode. They are also used to
++ * check whether the read-ahead is stride I/O */
++ unsigned long ria_length;
++ unsigned long ria_pages;
++};
++
++#define RIA_DEBUG(ria) \
++ CDEBUG(D_READA, "rs %lu re %lu ro %lu rl %lu rp %lu\n", \
++ ria->ria_start, ria->ria_end, ria->ria_stoff, ria->ria_length,\
++ ria->ria_pages)
++
++#define RAS_INCREASE_STEP (1024 * 1024 >> CFS_PAGE_SHIFT)
++
++static inline int stride_io_mode(struct ll_readahead_state *ras)
++{
++ return ras->ras_consecutive_stride_requests > 1;
++}
++
++/* The function calculates how many pages will be read in
++ * [off, off + length] by stride I/O mode, with
++ * stride_offset = st_off, stride_length = st_len,
++ * stride_pages = st_pgs
++ */
++static unsigned long
++stride_pg_count(pgoff_t st_off, unsigned long st_len, unsigned long st_pgs,
++ unsigned long off, unsigned length)
++{
++ unsigned long cont_len = st_off > off ? st_off - off : 0;
++ __u64 stride_len = length + off > st_off ?
++ length + off + 1 - st_off : 0;
++ unsigned long left, pg_count;
++
++ if (st_len == 0 || length == 0)
++ return length;
++
++ left = do_div(stride_len, st_len);
++ left = min(left, st_pgs);
++
++ pg_count = left + stride_len * st_pgs + cont_len;
++
++ LASSERT(pg_count >= left);
++
++ CDEBUG(D_READA, "st_off %lu, st_len %lu st_pgs %lu off %lu length %u "
++ "pgcount %lu\n", st_off, st_len, st_pgs, off, length, pg_count);
++
++ return pg_count;
++}
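
A worked example of this arithmetic, runnable in userspace (do_div() is
replaced by plain division; the stride values are invented):

    #include <stdio.h>

    static unsigned long
    stride_pg_count(unsigned long st_off, unsigned long st_len,
                    unsigned long st_pgs, unsigned long off, unsigned length)
    {
            unsigned long cont_len = st_off > off ? st_off - off : 0;
            unsigned long long stride_len =
                    (unsigned long long)length + off > st_off ?
                    (unsigned long long)length + off + 1 - st_off : 0;
            unsigned long left;

            if (st_len == 0 || length == 0)
                    return length;

            left = stride_len % st_len; /* stand-in for do_div() */
            stride_len /= st_len;
            if (left > st_pgs)
                    left = st_pgs;

            return left + stride_len * st_pgs + cont_len;
    }

    int main(void)
    {
            /* stride of 64 pages, 16 of them data, starting at page 0 */
            printf("%lu\n", stride_pg_count(0, 64, 16, 0, 127)); /* 32 */
            printf("%lu\n", stride_pg_count(0, 64, 16, 0, 100)); /* 32 */
            return 0;
    }

Both calls cover two stripes' worth of data pages, hence 32 in each case.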
++
++static int ria_page_count(struct ra_io_arg *ria)
++{
++ __u64 length = ria->ria_end >= ria->ria_start ?
++ ria->ria_end - ria->ria_start + 1 : 0;
++
++ return stride_pg_count(ria->ria_stoff, ria->ria_length,
++ ria->ria_pages, ria->ria_start,
++ length);
++}
++
++/* Check whether the index is in the defined read-ahead window */
++static int ras_inside_ra_window(unsigned long idx, struct ra_io_arg *ria)
++{
++ /* If ria_length == ria_pages, it means non-stride I/O mode;
++ * idx should always be inside the read-ahead window in this case.
++ * For stride I/O mode, just check whether the idx is inside
++ * the ria_pages. */
++ return ria->ria_length == 0 || ria->ria_length == ria->ria_pages ||
++ (idx - ria->ria_stoff) % ria->ria_length < ria->ria_pages;
++}
++
++static int ll_read_ahead_pages(struct obd_export *exp,
++ struct obd_io_group *oig,
++ struct ra_io_arg *ria,
++ unsigned long *reserved_pages,
++ struct address_space *mapping,
++ unsigned long *ra_end)
++{
++ int rc, count = 0, stride_ria;
++ unsigned long page_idx;
++
++ LASSERT(ria != NULL);
++ RIA_DEBUG(ria);
++
++ stride_ria = ria->ria_length > ria->ria_pages && ria->ria_pages > 0;
++ for (page_idx = ria->ria_start; page_idx <= ria->ria_end &&
++ *reserved_pages > 0; page_idx++) {
++ if (ras_inside_ra_window(page_idx, ria)) {
++ /* If the page is inside the read-ahead window */
++ rc = ll_read_ahead_page(exp, oig, page_idx, mapping);
++ if (rc == 1) {
++ (*reserved_pages)--;
++ count++;
++ } else if (rc == -ENOLCK)
++ break;
++ } else if (stride_ria) {
++ /* If it is not in the read-ahead window but we are in
++ * stride read-ahead mode, check whether it should skip
++ * the stride gap */
++ pgoff_t offset;
++ /* FIXME: This assertion is only valid for forward
++ * read-ahead; it will be fixed when backward
++ * read-ahead is implemented */
++ LASSERTF(page_idx > ria->ria_stoff, "since %lu is in the"
++ " gap of the ra window, it should be bigger than the"
++ " stride offset %lu\n", page_idx, ria->ria_stoff);
++
++ offset = page_idx - ria->ria_stoff;
++ offset = offset % (ria->ria_length);
++ if (offset > ria->ria_pages) {
++ page_idx += ria->ria_length - offset;
++ CDEBUG(D_READA, "i %lu skip %lu \n", page_idx,
++ ria->ria_length - offset);
++ continue;
++ }
++ }
++ }
++ *ra_end = page_idx;
++ return count;
++}
++
++static int ll_readahead(struct ll_readahead_state *ras,
++ struct obd_export *exp, struct address_space *mapping,
++ struct obd_io_group *oig, int flags)
++{
++ unsigned long start = 0, end = 0, reserved;
++ unsigned long ra_end, len;
++ struct inode *inode;
++ struct lov_stripe_md *lsm;
++ struct ll_ra_read *bead;
++ struct ost_lvb lvb;
++ struct ra_io_arg ria = { 0 };
++ int ret = 0;
++ __u64 kms;
++ ENTRY;
++
++ inode = mapping->host;
++ lsm = ll_i2info(inode)->lli_smd;
++
++ lov_stripe_lock(lsm);
++ inode_init_lvb(inode, &lvb);
++ obd_merge_lvb(ll_i2obdexp(inode), lsm, &lvb, 1);
++ kms = lvb.lvb_size;
++ lov_stripe_unlock(lsm);
++ if (kms == 0) {
++ ll_ra_stats_inc(mapping, RA_STAT_ZERO_LEN);
++ RETURN(0);
++ }
++
++ spin_lock(&ras->ras_lock);
++ bead = ll_ra_read_get_locked(ras);
++ /* Enlarge the RA window to encompass the full read */
++ if (bead != NULL && ras->ras_window_start + ras->ras_window_len <
++ bead->lrr_start + bead->lrr_count) {
++ ras->ras_window_len = bead->lrr_start + bead->lrr_count -
++ ras->ras_window_start;
++ }
++ /* Reserve a part of the read-ahead window that we'll be issuing */
++ if (ras->ras_window_len) {
++ start = ras->ras_next_readahead;
++ end = ras->ras_window_start + ras->ras_window_len - 1;
++ }
++ if (end != 0) {
++ /* Truncate RA window to end of file */
++ end = min(end, (unsigned long)((kms - 1) >> CFS_PAGE_SHIFT));
++ ras->ras_next_readahead = max(end, end + 1);
++ RAS_CDEBUG(ras);
++ }
++ ria.ria_start = start;
++ ria.ria_end = end;
++ /* If stride I/O mode is detected, get stride window*/
++ if (stride_io_mode(ras)) {
++ ria.ria_stoff = ras->ras_stride_offset;
++ ria.ria_length = ras->ras_stride_length;
++ ria.ria_pages = ras->ras_stride_pages;
++ }
++ spin_unlock(&ras->ras_lock);
++
++ if (end == 0) {
++ ll_ra_stats_inc(mapping, RA_STAT_ZERO_WINDOW);
++ RETURN(0);
++ }
++
++ len = ria_page_count(&ria);
++ if (len == 0)
++ RETURN(0);
++
++ reserved = ll_ra_count_get(ll_i2sbi(inode), len);
++ if (reserved < len)
++ ll_ra_stats_inc(mapping, RA_STAT_MAX_IN_FLIGHT);
++
++ CDEBUG(D_READA, "reserved page %lu \n", reserved);
++
++ ret = ll_read_ahead_pages(exp, oig, &ria, &reserved, mapping, &ra_end);
++
++ LASSERTF(reserved >= 0, "reserved %lu\n", reserved);
++ if (reserved != 0)
++ ll_ra_count_put(ll_i2sbi(inode), reserved);
++
++ if (ra_end == end + 1 && ra_end == (kms >> CFS_PAGE_SHIFT))
++ ll_ra_stats_inc(mapping, RA_STAT_EOF);
++
++ /* if we didn't get to the end of the region we reserved from
++ * the ras, we need to go back and update the ras so that the
++ * next read-ahead tries from where we left off. we only do so
++ * if the region we failed to issue read-ahead on is still ahead
++ * of the app and behind the next index to start read-ahead from */
++ CDEBUG(D_READA, "ra_end %lu end %lu stride end %lu \n",
++ ra_end, end, ria.ria_end);
++
++ if (ra_end != (end + 1)) {
++ spin_lock(&ras->ras_lock);
++ if (ra_end < ras->ras_next_readahead &&
++ index_in_window(ra_end, ras->ras_window_start, 0,
++ ras->ras_window_len)) {
++ ras->ras_next_readahead = ra_end;
++ RAS_CDEBUG(ras);
++ }
++ spin_unlock(&ras->ras_lock);
++ }
++
++ RETURN(ret);
++}
++
++static void ras_set_start(struct ll_readahead_state *ras, unsigned long index)
++{
++ ras->ras_window_start = index & (~(RAS_INCREASE_STEP - 1));
++}
++
++/* called with the ras_lock held or from places where it doesn't matter */
++static void ras_reset(struct ll_readahead_state *ras, unsigned long index)
++{
++ ras->ras_last_readpage = index;
++ ras->ras_consecutive_requests = 0;
++ ras->ras_consecutive_pages = 0;
++ ras->ras_window_len = 0;
++ ras_set_start(ras, index);
++ ras->ras_next_readahead = max(ras->ras_window_start, index);
++
++ RAS_CDEBUG(ras);
++}
++
++/* called with the ras_lock held or from places where it doesn't matter */
++static void ras_stride_reset(struct ll_readahead_state *ras)
++{
++ ras->ras_consecutive_stride_requests = 0;
++ ras->ras_stride_length = 0;
++ ras->ras_stride_pages = 0;
++ RAS_CDEBUG(ras);
++}
++
++void ll_readahead_init(struct inode *inode, struct ll_readahead_state *ras)
++{
++ spin_lock_init(&ras->ras_lock);
++ ras_reset(ras, 0);
++ ras->ras_requests = 0;
++ INIT_LIST_HEAD(&ras->ras_read_beads);
++}
++
++/*
++ * Check whether the read request is in the stride window.
++ * If it is in the stride window, return 1, otherwise return 0.
++ */
++static int index_in_stride_window(unsigned long index,
++ struct ll_readahead_state *ras,
++ struct inode *inode)
++{
++ unsigned long stride_gap = index - ras->ras_last_readpage - 1;
++
++ if (ras->ras_stride_length == 0 || ras->ras_stride_pages == 0)
++ return 0;
++
++ /* If it is a contiguous read */
++ if (stride_gap == 0)
++ return ras->ras_consecutive_pages + 1 <= ras->ras_stride_pages;
++
++ /* Otherwise check the stride itself */
++ return (ras->ras_stride_length - ras->ras_stride_pages) == stride_gap &&
++ ras->ras_consecutive_pages == ras->ras_stride_pages;
++}
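
The detection rule above accepts a read as part of the stride pattern
either when it continues the current data chunk or when it jumps
exactly the stride gap after a full chunk. A runnable check of that rule
(simplified to plain parameters; names are ours):

    #include <stdio.h>

    static int in_stride(unsigned long index, unsigned long last_readpage,
                         unsigned long consec_pages,
                         unsigned long stride_len, unsigned long stride_pages)
    {
            unsigned long gap = index - last_readpage - 1;

            if (stride_len == 0 || stride_pages == 0)
                    return 0;
            if (gap == 0)   /* contiguous with the previous read */
                    return consec_pages + 1 <= stride_pages;
            return (stride_len - stride_pages) == gap &&
                   consec_pages == stride_pages;
    }

    int main(void)
    {
            /* pattern: 16 pages read, then a jump; stride 64/16 */
            printf("%d\n", in_stride(64, 15, 16, 64, 16)); /* 1: gap 48 fits */
            printf("%d\n", in_stride(70, 15, 16, 64, 16)); /* 0: wrong gap */
            return 0;
    }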
++
++static void ras_update_stride_detector(struct ll_readahead_state *ras,
++ unsigned long index)
++{
++ unsigned long stride_gap = index - ras->ras_last_readpage - 1;
++
++ if (!stride_io_mode(ras) && (stride_gap != 0 ||
++ ras->ras_consecutive_stride_requests == 0)) {
++ ras->ras_stride_pages = ras->ras_consecutive_pages;
++ ras->ras_stride_length = stride_gap +ras->ras_consecutive_pages;
++ }
++ RAS_CDEBUG(ras);
++}
++
++static unsigned long
++stride_page_count(struct ll_readahead_state *ras, unsigned long len)
++{
++ return stride_pg_count(ras->ras_stride_offset, ras->ras_stride_length,
++ ras->ras_stride_pages, ras->ras_stride_offset,
++ len);
++}
++
++/* Stride Read-ahead window will be increased inc_len according to
++ * stride I/O pattern */
++static void ras_stride_increase_window(struct ll_readahead_state *ras,
++ struct ll_ra_info *ra,
++ unsigned long inc_len)
++{
++ unsigned long left, step, window_len;
++ unsigned long stride_len;
++
++ LASSERT(ras->ras_stride_length > 0);
++
++ stride_len = ras->ras_window_start + ras->ras_window_len -
++ ras->ras_stride_offset;
++
++ LASSERTF(stride_len >= 0, "window_start %lu, window_len %lu"
++ " stride_offset %lu\n", ras->ras_window_start,
++ ras->ras_window_len, ras->ras_stride_offset);
++
++ left = stride_len % ras->ras_stride_length;
++
++ window_len = ras->ras_window_len - left;
++
++ if (left < ras->ras_stride_pages)
++ left += inc_len;
++ else
++ left = ras->ras_stride_pages + inc_len;
++
++ LASSERT(ras->ras_stride_pages != 0);
++
++ step = left / ras->ras_stride_pages;
++ left %= ras->ras_stride_pages;
++
++ window_len += step * ras->ras_stride_length + left;
++
++ if (stride_page_count(ras, window_len) <= ra->ra_max_pages)
++ ras->ras_window_len = window_len;
++
++ RAS_CDEBUG(ras);
++}
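
A standalone version of the window-growth arithmetic above may help:
the data portion of the window is extended by inc_len pages, then
converted back into a window length spanning whole strides plus the
leftover data pages. (The real code then caps the result against
ra_max_pages via stride_page_count; the values below are invented.)

    #include <stdio.h>

    static unsigned long
    stride_grow(unsigned long win_start, unsigned long win_len,
                unsigned long st_off, unsigned long st_len,
                unsigned long st_pgs, unsigned long inc_len)
    {
            unsigned long stride_len = win_start + win_len - st_off;
            unsigned long left = stride_len % st_len;
            unsigned long window_len = win_len - left;
            unsigned long step;

            if (left < st_pgs)
                    left += inc_len;
            else
                    left = st_pgs + inc_len;

            step = left / st_pgs;
            left %= st_pgs;

            return window_len + step * st_len + left;
    }

    int main(void)
    {
            /* stride 64 pages, 16 of them data; window currently one chunk;
             * growing by 256 data pages spans 17 whole strides -> 1088 */
            printf("%lu\n", stride_grow(0, 16, 0, 64, 16, 256));
            return 0;
    }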
++
++/* Set stride I/O read-ahead window start offset */
++static void ras_set_stride_offset(struct ll_readahead_state *ras)
++{
++ unsigned long window_len = ras->ras_next_readahead -
++ ras->ras_window_start;
++ unsigned long left;
++
++ LASSERT(ras->ras_stride_length != 0);
++
++ left = window_len % ras->ras_stride_length;
++
++ ras->ras_stride_offset = ras->ras_next_readahead - left;
++
++ RAS_CDEBUG(ras);
++}
++
++static void ras_update(struct ll_sb_info *sbi, struct inode *inode,
++ struct ll_readahead_state *ras, unsigned long index,
++ unsigned hit)
++{
++ struct ll_ra_info *ra = &sbi->ll_ra_info;
++ int zero = 0, stride_detect = 0, ra_miss = 0;
++ ENTRY;
++
++ spin_lock(&sbi->ll_lock);
++ spin_lock(&ras->ras_lock);
++
++ ll_ra_stats_inc_unlocked(ra, hit ? RA_STAT_HIT : RA_STAT_MISS);
++
++ /* reset the read-ahead window in two cases. First when the app seeks
++ * or reads to some other part of the file. Secondly if we get a
++ * read-ahead miss that we think we've previously issued. This can
++ * be a symptom of there being so many read-ahead pages that the VM is
++ * reclaiming it before we get to it. */
++ if (!index_in_window(index, ras->ras_last_readpage, 8, 8)) {
++ zero = 1;
++ ll_ra_stats_inc_unlocked(ra, RA_STAT_DISTANT_READPAGE);
++ } else if (!hit && ras->ras_window_len &&
++ index < ras->ras_next_readahead &&
++ index_in_window(index, ras->ras_window_start, 0,
++ ras->ras_window_len)) {
++ ra_miss = 1;
++ ll_ra_stats_inc_unlocked(ra, RA_STAT_MISS_IN_WINDOW);
++ }
++
++ /* On the second access to a file smaller than the tunable
++ * ra_max_read_ahead_whole_pages, trigger RA on all pages in the
++ * file up to ra_max_pages. This is simply a best effort and
++ * only occurs once per open file. Normal RA behavior is restored
++ * for subsequent IO. The mmap case does not increment
++ * ras_requests and thus can never trigger this behavior. */
++ if (ras->ras_requests == 2 && !ras->ras_request_index) {
++ __u64 kms_pages;
++
++ kms_pages = (i_size_read(inode) + CFS_PAGE_SIZE - 1) >>
++ CFS_PAGE_SHIFT;
++
++ CDEBUG(D_READA, "kmsp "LPU64" mwp %lu mp %lu\n", kms_pages,
++ ra->ra_max_read_ahead_whole_pages, ra->ra_max_pages);
++
++ if (kms_pages &&
++ kms_pages <= ra->ra_max_read_ahead_whole_pages) {
++ ras->ras_window_start = 0;
++ ras->ras_last_readpage = 0;
++ ras->ras_next_readahead = 0;
++ ras->ras_window_len = min(ra->ra_max_pages,
++ ra->ra_max_read_ahead_whole_pages);
++ GOTO(out_unlock, 0);
++ }
++ }
++ if (zero) {
++ /* check whether it is in stride I/O mode*/
++ if (!index_in_stride_window(index, ras, inode)) {
++ ras_reset(ras, index);
++ ras->ras_consecutive_pages++;
++ ras_stride_reset(ras);
++ GOTO(out_unlock, 0);
++ } else {
++ ras->ras_consecutive_requests = 0;
++ if (++ras->ras_consecutive_stride_requests > 1)
++ stride_detect = 1;
++ RAS_CDEBUG(ras);
++ }
++ } else {
++ if (ra_miss) {
++ if (index_in_stride_window(index, ras, inode) &&
++ stride_io_mode(ras)) {
++ /* If stride-RA hit a cache miss, the stride detector
++ * is not reset, to avoid the overhead of
++ * re-detecting the read-ahead mode */
++ if (index != ras->ras_last_readpage + 1)
++ ras->ras_consecutive_pages = 0;
++ RAS_CDEBUG(ras);
++ } else {
++ /* Reset both the stride window and the normal RA window */
++ ras_reset(ras, index);
++ ras->ras_consecutive_pages++;
++ ras_stride_reset(ras);
++ GOTO(out_unlock, 0);
++ }
++ } else if (stride_io_mode(ras)) {
++ /* If this is a contiguous read but we are currently in
++ * stride I/O mode, check whether the stride step is still
++ * valid; if not, reset the stride ra window */
++ if (!index_in_stride_window(index, ras, inode)) {
++ /* Shrink the stride read-ahead window to zero */
++ ras_stride_reset(ras);
++ ras->ras_window_len = 0;
++ ras->ras_next_readahead = index;
++ }
++ }
++ }
++ ras->ras_consecutive_pages++;
++ ras_update_stride_detector(ras, index);
++ ras->ras_last_readpage = index;
++ ras_set_start(ras, index);
++ ras->ras_next_readahead = max(ras->ras_window_start,
++ ras->ras_next_readahead);
++ RAS_CDEBUG(ras);
++
++ /* Trigger RA in the mmap case where ras_consecutive_requests
++ * is not incremented and thus can't be used to trigger RA */
++ if (!ras->ras_window_len && ras->ras_consecutive_pages == 4) {
++ ras->ras_window_len = RAS_INCREASE_STEP;
++ GOTO(out_unlock, 0);
++ }
++
++ /* Initially reset the stride window offset to next_readahead */
++ if (ras->ras_consecutive_stride_requests == 2 && stride_detect)
++ ras_set_stride_offset(ras);
++
++ /* The initial ras_window_len is set to the request size. To avoid
++ * uselessly reading and discarding pages for random IO the window is
++ * only increased once per consecutive request received. */
++ if ((ras->ras_consecutive_requests > 1 &&
++ !ras->ras_request_index) || stride_detect) {
++ if (stride_io_mode(ras))
++ ras_stride_increase_window(ras, ra, RAS_INCREASE_STEP);
++ else
++ ras->ras_window_len = min(ras->ras_window_len +
++ RAS_INCREASE_STEP,
++ ra->ra_max_pages);
++ }
++ EXIT;
++out_unlock:
++ RAS_CDEBUG(ras);
++ ras->ras_request_index++;
++ spin_unlock(&ras->ras_lock);
++ spin_unlock(&sbi->ll_lock);
++ return;
++}
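
The small-file heuristic near the top of ras_update is worth a worked
example: on the second read request against a file whose size in pages
fits under the whole-file threshold, the window opens over the entire
file at once. A sketch with invented tunables (4 KiB pages assumed):

    #include <stdio.h>

    #define PAGE_SHIFT 12
    #define PAGE_SIZE  4096ULL

    static unsigned long whole_file_window(unsigned long requests,
                                           unsigned long request_index,
                                           unsigned long long isize,
                                           unsigned long max_pages,
                                           unsigned long whole_pages)
    {
            unsigned long long kms_pages = (isize + PAGE_SIZE - 1) >> PAGE_SHIFT;

            if (requests == 2 && request_index == 0 &&
                kms_pages && kms_pages <= whole_pages)
                    return max_pages < whole_pages ? max_pages : whole_pages;
            return 0;   /* fall through to normal windowing */
    }

    int main(void)
    {
            /* 1 MiB file = 256 pages, under a 512-page threshold */
            printf("%lu\n", whole_file_window(2, 0, 1 << 20, 1024, 512));
            return 0;
    }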
++
++int ll_writepage(struct page *page)
++{
++ struct inode *inode = page->mapping->host;
++ struct ll_inode_info *lli = ll_i2info(inode);
++ struct obd_export *exp;
++ struct ll_async_page *llap;
++ int rc = 0;
++ ENTRY;
++
++ LASSERT(PageLocked(page));
++
++ exp = ll_i2obdexp(inode);
++ if (exp == NULL)
++ GOTO(out, rc = -EINVAL);
++
++ llap = llap_from_page(page, LLAP_ORIGIN_WRITEPAGE);
++ if (IS_ERR(llap))
++ GOTO(out, rc = PTR_ERR(llap));
++
++ LASSERT(!llap->llap_nocache);
++ LASSERT(!PageWriteback(page));
++ set_page_writeback(page);
++
++ page_cache_get(page);
++ if (llap->llap_write_queued) {
++ LL_CDEBUG_PAGE(D_PAGE, page, "marking urgent\n");
++ rc = obd_set_async_flags(exp, lli->lli_smd, NULL,
++ llap->llap_cookie,
++ ASYNC_READY | ASYNC_URGENT);
++ } else {
++ rc = queue_or_sync_write(exp, inode, llap, CFS_PAGE_SIZE,
++ ASYNC_READY | ASYNC_URGENT);
++ }
++ if (rc) {
++ /* re-dirty page on error so it retries write */
++ if (PageWriteback(page))
++ end_page_writeback(page);
++
++ /* resend the page only if its IO was never started */
++ if (!PageError(page))
++ ll_redirty_page(page);
++
++ page_cache_release(page);
++ }
++out:
++ if (rc) {
++ if (!lli->lli_async_rc)
++ lli->lli_async_rc = rc;
++ unlock_page(page);
++ }
++ RETURN(rc);
++}
++
++/*
++ * for now we do our readpage the same on both 2.4 and 2.6. The kernel's
++ * read-ahead assumes it is valid to issue readpage all the way up to
++ * i_size, but our dlm locks make that not the case. We disable the
++ * kernel's read-ahead and do our own by walking ahead in the page cache,
++ * checking for dlm lock coverage. the main difference between 2.4 and
++ * 2.6 is how read-ahead gets batched and issued, but we're using our own,
++ * so they look the same.
++ */
++int ll_readpage(struct file *filp, struct page *page)
++{
++ struct ll_file_data *fd = LUSTRE_FPRIVATE(filp);
++ struct inode *inode = page->mapping->host;
++ struct obd_export *exp;
++ struct ll_async_page *llap;
++ struct obd_io_group *oig = NULL;
++ struct lustre_handle *lockh = NULL;
++ int rc;
++ ENTRY;
++
++ LASSERT(PageLocked(page));
++ LASSERT(!PageUptodate(page));
++ CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),offset=%Lu=%#Lx\n",
++ inode->i_ino, inode->i_generation, inode,
++ (((loff_t)page->index) << CFS_PAGE_SHIFT),
++ (((loff_t)page->index) << CFS_PAGE_SHIFT));
++ LASSERT(atomic_read(&filp->f_dentry->d_inode->i_count) > 0);
++
++ if (!ll_i2info(inode)->lli_smd) {
++ /* File with no objects - one big hole */
++ /* We use this only because remove_from_page_cache() is not
++ * exported; the page is made up to date again below. */
++ ll_truncate_complete_page(page);
++ clear_page(kmap(page));
++ kunmap(page);
++ SetPageUptodate(page);
++ unlock_page(page);
++ RETURN(0);
++ }
++
++ rc = oig_init(&oig);
++ if (rc < 0)
++ GOTO(out, rc);
++
++ exp = ll_i2obdexp(inode);
++ if (exp == NULL)
++ GOTO(out, rc = -EINVAL);
++
++ if (fd->fd_flags & LL_FILE_GROUP_LOCKED)
++ lockh = &fd->fd_cwlockh;
++
++ llap = llap_from_page_with_lockh(page, LLAP_ORIGIN_READPAGE, lockh);
++ if (IS_ERR(llap)) {
++ if (PTR_ERR(llap) == -ENOLCK) {
++ CWARN("ino %lu page %lu (%llu) not covered by "
++ "a lock (mmap?). check debug logs.\n",
++ inode->i_ino, page->index,
++ (long long)page->index << PAGE_CACHE_SHIFT);
++ }
++ GOTO(out, rc = PTR_ERR(llap));
++ }
++
++ if (ll_i2sbi(inode)->ll_ra_info.ra_max_pages)
++ ras_update(ll_i2sbi(inode), inode, &fd->fd_ras, page->index,
++ llap->llap_defer_uptodate);
++
++ if (llap->llap_defer_uptodate) {
++ /* This is the callpath if we got the page from a readahead */
++ llap->llap_ra_used = 1;
++ rc = ll_readahead(&fd->fd_ras, exp, page->mapping, oig,
++ fd->fd_flags);
++ if (rc > 0)
++ obd_trigger_group_io(exp, ll_i2info(inode)->lli_smd,
++ NULL, oig);
++ LL_CDEBUG_PAGE(D_PAGE, page, "marking uptodate from defer\n");
++ SetPageUptodate(page);
++ unlock_page(page);
++ GOTO(out_oig, rc = 0);
++ }
++
++ rc = ll_issue_page_read(exp, llap, oig, 0);
++ if (rc)
++ GOTO(out, rc);
++
++ LL_CDEBUG_PAGE(D_PAGE, page, "queued readpage\n");
++ /* We have just requested the actual page we want, see if we can tack
++ * on some readahead to that page's RPC before it is sent. */
++ if (ll_i2sbi(inode)->ll_ra_info.ra_max_pages)
++ ll_readahead(&fd->fd_ras, exp, page->mapping, oig,
++ fd->fd_flags);
++
++ rc = obd_trigger_group_io(exp, ll_i2info(inode)->lli_smd, NULL, oig);
++
++out:
++ if (rc)
++ unlock_page(page);
++out_oig:
++ if (oig != NULL)
++ oig_release(oig);
++ RETURN(rc);
++}
++
++static void ll_file_put_pages(struct page **pages, int numpages)
++{
++ int i;
++ struct page **pp;
++ ENTRY;
++
++ for (i = 0, pp = pages; i < numpages; i++, pp++) {
++ if (*pp) {
++ LL_CDEBUG_PAGE(D_PAGE, (*pp), "free\n");
++ __ll_put_llap(*pp);
++ if (page_private(*pp))
++ CERROR("the llap wasn't freed\n");
++ (*pp)->mapping = NULL;
++ if (page_count(*pp) != 1)
++ CERROR("page %p, flags %#lx, count %i, private %p\n",
++ (*pp), (unsigned long)(*pp)->flags, page_count(*pp),
++ (void*)page_private(*pp));
++ __free_pages(*pp, 0);
++ }
++ }
++ OBD_FREE(pages, numpages * sizeof(struct page*));
++ EXIT;
++}
++
++static struct page **ll_file_prepare_pages(int numpages, struct inode *inode,
++ unsigned long first)
++{
++ struct page **pages;
++ int i;
++ int rc = 0;
++ ENTRY;
++
++ OBD_ALLOC(pages, sizeof(struct page *) * numpages);
++ if (pages == NULL)
++ RETURN(ERR_PTR(-ENOMEM));
++ for (i = 0; i < numpages; i++) {
++ struct page *page;
++ struct ll_async_page *llap;
++
++ page = alloc_pages(GFP_HIGHUSER, 0);
++ if (page == NULL)
++ GOTO(err, rc = -ENOMEM);
++ pages[i] = page;
++ /* llap_from_page needs page index and mapping to be set */
++ page->index = first++;
++ page->mapping = inode->i_mapping;
++ llap = llap_from_page(page, LLAP_ORIGIN_LOCKLESS_IO);
++ if (IS_ERR(llap))
++ GOTO(err, rc = PTR_ERR(llap));
++ llap->llap_lockless_io_page = 1;
++ }
++ RETURN(pages);
++err:
++ ll_file_put_pages(pages, numpages);
++ RETURN(ERR_PTR(rc));
++}
++
++static ssize_t ll_file_copy_pages(struct page **pages, int numpages,
++ const struct iovec *iov, unsigned long nsegs,
++ ssize_t iov_offset, loff_t pos, size_t count,
++ int rw)
++{
++ ssize_t amount = 0;
++ int i;
++ int updatechecksum = ll_i2sbi(pages[0]->mapping->host)->ll_flags &
++ LL_SBI_LLITE_CHECKSUM;
++ ENTRY;
++
++ for (i = 0; i < numpages; i++) {
++ unsigned offset, bytes, left = 0;
++ char *vaddr;
++
++ vaddr = kmap(pages[i]);
++ offset = pos & (CFS_PAGE_SIZE - 1);
++ bytes = min_t(unsigned, CFS_PAGE_SIZE - offset, count);
++ LL_CDEBUG_PAGE(D_PAGE, pages[i], "op = %s, addr = %p, "
++ "bytes = %u\n",
++ (rw == WRITE) ? "CFU" : "CTU",
++ vaddr + offset, bytes);
++ while (bytes > 0 && !left && nsegs) {
++ unsigned copy = min_t(ssize_t, bytes,
++ iov->iov_len - iov_offset);
++ if (rw == WRITE) {
++ left = copy_from_user(vaddr + offset,
++ iov->iov_base +iov_offset,
++ copy);
++ if (updatechecksum) {
++ struct ll_async_page *llap;
++
++ llap = llap_cast_private(pages[i]);
++ llap->llap_checksum =
++ init_checksum(OSC_DEFAULT_CKSUM);
++ llap->llap_checksum =
++ compute_checksum(llap->llap_checksum,
++ vaddr,CFS_PAGE_SIZE,
++ OSC_DEFAULT_CKSUM);
++ }
++ } else {
++ left = copy_to_user(iov->iov_base + iov_offset,
++ vaddr + offset, copy);
++ }
++
++ amount += copy;
++ count -= copy;
++ pos += copy;
++ iov_offset += copy;
++ bytes -= copy;
++ if (iov_offset == iov->iov_len) {
++ iov_offset = 0;
++ iov++;
++ nsegs--;
++ }
++ }
++ kunmap(pages[i]);
++ if (left) {
++ amount -= left;
++ break;
++ }
++ }
++ if (amount == 0)
++ RETURN(-EFAULT);
++ RETURN(amount);
++}
++
++static int ll_file_oig_pages(struct inode * inode, struct page **pages,
++ int numpages, loff_t pos, size_t count, int rw)
++{
++ struct obd_io_group *oig;
++ struct ll_inode_info *lli = ll_i2info(inode);
++ struct obd_export *exp;
++ loff_t org_pos = pos;
++ obd_flag brw_flags;
++ int rc;
++ int i;
++ ENTRY;
++
++ exp = ll_i2obdexp(inode);
++ if (exp == NULL)
++ RETURN(-EINVAL);
++ rc = oig_init(&oig);
++ if (rc)
++ RETURN(rc);
++ brw_flags = OBD_BRW_SRVLOCK;
++ if (cfs_capable(CFS_CAP_SYS_RESOURCE))
++ brw_flags |= OBD_BRW_NOQUOTA;
++
++ for (i = 0; i < numpages; i++) {
++ struct ll_async_page *llap;
++ unsigned from, bytes;
++
++ from = pos & (CFS_PAGE_SIZE - 1);
++ bytes = min_t(unsigned, CFS_PAGE_SIZE - from,
++ count - pos + org_pos);
++ llap = llap_cast_private(pages[i]);
++ LASSERT(llap);
++
++ lock_page(pages[i]);
++
++ LL_CDEBUG_PAGE(D_PAGE, pages[i], "offset "LPU64","
++ " from %u, bytes = %u\n",
++ pos, from, bytes);
++ LASSERTF(pos >> CFS_PAGE_SHIFT == pages[i]->index,
++ "wrong page index %lu (%lu)\n",
++ pages[i]->index,
++ (unsigned long)(pos >> CFS_PAGE_SHIFT));
++ rc = obd_queue_group_io(exp, lli->lli_smd, NULL, oig,
++ llap->llap_cookie,
++ (rw == WRITE) ?
++ OBD_BRW_WRITE:OBD_BRW_READ,
++ from, bytes, brw_flags,
++ ASYNC_READY | ASYNC_URGENT |
++ ASYNC_COUNT_STABLE | ASYNC_GROUP_SYNC);
++ if (rc) {
++ i++;
++ GOTO(out, rc);
++ }
++ pos += bytes;
++ }
++ rc = obd_trigger_group_io(exp, lli->lli_smd, NULL, oig);
++ if (rc)
++ GOTO(out, rc);
++ rc = oig_wait(oig);
++out:
++ while(--i >= 0)
++ unlock_page(pages[i]);
++ oig_release(oig);
++ RETURN(rc);
++}
++
++/* Advance through the passed iov: adjust the iov pointer as necessary
++ * and return the residual offset into the entry we end up pointing at;
++ * also reduce nr_segs as needed */
++static ssize_t ll_iov_advance(const struct iovec **iov, unsigned long *nr_segs,
++ ssize_t offset)
++{
++ while (*nr_segs > 0) {
++ if ((*iov)->iov_len > offset)
++ return offset;
++ offset -= (*iov)->iov_len;
++ (*iov)++;
++ (*nr_segs)--;
++ }
++ return 0;
++}
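
ll_file_lockless_io below re-derives its position in the iovec array
after every chunk with this helper; the contract is that the return
value is the offset into the entry the walk stops in, which the copy
loop then adds to iov_base. A runnable userspace check of that contract
(names are ours):

    #include <stdio.h>
    #include <sys/uio.h>

    static ssize_t iov_advance(const struct iovec **iov,
                               unsigned long *nr_segs, ssize_t offset)
    {
            while (*nr_segs > 0) {
                    if ((ssize_t)(*iov)->iov_len > offset)
                            return offset;  /* residual offset in entry */
                    offset -= (*iov)->iov_len;
                    (*iov)++;
                    (*nr_segs)--;
            }
            return 0;
    }

    int main(void)
    {
            char a[100], b[50];
            struct iovec vec[2] = { { a, sizeof(a) }, { b, sizeof(b) } };
            const struct iovec *iv = vec;
            unsigned long nsegs = 2;
            ssize_t off = iov_advance(&iv, &nsegs, 120);

            /* 120 bytes: all of the first entry plus 20 into the second */
            printf("entry=%d off=%zd nsegs=%lu\n", (int)(iv - vec), off, nsegs);
            return 0;
    }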
++
++ssize_t ll_file_lockless_io(struct file *file, const struct iovec *iov,
++ unsigned long nr_segs,
++ loff_t *ppos, int rw, ssize_t count)
++{
++ loff_t pos;
++ struct inode *inode = file->f_dentry->d_inode;
++ ssize_t rc = 0;
++ int max_pages;
++ size_t amount = 0;
++ unsigned long first, last;
++ const struct iovec *iv = &iov[0];
++ unsigned long nsegs = nr_segs;
++ unsigned long offset = 0;
++ ENTRY;
++
++ if (rw == READ) {
++ loff_t isize;
++
++ ll_inode_size_lock(inode, 0);
++ isize = i_size_read(inode);
++ ll_inode_size_unlock(inode, 0);
++ if (*ppos >= isize)
++ GOTO(out, rc = 0);
++ if (*ppos + count >= isize)
++ count -= *ppos + count - isize;
++ if (count == 0)
++ GOTO(out, rc);
++ } else {
++ rc = generic_write_checks(file, ppos, &count, 0);
++ if (rc)
++ GOTO(out, rc);
++ rc = ll_remove_suid(file->f_dentry, file->f_vfsmnt);
++ if (rc)
++ GOTO(out, rc);
++ }
++
++ pos = *ppos;
++ first = pos >> CFS_PAGE_SHIFT;
++ last = (pos + count - 1) >> CFS_PAGE_SHIFT;
++ max_pages = PTLRPC_MAX_BRW_PAGES *
++ ll_i2info(inode)->lli_smd->lsm_stripe_count;
++ CDEBUG(D_INFO, "%u, stripe_count = %u\n",
++ PTLRPC_MAX_BRW_PAGES /* max_pages_per_rpc */,
++ ll_i2info(inode)->lli_smd->lsm_stripe_count);
++
++ while (first <= last && rc >= 0) {
++ int pages_for_io;
++ struct page **pages;
++ size_t bytes = count - amount;
++
++ pages_for_io = min_t(int, last - first + 1, max_pages);
++ pages = ll_file_prepare_pages(pages_for_io, inode, first);
++ if (IS_ERR(pages)) {
++ rc = PTR_ERR(pages);
++ break;
++ }
++ if (rw == WRITE) {
++ rc = ll_file_copy_pages(pages, pages_for_io, iv, nsegs,
++ offset, pos + amount, bytes,
++ rw);
++ if (rc < 0)
++ GOTO(put_pages, rc);
++ offset = ll_iov_advance(&iv, &nsegs, offset + rc);
++ bytes = rc;
++ }
++ rc = ll_file_oig_pages(inode, pages, pages_for_io,
++ pos + amount, bytes, rw);
++ if (rc)
++ GOTO(put_pages, rc);
++ if (rw == READ) {
++ rc = ll_file_copy_pages(pages, pages_for_io, iv, nsegs,
++ offset, pos + amount, bytes, rw);
++ if (rc < 0)
++ GOTO(put_pages, rc);
++ offset = ll_iov_advance(&iv, &nsegs, offset + rc);
++ bytes = rc;
++ }
++ amount += bytes;
++put_pages:
++ ll_file_put_pages(pages, pages_for_io);
++ first += pages_for_io;
++ /* a short read/write check */
++ if (pos + amount < ((loff_t)first << CFS_PAGE_SHIFT))
++ break;
++ /* Check if we ran out of userspace buffers. (how could that
++ happen?) */
++ if (nsegs == 0)
++ break;
++ }
++ /* NOTE: don't update i_size and KMS in the absence of LDLM locks,
++ * even if the write makes the file larger */
++ file_accessed(file);
++ if (rw == READ && amount < count && rc == 0) {
++ unsigned long not_cleared;
++
++ while (nsegs > 0) {
++ ssize_t to_clear = min_t(ssize_t, count - amount,
++ iv->iov_len - offset);
++ not_cleared = clear_user(iv->iov_base + offset,
++ to_clear);
++ amount += to_clear - not_cleared;
++ if (not_cleared) {
++ rc = -EFAULT;
++ break;
++ }
++ offset = 0;
++ iv++;
++ nsegs--;
++ }
++ }
++ if (amount > 0) {
++ lprocfs_counter_add(ll_i2sbi(inode)->ll_stats,
++ (rw == WRITE) ?
++ LPROC_LL_LOCKLESS_WRITE :
++ LPROC_LL_LOCKLESS_READ,
++ (long)amount);
++ *ppos += amount;
++ RETURN(amount);
++ }
++out:
++ RETURN(rc);
++}
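
One last sketch for this file: the page-range chunking that drives the
lockless-I/O loop. The batch limit stands in for PTLRPC_MAX_BRW_PAGES *
lsm_stripe_count; all the numbers here are invented, 4 KiB pages assumed.

    #include <stdio.h>

    #define PAGE_SHIFT 12

    int main(void)
    {
            unsigned long long pos = 6000, count = 20000;
            unsigned long first = pos >> PAGE_SHIFT;
            unsigned long last = (pos + count - 1) >> PAGE_SHIFT;
            unsigned long max_pages = 256;
            unsigned long batch = last - first + 1 < max_pages ?
                                  last - first + 1 : max_pages;

            /* pages 1..6, so the first batch is 6 pages */
            printf("pages %lu..%lu, first batch of %lu\n", first, last, batch);
            return 0;
    }
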
+diff -urNad lustre~/lustre/llite/symlink.c lustre/lustre/llite/symlink.c
+--- lustre~/lustre/llite/symlink.c 2009-03-12 10:32:27.000000000 +0100
++++ lustre/lustre/llite/symlink.c 2009-03-12 11:02:51.000000000 +0100
+@@ -177,8 +177,12 @@
+ up(&lli->lli_size_sem);
+ }
+ if (rc) {
++#ifdef HAVE_PATH_RELEASE
+ path_release(nd); /* Kernel assumes that ->follow_link()
+ releases nameidata on error */
++#else
++ path_put(&nd->path);
++#endif
+ GOTO(out, rc);
+ }
+
+diff -urNad lustre~/lustre/lvfs/lvfs_linux.c lustre/lustre/lvfs/lvfs_linux.c
+--- lustre~/lustre/lvfs/lvfs_linux.c 2009-03-12 10:32:27.000000000 +0100
++++ lustre/lustre/lvfs/lvfs_linux.c 2009-03-12 11:02:51.000000000 +0100
+@@ -148,10 +148,10 @@
+ */
+
+ save->fs = get_fs();
+- LASSERT(atomic_read(&current->fs->pwd->d_count));
++ LASSERT(atomic_read(&cfs_fs_pwd(current->fs)->d_count));
+ LASSERT(atomic_read(&new_ctx->pwd->d_count));
+- save->pwd = dget(current->fs->pwd);
+- save->pwdmnt = mntget(current->fs->pwdmnt);
++ save->pwd = dget(cfs_fs_pwd(current->fs));
++ save->pwdmnt = mntget(cfs_fs_mnt(current->fs));
+ save->luc.luc_umask = current->fs->umask;
+
+ LASSERT(save->pwd);
+@@ -205,10 +205,10 @@
+ atomic_read(&current->fs->pwdmnt->mnt_count));
+ */
+
+- LASSERTF(current->fs->pwd == new_ctx->pwd, "%p != %p\n",
+- current->fs->pwd, new_ctx->pwd);
+- LASSERTF(current->fs->pwdmnt == new_ctx->pwdmnt, "%p != %p\n",
+- current->fs->pwdmnt, new_ctx->pwdmnt);
++ LASSERTF(cfs_fs_pwd(current->fs) == new_ctx->pwd, "%p != %p\n",
++ cfs_fs_pwd(current->fs), new_ctx->pwd);
++ LASSERTF(cfs_fs_mnt(current->fs) == new_ctx->pwdmnt, "%p != %p\n",
++ cfs_fs_mnt(current->fs), new_ctx->pwdmnt);
+
+ set_fs(saved->fs);
+ ll_set_fs_pwd(current->fs, saved->pwdmnt, saved->pwd);
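
These lvfs hunks stop dereferencing current->fs->pwd / ->pwdmnt
directly because struct fs_struct stores the working directory as a
struct path on 2.6.25+ kernels. The cfs_fs_pwd()/cfs_fs_mnt() accessors
are presumably defined along these lines in libcfs (the guard name here
is an assumption):

    #ifdef HAVE_FS_STRUCT_USE_PATH
    # define cfs_fs_pwd(fs)         ((fs)->pwd.dentry)
    # define cfs_fs_mnt(fs)         ((fs)->pwd.mnt)
    #else
    # define cfs_fs_pwd(fs)         ((fs)->pwd)
    # define cfs_fs_mnt(fs)         ((fs)->pwdmnt)
    #endif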
+diff -urNad lustre~/lustre/mgc/mgc_request.c lustre/lustre/mgc/mgc_request.c
+--- lustre~/lustre/mgc/mgc_request.c 2009-03-12 10:32:27.000000000 +0100
++++ lustre/lustre/mgc/mgc_request.c 2009-03-12 11:02:51.000000000 +0100
+@@ -415,7 +415,7 @@
+ obd->obd_lvfs_ctxt.fs = get_ds();
+
+ push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
+- dentry = lookup_one_len(MOUNT_CONFIGS_DIR, current->fs->pwd,
++ dentry = lookup_one_len(MOUNT_CONFIGS_DIR, cfs_fs_pwd(current->fs),
+ strlen(MOUNT_CONFIGS_DIR));
+ pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
+ if (IS_ERR(dentry)) {
+diff -urNad lustre~/lustre/obdclass/linux/linux-module.c lustre/lustre/obdclass/linux/linux-module.c
+--- lustre~/lustre/obdclass/linux/linux-module.c 2009-03-12 10:32:27.000000000 +0100
++++ lustre/lustre/obdclass/linux/linux-module.c 2009-03-12 11:02:51.000000000 +0100
+@@ -419,13 +419,14 @@
+ ENTRY;
+
+ obd_sysctl_init();
+- proc_lustre_root = proc_mkdir("lustre", proc_root_fs);
++ proc_lustre_root = lprocfs_register("fs/lustre", NULL,
++ lprocfs_base, NULL);
+ if (!proc_lustre_root) {
+ printk(KERN_ERR
+ "LustreError: error registering /proc/fs/lustre\n");
+ RETURN(-ENOMEM);
+ }
+- proc_version = lprocfs_add_vars(proc_lustre_root, lprocfs_base, NULL);
++
+ entry = create_proc_entry("devices", 0444, proc_lustre_root);
+ if (entry == NULL) {
+ CERROR("error registering /proc/fs/lustre/devices\n");
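
This hunk is needed because the proc_root_fs symbol stopped being
available to modules around 2.6.26, so /proc/fs/lustre can no longer be
created with proc_mkdir("lustre", proc_root_fs). Registering the full
"fs/lustre" path lets the proc code resolve the existing "fs" directory
itself; roughly:

    /* sketch: lprocfs_register() resolves "fs/lustre" relative to the
     * /proc root, so no proc_root_fs symbol is required */
    proc_lustre_root = lprocfs_register("fs/lustre", NULL,
                                        lprocfs_base, NULL);
    if (proc_lustre_root == NULL)
            return -ENOMEM;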
+diff -urNad lustre~/lustre/obdclass/linux/linux-sysctl.c lustre/lustre/obdclass/linux/linux-sysctl.c
+--- lustre~/lustre/obdclass/linux/linux-sysctl.c 2009-03-12 10:32:27.000000000 +0100
++++ lustre/lustre/obdclass/linux/linux-sysctl.c 2009-03-12 11:02:51.000000000 +0100
+@@ -56,7 +56,9 @@
+
+ cfs_sysctl_table_header_t *obd_table_header = NULL;
+
+-#define OBD_SYSCTL 300
++#ifndef HAVE_SYSCTL_UNNUMBERED
++
++#define CTL_LUSTRE 300
+
+ enum {
+ OBD_FAIL_LOC = 1, /* control test failures instrumentation */
+@@ -74,6 +76,23 @@
+ OBD_ALLOC_FAIL_RATE, /* memory allocation random failure rate */
+ OBD_MAX_DIRTY_PAGES, /* maximum dirty pages */
+ };
++#else
++#define CTL_LUSTRE CTL_UNNUMBERED
++#define OBD_FAIL_LOC CTL_UNNUMBERED
++#define OBD_FAIL_VAL CTL_UNNUMBERED
++#define OBD_TIMEOUT CTL_UNNUMBERED
++#define OBD_DUMP_ON_TIMEOUT CTL_UNNUMBERED
++#define OBD_MEMUSED CTL_UNNUMBERED
++#define OBD_PAGESUSED CTL_UNNUMBERED
++#define OBD_MAXMEMUSED CTL_UNNUMBERED
++#define OBD_MAXPAGESUSED CTL_UNNUMBERED
++#define OBD_SYNCFILTER CTL_UNNUMBERED
++#define OBD_LDLM_TIMEOUT CTL_UNNUMBERED
++#define OBD_DUMP_ON_EVICTION CTL_UNNUMBERED
++#define OBD_DEBUG_PEER_ON_TIMEOUT CTL_UNNUMBERED
++#define OBD_ALLOC_FAIL_RATE CTL_UNNUMBERED
++#define OBD_MAX_DIRTY_PAGES CTL_UNNUMBERED
++#endif
+
+ int LL_PROC_PROTO(proc_fail_loc)
+ {
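
The #else branch above targets kernels where binary sysctl numbers are
deprecated (HAVE_SYSCTL_UNNUMBERED, 2.6.24+): every fixed ctl_name is
mapped to CTL_UNNUMBERED so the tables register by procname alone. The
effect on an individual entry, sketched with illustrative field values:

    {
            .ctl_name     = OBD_TIMEOUT,  /* CTL_UNNUMBERED on new kernels */
            .procname     = "timeout",
            .data         = &obd_timeout,
            .maxlen       = sizeof(int),
            .mode         = 0644,
            .proc_handler = &proc_dointvec,
    },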
+@@ -120,7 +139,8 @@
+ obd_max_dirty_pages = 4 << (20 - CFS_PAGE_SHIFT);
+ }
+ } else {
+- char buf[21];
++ char buf[22];
++ struct ctl_table dummy;
+ int len;
+
+ len = lprocfs_read_frac_helper(buf, sizeof(buf),
+@@ -129,7 +149,13 @@
+ if (len > *lenp)
+ len = *lenp;
+ buf[len] = '\0';
+- if (copy_to_user(buffer, buf, len))
++
++ dummy = *table;
++ dummy.data = buf;
++ dummy.maxlen = sizeof(buf);
++
++ rc = ll_proc_dostring(&dummy, write, filp, buffer, lenp, ppos);
++ if (rc)
+ return -EFAULT;
+ *lenp = len;
+ }
+@@ -152,7 +178,8 @@
+ (unsigned int*)table->data,
+ OBD_ALLOC_FAIL_MULT);
+ } else {
+- char buf[21];
++ char buf[22];
++ struct ctl_table dummy;
+ int len;
+
+ len = lprocfs_read_frac_helper(buf, sizeof(buf),
+@@ -161,7 +188,12 @@
+ if (len > *lenp)
+ len = *lenp;
+ buf[len] = '\0';
+- if (copy_to_user(buffer, buf, len))
++ dummy = *table;
++ dummy.data = buf;
++ dummy.maxlen = sizeof(buf);
++
++ rc = ll_proc_dostring(&dummy, write, filp, buffer, lenp, ppos);
++ if (rc)
+ return -EFAULT;
+ *lenp = len;
+ }
+@@ -172,6 +204,7 @@
+
+ int LL_PROC_PROTO(proc_memory_alloc)
+ {
++ struct ctl_table dummy;
+ char buf[22];
+ int len;
+ DECLARE_LL_PROC_PPOS_DECL;
+@@ -187,15 +220,17 @@
+ if (len > *lenp)
+ len = *lenp;
+ buf[len] = '\0';
+- if (copy_to_user(buffer, buf, len))
+- return -EFAULT;
+- *lenp = len;
+- *ppos += *lenp;
+- return 0;
++
++ dummy = *table;
++ dummy.data = buf;
++ dummy.maxlen = sizeof(buf);
++
++ return ll_proc_dostring(&dummy, write, filp, buffer, lenp, ppos);
+ }
+
+ int LL_PROC_PROTO(proc_pages_alloc)
+ {
++ struct ctl_table dummy;
+ char buf[22];
+ int len;
+ DECLARE_LL_PROC_PPOS_DECL;
+@@ -211,15 +246,17 @@
+ if (len > *lenp)
+ len = *lenp;
+ buf[len] = '\0';
+- if (copy_to_user(buffer, buf, len))
+- return -EFAULT;
+- *lenp = len;
+- *ppos += *lenp;
+- return 0;
++
++ dummy = *table;
++ dummy.data = buf;
++ dummy.maxlen = sizeof(buf);
++
++ return ll_proc_dostring(&dummy, write, filp, buffer, lenp, ppos);
+ }
+
+ int LL_PROC_PROTO(proc_mem_max)
+ {
++ struct ctl_table dummy;
+ char buf[22];
+ int len;
+ DECLARE_LL_PROC_PPOS_DECL;
+@@ -235,17 +272,19 @@
+ if (len > *lenp)
+ len = *lenp;
+ buf[len] = '\0';
+- if (copy_to_user(buffer, buf, len))
+- return -EFAULT;
+- *lenp = len;
+- *ppos += *lenp;
+- return 0;
++
++ dummy = *table;
++ dummy.data = buf;
++ dummy.maxlen = sizeof(buf);
++
++ return ll_proc_dostring(&dummy, write, filp, buffer, lenp, ppos);
+ }
+
+ int LL_PROC_PROTO(proc_pages_max)
+ {
+ char buf[22];
+ int len;
++ struct ctl_table dummy;
+ DECLARE_LL_PROC_PPOS_DECL;
+
+ if (!*lenp || (*ppos && !write)) {
+@@ -254,16 +293,17 @@
+ }
+ if (write)
+ return -EINVAL;
++ dummy = *table;
++ dummy.data = buf;
++ dummy.maxlen = sizeof(buf);
++ len = snprintf(buf, sizeof(buf), LPU64,
++ obd_pages_max());
+
+- len = snprintf(buf, sizeof(buf), LPU64"\n", obd_pages_max());
+- if (len > *lenp)
+- len = *lenp;
+- buf[len] = '\0';
+- if (copy_to_user(buffer, buf, len))
+- return -EFAULT;
+- *lenp = len;
+- *ppos += *lenp;
+- return 0;
++ if (len > *lenp)
++ len = *lenp;
++ buf[len] = '\0';
++
++ return ll_proc_dostring(&dummy, write, filp, buffer, lenp, ppos);
+ }
+
+ static cfs_sysctl_table_t obd_table[] = {
+@@ -281,7 +321,8 @@
+ .data = &obd_fail_val,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+- .proc_handler = &proc_dointvec
++ .proc_handler = &proc_dointvec,
++ .strategy = &sysctl_intvec,
+ },
+ {
+ .ctl_name = OBD_TIMEOUT,
+@@ -297,7 +338,7 @@
+ .data = &obd_debug_peer_on_timeout,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+- .proc_handler = &proc_dointvec
++ .proc_handler = &proc_dointvec,
+ },
+ {
+ .ctl_name = OBD_DUMP_ON_TIMEOUT,
+@@ -305,7 +346,7 @@
+ .data = &obd_dump_on_timeout,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+- .proc_handler = &proc_dointvec
++ .proc_handler = &proc_dointvec,
+ },
+ {
+ .ctl_name = OBD_DUMP_ON_EVICTION,
+@@ -313,7 +354,7 @@
+ .data = &obd_dump_on_eviction,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+- .proc_handler = &proc_dointvec
++ .proc_handler = &proc_dointvec,
+ },
+ {
+ .ctl_name = OBD_MEMUSED,
+@@ -321,7 +362,7 @@
+ .data = NULL,
+ .maxlen = 0,
+ .mode = 0444,
+- .proc_handler = &proc_memory_alloc
++ .proc_handler = &proc_memory_alloc,
+ },
+ {
+ .ctl_name = OBD_PAGESUSED,
+@@ -329,7 +370,7 @@
+ .data = NULL,
+ .maxlen = 0,
+ .mode = 0444,
+- .proc_handler = &proc_pages_alloc
++ .proc_handler = &proc_pages_alloc,
+ },
+ {
+ .ctl_name = OBD_MAXMEMUSED,
+@@ -337,7 +378,7 @@
+ .data = NULL,
+ .maxlen = 0,
+ .mode = 0444,
+- .proc_handler = &proc_mem_max
++ .proc_handler = &proc_mem_max,
+ },
+ {
+ .ctl_name = OBD_MAXPAGESUSED,
+@@ -345,7 +386,7 @@
+ .data = NULL,
+ .maxlen = 0,
+ .mode = 0444,
+- .proc_handler = &proc_pages_max
++ .proc_handler = &proc_pages_max,
+ },
+ {
+ .ctl_name = OBD_LDLM_TIMEOUT,
+@@ -378,7 +419,7 @@
+
+ static cfs_sysctl_table_t parent_table[] = {
+ {
+- .ctl_name = OBD_SYSCTL,
++ .ctl_name = CTL_LUSTRE,
+ .procname = "lustre",
+ .data = NULL,
+ .maxlen = 0,
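
All of the read-only handlers above are converted from hand-rolled
copy_to_user() calls to one idiom: clone the ctl_table into a stack
`dummy`, point dummy.data at a local buffer, and let ll_proc_dostring()
(assumed to be a compat wrapper around the kernel's proc_dostring) do
the user copy and *ppos bookkeeping for both old- and new-style handler
signatures. Distilled into a minimal sketch (proc_example and the
printed value are illustrative only):

    int LL_PROC_PROTO(proc_example)
    {
            char buf[22];
            struct ctl_table dummy = *table;
            DECLARE_LL_PROC_PPOS_DECL;

            /* read-only: a real handler would reject write here */
            dummy.data   = buf;
            dummy.maxlen = sizeof(buf);
            snprintf(buf, sizeof(buf), "%d\n", 42);
            return ll_proc_dostring(&dummy, write, filp, buffer, lenp, ppos);
    }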
+diff -urNad lustre~/lustre/obdclass/lprocfs_status.c lustre/lustre/obdclass/lprocfs_status.c
+--- lustre~/lustre/obdclass/lprocfs_status.c 2009-03-12 10:32:27.000000000 +0100
++++ lustre/lustre/obdclass/lprocfs_status.c 2009-03-12 11:02:51.000000000 +0100
+@@ -151,7 +151,7 @@
+
+ LPROCFS_ENTRY();
+ OBD_FAIL_TIMEOUT(OBD_FAIL_LPROC_REMOVE, 10);
+- if (!dp->deleted && dp->read_proc)
++ if (!LPROCFS_CHECK_DELETED(dp) && dp->read_proc)
+ rc = dp->read_proc(page, &start, *ppos, PAGE_SIZE,
+ &eof, dp->data);
+ LPROCFS_EXIT();
+@@ -191,7 +191,7 @@
+ int rc = -EIO;
+
+ LPROCFS_ENTRY();
+- if (!dp->deleted && dp->write_proc)
++ if (!LPROCFS_CHECK_DELETED(dp) && dp->write_proc)
+ rc = dp->write_proc(f, buf, size, dp->data);
+ LPROCFS_EXIT();
+ return rc;
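
The two lprocfs_status.c hunks swap the direct dp->deleted test for
LPROCFS_CHECK_DELETED(dp) because newer kernels dropped the `deleted`
field from struct proc_dir_entry. The macro is presumably defined along
these lines (the guard name is an assumption):

    #ifdef HAVE_PROCFS_DELETED
    # define LPROCFS_CHECK_DELETED(dp)      ((dp)->deleted)
    #else
    # define LPROCFS_CHECK_DELETED(dp)      (0)
    #endif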
+diff -urNad lustre~/lustre/obdclass/lprocfs_status.c.orig lustre/lustre/obdclass/lprocfs_status.c.orig
+--- lustre~/lustre/obdclass/lprocfs_status.c.orig 1970-01-01 00:00:00.000000000 +0000
++++ lustre/lustre/obdclass/lprocfs_status.c.orig 2009-03-12 10:32:27.000000000 +0100
+@@ -0,0 +1,2062 @@
++/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
++ * vim:expandtab:shiftwidth=8:tabstop=8:
++ *
++ * GPL HEADER START
++ *
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License version 2 only,
++ * as published by the Free Software Foundation.
++ *
++ * This program is distributed in the hope that it will be useful, but
++ * WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ * General Public License version 2 for more details (a copy is included
++ * in the LICENSE file that accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License
++ * version 2 along with this program; If not, see
++ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
++ *
++ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
++ * CA 95054 USA or visit www.sun.com if you need additional information or
++ * have any questions.
++ *
++ * GPL HEADER END
++ */
++/*
++ * Copyright 2008 Sun Microsystems, Inc. All rights reserved
++ * Use is subject to license terms.
++ */
++/*
++ * This file is part of Lustre, http://www.lustre.org/
++ * Lustre is a trademark of Sun Microsystems, Inc.
++ *
++ * lustre/obdclass/lprocfs_status.c
++ *
++ * Author: Hariharan Thantry <thantry at users.sourceforge.net>
++ */
++
++#ifndef EXPORT_SYMTAB
++# define EXPORT_SYMTAB
++#endif
++#define DEBUG_SUBSYSTEM S_CLASS
++
++#ifndef __KERNEL__
++# include <liblustre.h>
++#endif
++
++#include <obd_class.h>
++#include <lprocfs_status.h>
++#include <lustre_fsfilt.h>
++
++#if defined(LPROCFS)
++
++#define MAX_STRING_SIZE 128
++
++/* for bug 10866, global variable */
++DECLARE_RWSEM(_lprocfs_lock);
++EXPORT_SYMBOL(_lprocfs_lock);
++
++int lprocfs_seq_release(struct inode *inode, struct file *file)
++{
++ LPROCFS_EXIT();
++ return seq_release(inode, file);
++}
++EXPORT_SYMBOL(lprocfs_seq_release);
++
++struct proc_dir_entry *lprocfs_srch(struct proc_dir_entry *head,
++ const char *name)
++{
++ struct proc_dir_entry *temp;
++
++ if (head == NULL)
++ return NULL;
++
++ LPROCFS_ENTRY();
++ temp = head->subdir;
++ while (temp != NULL) {
++ if (strcmp(temp->name, name) == 0) {
++ LPROCFS_EXIT();
++ return temp;
++ }
++
++ temp = temp->next;
++ }
++ LPROCFS_EXIT();
++ return NULL;
++}
++
++/* lprocfs API calls */
++
++/* Function that emulates snprintf but also has the side effect of advancing
++ the page pointer for the next write into the buffer, incrementing the total
++ length written to the buffer, and decrementing the size left in the
++ buffer. */
++static int lprocfs_obd_snprintf(char **page, int end, int *len,
++ const char *format, ...)
++{
++ va_list list;
++ int n;
++
++ if (*len >= end)
++ return 0;
++
++ va_start(list, format);
++ n = vsnprintf(*page, end - *len, format, list);
++ va_end(list);
++
++ *page += n; *len += n;
++ return n;
++}
++
++int lprocfs_add_simple(struct proc_dir_entry *root, char *name,
++ read_proc_t *read_proc, write_proc_t *write_proc,
++ void *data)
++{
++ struct proc_dir_entry *proc;
++ mode_t mode = 0;
++
++ if (root == NULL || name == NULL)
++ return -EINVAL;
++ if (read_proc)
++ mode = 0444;
++ if (write_proc)
++ mode |= 0200;
++ proc = create_proc_entry(name, mode, root);
++ if (!proc) {
++ CERROR("LprocFS: No memory to create /proc entry %s", name);
++ return -ENOMEM;
++ }
++ proc->read_proc = read_proc;
++ proc->write_proc = write_proc;
++ proc->data = data;
++ return 0;
++}
++
++static ssize_t lprocfs_fops_read(struct file *f, char __user *buf, size_t size,
++ loff_t *ppos)
++{
++ struct proc_dir_entry *dp = PDE(f->f_dentry->d_inode);
++ char *page, *start = NULL;
++ int rc = 0, eof = 1, count;
++
++ if (*ppos >= PAGE_SIZE)
++ return 0;
++
++ page = (char *)__get_free_page(GFP_KERNEL);
++ if (page == NULL)
++ return -ENOMEM;
++
++ LPROCFS_ENTRY();
++ OBD_FAIL_TIMEOUT(OBD_FAIL_LPROC_REMOVE, 10);
++ if (!dp->deleted && dp->read_proc)
++ rc = dp->read_proc(page, &start, *ppos, PAGE_SIZE,
++ &eof, dp->data);
++ LPROCFS_EXIT();
++ if (rc <= 0)
++ goto out;
++
++ /* for lustre proc read, the read count must be less than PAGE_SIZE */
++ LASSERT(eof == 1);
++
++ if (start == NULL) {
++ rc -= *ppos;
++ if (rc < 0)
++ rc = 0;
++ if (rc == 0)
++ goto out;
++ start = page + *ppos;
++ } else if (start < page) {
++ start = page;
++ }
++
++ count = (rc < size) ? rc : size;
++ if (copy_to_user(buf, start, count)) {
++ rc = -EFAULT;
++ goto out;
++ }
++ *ppos += count;
++
++out:
++ free_page((unsigned long)page);
++ return rc;
++}
++
++static ssize_t lprocfs_fops_write(struct file *f, const char __user *buf,
++ size_t size, loff_t *ppos)
++{
++ struct proc_dir_entry *dp = PDE(f->f_dentry->d_inode);
++ int rc = -EIO;
++
++ LPROCFS_ENTRY();
++ if (!dp->deleted && dp->write_proc)
++ rc = dp->write_proc(f, buf, size, dp->data);
++ LPROCFS_EXIT();
++ return rc;
++}
++
++static struct file_operations lprocfs_generic_fops = {
++ .owner = THIS_MODULE,
++ .read = lprocfs_fops_read,
++ .write = lprocfs_fops_write,
++};
++
++int lprocfs_evict_client_open(struct inode *inode, struct file *f)
++{
++ struct proc_dir_entry *dp = PDE(f->f_dentry->d_inode);
++ struct obd_device *obd = dp->data;
++
++ atomic_inc(&obd->obd_evict_inprogress);
++
++ return 0;
++}
++
++int lprocfs_evict_client_release(struct inode *inode, struct file *f)
++{
++ struct proc_dir_entry *dp = PDE(f->f_dentry->d_inode);
++ struct obd_device *obd = dp->data;
++
++ atomic_dec(&obd->obd_evict_inprogress);
++ wake_up(&obd->obd_evict_inprogress_waitq);
++
++ return 0;
++}
++
++struct file_operations lprocfs_evict_client_fops = {
++ .owner = THIS_MODULE,
++ .read = lprocfs_fops_read,
++ .write = lprocfs_fops_write,
++ .open = lprocfs_evict_client_open,
++ .release = lprocfs_evict_client_release,
++};
++EXPORT_SYMBOL(lprocfs_evict_client_fops);
++
++/**
++ * Add /proc entries.
++ *
++ * \param root [in] The parent proc entry on which new entry will be added.
++ * \param list [in] Array of proc entries to be added.
++ * \param data [in] The argument to be passed when entries read/write routines
++ * are called through /proc file.
++ *
++ * \retval 0 on success
++ * < 0 on error
++ */
++int lprocfs_add_vars(struct proc_dir_entry *root, struct lprocfs_vars *list,
++ void *data)
++{
++ if (root == NULL || list == NULL)
++ return -EINVAL;
++
++ while (list->name != NULL) {
++ struct proc_dir_entry *cur_root, *proc;
++ char *pathcopy, *cur, *next, pathbuf[64];
++ int pathsize = strlen(list->name) + 1;
++
++ proc = NULL;
++ cur_root = root;
++
++ /* need copy of path for strsep */
++ if (strlen(list->name) > sizeof(pathbuf) - 1) {
++ OBD_ALLOC(pathcopy, pathsize);
++ if (pathcopy == NULL)
++ return -ENOMEM;
++ } else {
++ pathcopy = pathbuf;
++ }
++
++ next = pathcopy;
++ strcpy(pathcopy, list->name);
++
++ while (cur_root != NULL && (cur = strsep(&next, "/"))) {
++ if (*cur =='\0') /* skip double/trailing "/" */
++ continue;
++
++ proc = lprocfs_srch(cur_root, cur);
++ CDEBUG(D_OTHER, "cur_root=%s, cur=%s, next=%s, (%s)\n",
++ cur_root->name, cur, next,
++ (proc ? "exists" : "new"));
++ if (next != NULL) {
++ cur_root = (proc ? proc :
++ proc_mkdir(cur, cur_root));
++ } else if (proc == NULL) {
++ mode_t mode = 0;
++ if (list->proc_mode != 0000) {
++ mode = list->proc_mode;
++ } else {
++ if (list->read_fptr)
++ mode = 0444;
++ if (list->write_fptr)
++ mode |= 0200;
++ }
++ proc = create_proc_entry(cur, mode, cur_root);
++ }
++ }
++
++ if (pathcopy != pathbuf)
++ OBD_FREE(pathcopy, pathsize);
++
++ if (cur_root == NULL || proc == NULL) {
++ CERROR("LprocFS: No memory to create /proc entry %s",
++ list->name);
++ return -ENOMEM;
++ }
++
++ if (list->fops)
++ proc->proc_fops = list->fops;
++ else
++ proc->proc_fops = &lprocfs_generic_fops;
++ proc->read_proc = list->read_fptr;
++ proc->write_proc = list->write_fptr;
++ proc->data = (list->data ? list->data : data);
++ list++;
++ }
++ return 0;
++}
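
lprocfs_add_vars() treats each name as a '/'-separated path under root,
creating intermediate directories on demand and wiring
read_fptr/write_fptr into lprocfs_generic_fops. A minimal caller-side
table, sketched with the { name, read, write, data } layout assumed and
readers borrowed from later in this file:

    static struct lprocfs_vars example_vars[] = {
            { "uuid",        lprocfs_rd_uuid,        NULL, NULL },
            { "num_exports", lprocfs_rd_num_exports, NULL, NULL },
            { NULL }
    };
    /* rc = lprocfs_add_vars(root, example_vars, obd); */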
++
++void lprocfs_remove(struct proc_dir_entry **rooth)
++{
++ struct proc_dir_entry *root = *rooth;
++ struct proc_dir_entry *temp = root;
++ struct proc_dir_entry *rm_entry;
++ struct proc_dir_entry *parent;
++
++ if (!root)
++ return;
++ *rooth = NULL;
++
++ parent = root->parent;
++ LASSERT(parent != NULL);
++ LPROCFS_WRITE_ENTRY(); /* search vs remove race */
++
++ while (1) {
++ while (temp->subdir != NULL)
++ temp = temp->subdir;
++
++ rm_entry = temp;
++ temp = temp->parent;
++
++ /* Memory corruption once caused this to fail, and
++ without this LASSERT we would loop here forever. */
++ LASSERTF(strlen(rm_entry->name) == rm_entry->namelen,
++ "0x%p %s/%s len %d\n", rm_entry, temp->name,
++ rm_entry->name, (int)strlen(rm_entry->name));
++
++ /* Now the rm_entry->deleted flag is protected
++ * by _lprocfs_lock. */
++ rm_entry->data = NULL;
++ remove_proc_entry(rm_entry->name, temp);
++ if (temp == parent)
++ break;
++ }
++ LPROCFS_WRITE_EXIT();
++}
++
++struct proc_dir_entry *lprocfs_register(const char *name,
++ struct proc_dir_entry *parent,
++ struct lprocfs_vars *list, void *data)
++{
++ struct proc_dir_entry *newchild;
++
++ newchild = lprocfs_srch(parent, name);
++ if (newchild != NULL) {
++ CERROR(" Lproc: Attempting to register %s more than once \n",
++ name);
++ return ERR_PTR(-EALREADY);
++ }
++
++ newchild = proc_mkdir(name, parent);
++ if (newchild != NULL && list != NULL) {
++ int rc = lprocfs_add_vars(newchild, list, data);
++ if (rc) {
++ lprocfs_remove(&newchild);
++ return ERR_PTR(rc);
++ }
++ }
++ return newchild;
++}
++
++/* Generic callbacks */
++int lprocfs_rd_uint(char *page, char **start, off_t off,
++ int count, int *eof, void *data)
++{
++ unsigned int *temp = (unsigned int *)data;
++ return snprintf(page, count, "%u\n", *temp);
++}
++
++int lprocfs_wr_uint(struct file *file, const char *buffer,
++ unsigned long count, void *data)
++{
++ unsigned *p = data;
++ char dummy[MAX_STRING_SIZE + 1] = { '\0' }, *end;
++ unsigned long tmp;
++
++ if (count >= sizeof(dummy) || count == 0)
++ return -EINVAL;
++
++ if (copy_from_user(dummy, buffer, count))
++ return -EFAULT;
++
++ tmp = simple_strtoul(dummy, &end, 0);
++ if (dummy == end)
++ return -EINVAL;
++
++ *p = (unsigned int)tmp;
++ return count;
++}
++
++int lprocfs_rd_u64(char *page, char **start, off_t off,
++ int count, int *eof, void *data)
++{
++ LASSERT(data != NULL);
++ *eof = 1;
++ return snprintf(page, count, LPU64"\n", *(__u64 *)data);
++}
++
++int lprocfs_rd_atomic(char *page, char **start, off_t off,
++ int count, int *eof, void *data)
++{
++ atomic_t *atom = (atomic_t *)data;
++ LASSERT(atom != NULL);
++ *eof = 1;
++ return snprintf(page, count, "%d\n", atomic_read(atom));
++}
++
++int lprocfs_wr_atomic(struct file *file, const char *buffer,
++ unsigned long count, void *data)
++{
++ atomic_t *atm = data;
++ int val = 0;
++ int rc;
++
++ rc = lprocfs_write_helper(buffer, count, &val);
++ if (rc < 0)
++ return rc;
++
++ if (val <= 0)
++ return -ERANGE;
++
++ atomic_set(atm, val);
++ return count;
++}
++
++int lprocfs_rd_uuid(char *page, char **start, off_t off, int count,
++ int *eof, void *data)
++{
++ struct obd_device *obd = (struct obd_device*)data;
++
++ LASSERT(obd != NULL);
++ *eof = 1;
++ return snprintf(page, count, "%s\n", obd->obd_uuid.uuid);
++}
++
++int lprocfs_rd_name(char *page, char **start, off_t off, int count,
++ int *eof, void* data)
++{
++ struct obd_device *dev = (struct obd_device *)data;
++
++ LASSERT(dev != NULL);
++ LASSERT(dev->obd_name != NULL);
++ *eof = 1;
++ return snprintf(page, count, "%s\n", dev->obd_name);
++}
++
++int lprocfs_rd_fstype(char *page, char **start, off_t off, int count, int *eof,
++ void *data)
++{
++ struct obd_device *obd = (struct obd_device *)data;
++
++ LASSERT(obd != NULL);
++ LASSERT(obd->obd_fsops != NULL);
++ LASSERT(obd->obd_fsops->fs_type != NULL);
++ return snprintf(page, count, "%s\n", obd->obd_fsops->fs_type);
++}
++
++int lprocfs_rd_blksize(char *page, char **start, off_t off, int count,
++ int *eof, void *data)
++{
++ struct obd_statfs osfs;
++ int rc = obd_statfs(data, &osfs, cfs_time_current_64() - HZ,
++ OBD_STATFS_NODELAY);
++ if (!rc) {
++ *eof = 1;
++ rc = snprintf(page, count, "%u\n", osfs.os_bsize);
++ }
++ return rc;
++}
++
++int lprocfs_rd_kbytestotal(char *page, char **start, off_t off, int count,
++ int *eof, void *data)
++{
++ struct obd_statfs osfs;
++ int rc = obd_statfs(data, &osfs, cfs_time_current_64() - HZ,
++ OBD_STATFS_NODELAY);
++ if (!rc) {
++ __u32 blk_size = osfs.os_bsize >> 10;
++ __u64 result = osfs.os_blocks;
++
++ while (blk_size >>= 1)
++ result <<= 1;
++
++ *eof = 1;
++ rc = snprintf(page, count, LPU64"\n", result);
++ }
++ return rc;
++}
++
++int lprocfs_rd_kbytesfree(char *page, char **start, off_t off, int count,
++ int *eof, void *data)
++{
++ struct obd_statfs osfs;
++ int rc = obd_statfs(data, &osfs, cfs_time_current_64() - HZ,
++ OBD_STATFS_NODELAY);
++ if (!rc) {
++ __u32 blk_size = osfs.os_bsize >> 10;
++ __u64 result = osfs.os_bfree;
++
++ while (blk_size >>= 1)
++ result <<= 1;
++
++ *eof = 1;
++ rc = snprintf(page, count, LPU64"\n", result);
++ }
++ return rc;
++}
++
++int lprocfs_rd_kbytesavail(char *page, char **start, off_t off, int count,
++ int *eof, void *data)
++{
++ struct obd_statfs osfs;
++ int rc = obd_statfs(data, &osfs, cfs_time_current_64() - HZ,
++ OBD_STATFS_NODELAY);
++ if (!rc) {
++ __u32 blk_size = osfs.os_bsize >> 10;
++ __u64 result = osfs.os_bavail;
++
++ while (blk_size >>= 1)
++ result <<= 1;
++
++ *eof = 1;
++ rc = snprintf(page, count, LPU64"\n", result);
++ }
++ return rc;
++}
++
++int lprocfs_rd_filestotal(char *page, char **start, off_t off, int count,
++ int *eof, void *data)
++{
++ struct obd_statfs osfs;
++ int rc = obd_statfs(data, &osfs, cfs_time_current_64() - HZ,
++ OBD_STATFS_NODELAY);
++ if (!rc) {
++ *eof = 1;
++ rc = snprintf(page, count, LPU64"\n", osfs.os_files);
++ }
++
++ return rc;
++}
++
++int lprocfs_rd_filesfree(char *page, char **start, off_t off, int count,
++ int *eof, void *data)
++{
++ struct obd_statfs osfs;
++ int rc = obd_statfs(data, &osfs, cfs_time_current_64() - HZ,
++ OBD_STATFS_NODELAY);
++ if (!rc) {
++ *eof = 1;
++ rc = snprintf(page, count, LPU64"\n", osfs.os_ffree);
++ }
++ return rc;
++}
++
++int lprocfs_rd_server_uuid(char *page, char **start, off_t off, int count,
++ int *eof, void *data)
++{
++ struct obd_device *obd = (struct obd_device *)data;
++ struct obd_import *imp;
++ char *imp_state_name = NULL;
++ int rc = 0;
++
++ LASSERT(obd != NULL);
++ LPROCFS_CLIMP_CHECK(obd);
++ imp = obd->u.cli.cl_import;
++ imp_state_name = ptlrpc_import_state_name(imp->imp_state);
++ *eof = 1;
++ rc = snprintf(page, count, "%s\t%s%s\n",
++ obd2cli_tgt(obd), imp_state_name,
++ imp->imp_deactive ? "\tDEACTIVATED" : "");
++
++ LPROCFS_CLIMP_EXIT(obd);
++ return rc;
++}
++
++int lprocfs_rd_conn_uuid(char *page, char **start, off_t off, int count,
++ int *eof, void *data)
++{
++ struct obd_device *obd = (struct obd_device*)data;
++ struct ptlrpc_connection *conn;
++ int rc = 0;
++
++ LASSERT(obd != NULL);
++ LPROCFS_CLIMP_CHECK(obd);
++ conn = obd->u.cli.cl_import->imp_connection;
++ LASSERT(conn != NULL);
++ *eof = 1;
++ rc = snprintf(page, count, "%s\n", conn->c_remote_uuid.uuid);
++
++ LPROCFS_CLIMP_EXIT(obd);
++ return rc;
++}
++
++#define flag2str(flag) \
++ if (imp->imp_##flag && max - len > 0) \
++ len += snprintf(str + len, max - len, " " #flag);
++
++/**
++ * Append a space separated list of current set flags to str.
++ */
++static int obd_import_flags2str(struct obd_import *imp, char *str,
++ int max)
++{
++ int len = 0;
++
++ if (imp->imp_obd->obd_no_recov)
++ len += snprintf(str, max - len, " no_recov");
++
++ flag2str(invalid);
++ flag2str(deactive);
++ flag2str(replayable);
++ flag2str(pingable);
++ flag2str(recon_bk);
++ flag2str(last_recon);
++ return len;
++}
++#undef flag2str
++
++int lprocfs_rd_import(char *page, char **start, off_t off, int count,
++ int *eof, void *data)
++{
++ struct obd_device *obd = (struct obd_device *)data;
++ struct obd_import *imp;
++ char *imp_state_name = NULL;
++ int rc = 0;
++
++ LASSERT(obd != NULL);
++ LPROCFS_CLIMP_CHECK(obd);
++ imp = obd->u.cli.cl_import;
++ imp_state_name = ptlrpc_import_state_name(imp->imp_state);
++ *eof = 1;
++
++ rc = snprintf(page, count,
++ "import: %s\n"
++ " target: %s@%s\n"
++ " state: %s\n"
++ " inflight: %u\n"
++ " unregistering: %u\n"
++ " conn_cnt: %u\n"
++ " generation: %u\n"
++ " inval_cnt: %u\n"
++ " last_replay_transno: "LPU64"\n"
++ " peer_committed_transno: "LPU64"\n"
++ " last_trasno_checked: "LPU64"\n"
++ " flags:",
++ obd->obd_name,
++ obd2cli_tgt(obd), imp->imp_connection->c_remote_uuid.uuid,
++ imp_state_name,
++ atomic_read(&imp->imp_inflight),
++ atomic_read(&imp->imp_unregistering),
++ imp->imp_conn_cnt,
++ imp->imp_generation,
++ atomic_read(&imp->imp_inval_count),
++ imp->imp_last_replay_transno,
++ imp->imp_peer_committed_transno,
++ imp->imp_last_transno_checked);
++ rc += obd_import_flags2str(imp, page + rc, count - rc);
++ rc += snprintf(page+rc, count - rc, "\n");
++ LPROCFS_CLIMP_EXIT(obd);
++ return rc;
++}
++
++int lprocfs_at_hist_helper(char *page, int count, int rc,
++ struct adaptive_timeout *at)
++{
++ int i;
++ for (i = 0; i < AT_BINS; i++)
++ rc += snprintf(page + rc, count - rc, "%3u ", at->at_hist[i]);
++ rc += snprintf(page + rc, count - rc, "\n");
++ return rc;
++}
++
++/* See also ptlrpc_lprocfs_rd_timeouts */
++int lprocfs_rd_timeouts(char *page, char **start, off_t off, int count,
++ int *eof, void *data)
++{
++ struct obd_device *obd = (struct obd_device *)data;
++ struct obd_import *imp;
++ unsigned int cur, worst;
++ time_t now, worstt;
++ struct dhms ts;
++ int i, rc = 0;
++
++ LASSERT(obd != NULL);
++ LPROCFS_CLIMP_CHECK(obd);
++ imp = obd->u.cli.cl_import;
++ *eof = 1;
++
++ now = cfs_time_current_sec();
++
++ /* Some network health info for kicks */
++ s2dhms(&ts, now - imp->imp_last_reply_time);
++ rc += snprintf(page + rc, count - rc,
++ "%-10s : %ld, "DHMS_FMT" ago\n",
++ "last reply", imp->imp_last_reply_time, DHMS_VARS(&ts));
++
++ cur = at_get(&imp->imp_at.iat_net_latency);
++ worst = imp->imp_at.iat_net_latency.at_worst_ever;
++ worstt = imp->imp_at.iat_net_latency.at_worst_time;
++ s2dhms(&ts, now - worstt);
++ rc += snprintf(page + rc, count - rc,
++ "%-10s : cur %3u worst %3u (at %ld, "DHMS_FMT" ago) ",
++ "network", cur, worst, worstt, DHMS_VARS(&ts));
++ rc = lprocfs_at_hist_helper(page, count, rc,
++ &imp->imp_at.iat_net_latency);
++
++ for(i = 0; i < IMP_AT_MAX_PORTALS; i++) {
++ if (imp->imp_at.iat_portal[i] == 0)
++ break;
++ cur = at_get(&imp->imp_at.iat_service_estimate[i]);
++ worst = imp->imp_at.iat_service_estimate[i].at_worst_ever;
++ worstt = imp->imp_at.iat_service_estimate[i].at_worst_time;
++ s2dhms(&ts, now - worstt);
++ rc += snprintf(page + rc, count - rc,
++ "portal %-2d : cur %3u worst %3u (at %ld, "
++ DHMS_FMT" ago) ", imp->imp_at.iat_portal[i],
++ cur, worst, worstt, DHMS_VARS(&ts));
++ rc = lprocfs_at_hist_helper(page, count, rc,
++ &imp->imp_at.iat_service_estimate[i]);
++ }
++
++ LPROCFS_CLIMP_EXIT(obd);
++ return rc;
++}
++
++static const char *obd_connect_names[] = {
++ "read_only",
++ "lov_index",
++ "unused",
++ "write_grant",
++ "server_lock",
++ "version",
++ "request_portal",
++ "acl",
++ "xattr",
++ "create_on_write",
++ "truncate_lock",
++ "initial_transno",
++ "inode_bit_locks",
++ "join_file",
++ "getattr_by_fid",
++ "no_oh_for_devices",
++ "local_1.8_client",
++ "remote_1.8_client",
++ "max_byte_per_rpc",
++ "64bit_qdata",
++ "fid_capability",
++ "oss_capability",
++ "early_lock_cancel",
++ "size_on_mds",
++ "adaptive_timeout",
++ "lru_resize",
++ "mds_mds_connection",
++ "real_conn",
++ "change_qunit_size",
++ "alt_checksum_algorithm",
++ "fid_is_enabled",
++ "version_recovery",
++ "pools",
++ NULL
++};
++
++int lprocfs_rd_connect_flags(char *page, char **start, off_t off,
++ int count, int *eof, void *data)
++{
++ struct obd_device *obd = data;
++ __u64 mask = 1, flags;
++ int i, ret = 0;
++
++ LPROCFS_CLIMP_CHECK(obd);
++ flags = obd->u.cli.cl_import->imp_connect_data.ocd_connect_flags;
++ ret = snprintf(page, count, "flags="LPX64"\n", flags);
++ for (i = 0; obd_connect_names[i] != NULL; i++, mask <<= 1) {
++ if (flags & mask)
++ ret += snprintf(page + ret, count - ret, "%s\n",
++ obd_connect_names[i]);
++ }
++ if (flags & ~(mask - 1))
++ ret += snprintf(page + ret, count - ret,
++ "unknown flags "LPX64"\n", flags & ~(mask - 1));
++
++ LPROCFS_CLIMP_EXIT(obd);
++ return ret;
++}
++EXPORT_SYMBOL(lprocfs_rd_connect_flags);
++
++int lprocfs_rd_num_exports(char *page, char **start, off_t off, int count,
++ int *eof, void *data)
++{
++ struct obd_device *obd = (struct obd_device*)data;
++
++ LASSERT(obd != NULL);
++ *eof = 1;
++ return snprintf(page, count, "%u\n", obd->obd_num_exports);
++}
++
++int lprocfs_rd_numrefs(char *page, char **start, off_t off, int count,
++ int *eof, void *data)
++{
++ struct obd_type *class = (struct obd_type*) data;
++
++ LASSERT(class != NULL);
++ *eof = 1;
++ return snprintf(page, count, "%d\n", class->typ_refcnt);
++}
++
++int lprocfs_obd_setup(struct obd_device *obd, struct lprocfs_vars *list)
++{
++ int rc = 0;
++
++ LASSERT(obd != NULL);
++ LASSERT(obd->obd_magic == OBD_DEVICE_MAGIC);
++ LASSERT(obd->obd_type->typ_procroot != NULL);
++
++ obd->obd_proc_entry = lprocfs_register(obd->obd_name,
++ obd->obd_type->typ_procroot,
++ list, obd);
++ if (IS_ERR(obd->obd_proc_entry)) {
++ rc = PTR_ERR(obd->obd_proc_entry);
++ CERROR("error %d setting up lprocfs for %s\n",rc,obd->obd_name);
++ obd->obd_proc_entry = NULL;
++ }
++ return rc;
++}
++
++int lprocfs_obd_cleanup(struct obd_device *obd)
++{
++ if (!obd)
++ return -EINVAL;
++ if (obd->obd_proc_exports_entry) {
++ /* Should be no exports left */
++ LASSERT(obd->obd_proc_exports_entry->subdir == NULL);
++ lprocfs_remove(&obd->obd_proc_exports_entry);
++ }
++ lprocfs_remove(&obd->obd_proc_entry);
++ return 0;
++}
++
++static void lprocfs_free_client_stats(struct nid_stat *client_stat)
++{
++ CDEBUG(D_CONFIG, "stat %p - data %p/%p/%p\n", client_stat,
++ client_stat->nid_proc, client_stat->nid_stats,
++ client_stat->nid_brw_stats);
++
++ LASSERTF(client_stat->nid_exp_ref_count == 0, "count %d\n",
++ client_stat->nid_exp_ref_count);
++
++ hlist_del_init(&client_stat->nid_hash);
++
++ if (client_stat->nid_proc)
++ lprocfs_remove(&client_stat->nid_proc);
++
++ if (client_stat->nid_stats)
++ lprocfs_free_stats(&client_stat->nid_stats);
++
++ if (client_stat->nid_brw_stats)
++ OBD_FREE_PTR(client_stat->nid_brw_stats);
++
++ if (client_stat->nid_ldlm_stats)
++ lprocfs_free_stats(&client_stat->nid_ldlm_stats);
++
++ OBD_FREE_PTR(client_stat);
++ return;
++
++}
++
++void lprocfs_free_per_client_stats(struct obd_device *obd)
++{
++ struct nid_stat *stat;
++ ENTRY;
++
++ /* we need an extra list because hash_exit is called too early */
++ /* no locking is needed because all clients have died */
++ while(!list_empty(&obd->obd_nid_stats)) {
++ stat = list_entry(obd->obd_nid_stats.next,
++ struct nid_stat, nid_list);
++ list_del_init(&stat->nid_list);
++ lprocfs_free_client_stats(stat);
++ }
++
++ EXIT;
++}
++
++struct lprocfs_stats *lprocfs_alloc_stats(unsigned int num,
++ enum lprocfs_stats_flags flags)
++{
++ struct lprocfs_stats *stats;
++ unsigned int percpusize;
++ unsigned int i, j;
++ unsigned int num_cpu;
++
++ if (num == 0)
++ return NULL;
++
++ if (flags & LPROCFS_STATS_FLAG_NOPERCPU)
++ num_cpu = 1;
++ else
++ num_cpu = num_possible_cpus();
++
++ OBD_ALLOC(stats, offsetof(typeof(*stats), ls_percpu[num_cpu]));
++ if (stats == NULL)
++ return NULL;
++
++ if (flags & LPROCFS_STATS_FLAG_NOPERCPU) {
++ stats->ls_flags = flags;
++ spin_lock_init(&stats->ls_lock);
++ /* Use this lock only if there are no percpu areas */
++ } else {
++ stats->ls_flags = 0;
++ }
++
++ percpusize = offsetof(struct lprocfs_percpu, lp_cntr[num]);
++ if (num_cpu > 1)
++ percpusize = L1_CACHE_ALIGN(percpusize);
++
++ for (i = 0; i < num_cpu; i++) {
++ OBD_ALLOC(stats->ls_percpu[i], percpusize);
++ if (stats->ls_percpu[i] == NULL) {
++ for (j = 0; j < i; j++) {
++ OBD_FREE(stats->ls_percpu[j], percpusize);
++ stats->ls_percpu[j] = NULL;
++ }
++ break;
++ }
++ }
++ if (stats->ls_percpu[0] == NULL) {
++ OBD_FREE(stats, offsetof(typeof(*stats),
++ ls_percpu[num_cpu]));
++ return NULL;
++ }
++
++ stats->ls_num = num;
++ return stats;
++}
++
++void lprocfs_free_stats(struct lprocfs_stats **statsh)
++{
++ struct lprocfs_stats *stats = *statsh;
++ unsigned int num_cpu;
++ unsigned int percpusize;
++ unsigned int i;
++
++ if (!stats || (stats->ls_num == 0))
++ return;
++ *statsh = NULL;
++ if (stats->ls_flags & LPROCFS_STATS_FLAG_NOPERCPU)
++ num_cpu = 1;
++ else
++ num_cpu = num_possible_cpus();
++
++ percpusize = offsetof(struct lprocfs_percpu, lp_cntr[stats->ls_num]);
++ if (num_cpu > 1)
++ percpusize = L1_CACHE_ALIGN(percpusize);
++ for (i = 0; i < num_cpu; i++)
++ OBD_FREE(stats->ls_percpu[i], percpusize);
++ OBD_FREE(stats, offsetof(typeof(*stats), ls_percpu[num_cpu]));
++}
++
++void lprocfs_clear_stats(struct lprocfs_stats *stats)
++{
++ struct lprocfs_counter *percpu_cntr;
++ int i, j;
++ unsigned int num_cpu;
++
++ num_cpu = lprocfs_stats_lock(stats, LPROCFS_GET_NUM_CPU);
++
++ for (i = 0; i < num_cpu; i++) {
++ for (j = 0; j < stats->ls_num; j++) {
++ percpu_cntr = &(stats->ls_percpu[i])->lp_cntr[j];
++ atomic_inc(&percpu_cntr->lc_cntl.la_entry);
++ percpu_cntr->lc_count = 0;
++ percpu_cntr->lc_sum = 0;
++ percpu_cntr->lc_min = LC_MIN_INIT;
++ percpu_cntr->lc_max = 0;
++ percpu_cntr->lc_sumsquare = 0;
++ atomic_inc(&percpu_cntr->lc_cntl.la_exit);
++ }
++ }
++
++ lprocfs_stats_unlock(stats);
++}
++
++static ssize_t lprocfs_stats_seq_write(struct file *file, const char *buf,
++ size_t len, loff_t *off)
++{
++ struct seq_file *seq = file->private_data;
++ struct lprocfs_stats *stats = seq->private;
++
++ lprocfs_clear_stats(stats);
++
++ return len;
++}
++
++static void *lprocfs_stats_seq_start(struct seq_file *p, loff_t *pos)
++{
++ struct lprocfs_stats *stats = p->private;
++ /* return 1st cpu location */
++ return (*pos >= stats->ls_num) ? NULL :
++ &(stats->ls_percpu[0]->lp_cntr[*pos]);
++}
++
++static void lprocfs_stats_seq_stop(struct seq_file *p, void *v)
++{
++}
++
++static void *lprocfs_stats_seq_next(struct seq_file *p, void *v, loff_t *pos)
++{
++ struct lprocfs_stats *stats = p->private;
++ ++*pos;
++ return (*pos >= stats->ls_num) ? NULL :
++ &(stats->ls_percpu[0]->lp_cntr[*pos]);
++}
++
++/* seq file export of one lprocfs counter */
++static int lprocfs_stats_seq_show(struct seq_file *p, void *v)
++{
++ struct lprocfs_stats *stats = p->private;
++ struct lprocfs_counter *cntr = v;
++ struct lprocfs_counter t, ret = { .lc_min = LC_MIN_INIT };
++ int i, idx, rc = 0;
++ unsigned int num_cpu;
++
++ if (cntr == &(stats->ls_percpu[0])->lp_cntr[0]) {
++ struct timeval now;
++ do_gettimeofday(&now);
++ rc = seq_printf(p, "%-25s %lu.%lu secs.usecs\n",
++ "snapshot_time", now.tv_sec, now.tv_usec);
++ if (rc < 0)
++ return rc;
++ }
++ idx = cntr - &(stats->ls_percpu[0])->lp_cntr[0];
++
++ if (stats->ls_flags & LPROCFS_STATS_FLAG_NOPERCPU)
++ num_cpu = 1;
++ else
++ num_cpu = num_possible_cpus();
++
++ for (i = 0; i < num_cpu; i++) {
++ struct lprocfs_counter *percpu_cntr =
++ &(stats->ls_percpu[i])->lp_cntr[idx];
++ int centry;
++
++ do {
++ centry = atomic_read(&percpu_cntr->lc_cntl.la_entry);
++ t.lc_count = percpu_cntr->lc_count;
++ t.lc_sum = percpu_cntr->lc_sum;
++ t.lc_min = percpu_cntr->lc_min;
++ t.lc_max = percpu_cntr->lc_max;
++ t.lc_sumsquare = percpu_cntr->lc_sumsquare;
++ } while (centry != atomic_read(&percpu_cntr->lc_cntl.la_entry) &&
++ centry != atomic_read(&percpu_cntr->lc_cntl.la_exit));
++ ret.lc_count += t.lc_count;
++ ret.lc_sum += t.lc_sum;
++ if (t.lc_min < ret.lc_min)
++ ret.lc_min = t.lc_min;
++ if (t.lc_max > ret.lc_max)
++ ret.lc_max = t.lc_max;
++ ret.lc_sumsquare += t.lc_sumsquare;
++ }
++
++ if (ret.lc_count == 0)
++ goto out;
++
++ rc = seq_printf(p, "%-25s "LPD64" samples [%s]", cntr->lc_name,
++ ret.lc_count, cntr->lc_units);
++ if (rc < 0)
++ goto out;
++
++ if ((cntr->lc_config & LPROCFS_CNTR_AVGMINMAX) && (ret.lc_count > 0)) {
++ rc = seq_printf(p, " "LPD64" "LPD64" "LPD64,
++ ret.lc_min, ret.lc_max, ret.lc_sum);
++ if (rc < 0)
++ goto out;
++ if (cntr->lc_config & LPROCFS_CNTR_STDDEV)
++ rc = seq_printf(p, " "LPD64, ret.lc_sumsquare);
++ if (rc < 0)
++ goto out;
++ }
++ rc = seq_printf(p, "\n");
++ out:
++ return (rc < 0) ? rc : 0;
++}
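
lprocfs_stats_seq_show() merges the per-CPU counters without locking:
each counter carries la_entry/la_exit generation counts, and the reader
re-copies until its snapshot was not straddled by a writer. The writer
side (lprocfs_counter_add(), defined elsewhere) is assumed to bracket
its update like this sketch:

    /* hedged sketch of the writer half of the la_entry/la_exit protocol */
    atomic_inc(&percpu_cntr->lc_cntl.la_entry);     /* update begins */
    percpu_cntr->lc_count++;
    percpu_cntr->lc_sum += amount;
    /* ... min/max/sumsquare updates elided ... */
    atomic_inc(&percpu_cntr->lc_cntl.la_exit);      /* update ends */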
++
++struct seq_operations lprocfs_stats_seq_sops = {
++ start: lprocfs_stats_seq_start,
++ stop: lprocfs_stats_seq_stop,
++ next: lprocfs_stats_seq_next,
++ show: lprocfs_stats_seq_show,
++};
++
++static int lprocfs_stats_seq_open(struct inode *inode, struct file *file)
++{
++ struct proc_dir_entry *dp = PDE(inode);
++ struct seq_file *seq;
++ int rc;
++
++ LPROCFS_ENTRY_AND_CHECK(dp);
++ rc = seq_open(file, &lprocfs_stats_seq_sops);
++ if (rc) {
++ LPROCFS_EXIT();
++ return rc;
++ }
++
++ seq = file->private_data;
++ seq->private = dp->data;
++ return 0;
++}
++
++struct file_operations lprocfs_stats_seq_fops = {
++ .owner = THIS_MODULE,
++ .open = lprocfs_stats_seq_open,
++ .read = seq_read,
++ .write = lprocfs_stats_seq_write,
++ .llseek = seq_lseek,
++ .release = lprocfs_seq_release,
++};
++
++int lprocfs_register_stats(struct proc_dir_entry *root, const char *name,
++ struct lprocfs_stats *stats)
++{
++ struct proc_dir_entry *entry;
++ LASSERT(root != NULL);
++
++ entry = create_proc_entry(name, 0644, root);
++ if (entry == NULL)
++ return -ENOMEM;
++ entry->proc_fops = &lprocfs_stats_seq_fops;
++ entry->data = (void *)stats;
++ return 0;
++}
++
++void lprocfs_counter_init(struct lprocfs_stats *stats, int index,
++ unsigned conf, const char *name, const char *units)
++{
++ struct lprocfs_counter *c;
++ int i;
++ unsigned int num_cpu;
++
++ LASSERT(stats != NULL);
++
++ num_cpu = lprocfs_stats_lock(stats, LPROCFS_GET_NUM_CPU);
++
++ for (i = 0; i < num_cpu; i++) {
++ c = &(stats->ls_percpu[i]->lp_cntr[index]);
++ c->lc_config = conf;
++ c->lc_count = 0;
++ c->lc_sum = 0;
++ c->lc_min = LC_MIN_INIT;
++ c->lc_max = 0;
++ c->lc_name = name;
++ c->lc_units = units;
++ }
++
++ lprocfs_stats_unlock(stats);
++}
++EXPORT_SYMBOL(lprocfs_counter_init);
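
Taken together, lprocfs_alloc_stats(), lprocfs_register_stats() and
lprocfs_counter_init() form the usual three-step setup. A minimal
sketch (`parent` is an assumed proc directory):

    struct lprocfs_stats *stats = lprocfs_alloc_stats(1, 0);
    if (stats == NULL)
            return -ENOMEM;
    lprocfs_counter_init(stats, 0, LPROCFS_CNTR_AVGMINMAX,
                         "example", "reqs");
    rc = lprocfs_register_stats(parent, "stats", stats);
    if (rc)
            lprocfs_free_stats(&stats);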
++
++#define LPROCFS_OBD_OP_INIT(base, stats, op) \
++do { \
++ unsigned int coffset = base + OBD_COUNTER_OFFSET(op); \
++ LASSERT(coffset < stats->ls_num); \
++ lprocfs_counter_init(stats, coffset, 0, #op, "reqs"); \
++} while (0)
++
++void lprocfs_init_ops_stats(int num_private_stats, struct lprocfs_stats *stats)
++{
++ LPROCFS_OBD_OP_INIT(num_private_stats, stats, iocontrol);
++ LPROCFS_OBD_OP_INIT(num_private_stats, stats, get_info);
++ LPROCFS_OBD_OP_INIT(num_private_stats, stats, set_info_async);
++ LPROCFS_OBD_OP_INIT(num_private_stats, stats, attach);
++ LPROCFS_OBD_OP_INIT(num_private_stats, stats, detach);
++ LPROCFS_OBD_OP_INIT(num_private_stats, stats, setup);
++ LPROCFS_OBD_OP_INIT(num_private_stats, stats, precleanup);
++ LPROCFS_OBD_OP_INIT(num_private_stats, stats, cleanup);
++ LPROCFS_OBD_OP_INIT(num_private_stats, stats, process_config);
++ LPROCFS_OBD_OP_INIT(num_private_stats, stats, postrecov);
++ LPROCFS_OBD_OP_INIT(num_private_stats, stats, add_conn);
++ LPROCFS_OBD_OP_INIT(num_private_stats, stats, del_conn);
++ LPROCFS_OBD_OP_INIT(num_private_stats, stats, connect);
++ LPROCFS_OBD_OP_INIT(num_private_stats, stats, reconnect);
++ LPROCFS_OBD_OP_INIT(num_private_stats, stats, disconnect);
++ LPROCFS_OBD_OP_INIT(num_private_stats, stats, statfs);
++ LPROCFS_OBD_OP_INIT(num_private_stats, stats, statfs_async);
++ LPROCFS_OBD_OP_INIT(num_private_stats, stats, packmd);
++ LPROCFS_OBD_OP_INIT(num_private_stats, stats, unpackmd);
++ LPROCFS_OBD_OP_INIT(num_private_stats, stats, checkmd);
++ LPROCFS_OBD_OP_INIT(num_private_stats, stats, preallocate);
++ LPROCFS_OBD_OP_INIT(num_private_stats, stats, precreate);
++ LPROCFS_OBD_OP_INIT(num_private_stats, stats, create);
++ LPROCFS_OBD_OP_INIT(num_private_stats, stats, destroy);
++ LPROCFS_OBD_OP_INIT(num_private_stats, stats, setattr);
++ LPROCFS_OBD_OP_INIT(num_private_stats, stats, setattr_async);
++ LPROCFS_OBD_OP_INIT(num_private_stats, stats, getattr);
++ LPROCFS_OBD_OP_INIT(num_private_stats, stats, getattr_async);
++ LPROCFS_OBD_OP_INIT(num_private_stats, stats, brw);
++ LPROCFS_OBD_OP_INIT(num_private_stats, stats, brw_async);
++ LPROCFS_OBD_OP_INIT(num_private_stats, stats, prep_async_page);
++ LPROCFS_OBD_OP_INIT(num_private_stats, stats, reget_short_lock);
++ LPROCFS_OBD_OP_INIT(num_private_stats, stats, release_short_lock);
++ LPROCFS_OBD_OP_INIT(num_private_stats, stats, queue_async_io);
++ LPROCFS_OBD_OP_INIT(num_private_stats, stats, queue_group_io);
++ LPROCFS_OBD_OP_INIT(num_private_stats, stats, trigger_group_io);
++ LPROCFS_OBD_OP_INIT(num_private_stats, stats, set_async_flags);
++ LPROCFS_OBD_OP_INIT(num_private_stats, stats, teardown_async_page);
++ LPROCFS_OBD_OP_INIT(num_private_stats, stats, merge_lvb);
++ LPROCFS_OBD_OP_INIT(num_private_stats, stats, adjust_kms);
++ LPROCFS_OBD_OP_INIT(num_private_stats, stats, punch);
++ LPROCFS_OBD_OP_INIT(num_private_stats, stats, sync);
++ LPROCFS_OBD_OP_INIT(num_private_stats, stats, migrate);
++ LPROCFS_OBD_OP_INIT(num_private_stats, stats, copy);
++ LPROCFS_OBD_OP_INIT(num_private_stats, stats, iterate);
++ LPROCFS_OBD_OP_INIT(num_private_stats, stats, preprw);
++ LPROCFS_OBD_OP_INIT(num_private_stats, stats, commitrw);
++ LPROCFS_OBD_OP_INIT(num_private_stats, stats, enqueue);
++ LPROCFS_OBD_OP_INIT(num_private_stats, stats, match);
++ LPROCFS_OBD_OP_INIT(num_private_stats, stats, change_cbdata);
++ LPROCFS_OBD_OP_INIT(num_private_stats, stats, cancel);
++ LPROCFS_OBD_OP_INIT(num_private_stats, stats, cancel_unused);
++ LPROCFS_OBD_OP_INIT(num_private_stats, stats, join_lru);
++ LPROCFS_OBD_OP_INIT(num_private_stats, stats, init_export);
++ LPROCFS_OBD_OP_INIT(num_private_stats, stats, destroy_export);
++ LPROCFS_OBD_OP_INIT(num_private_stats, stats, extent_calc);
++ LPROCFS_OBD_OP_INIT(num_private_stats, stats, llog_init);
++ LPROCFS_OBD_OP_INIT(num_private_stats, stats, llog_finish);
++ LPROCFS_OBD_OP_INIT(num_private_stats, stats, pin);
++ LPROCFS_OBD_OP_INIT(num_private_stats, stats, unpin);
++ LPROCFS_OBD_OP_INIT(num_private_stats, stats, import_event);
++ LPROCFS_OBD_OP_INIT(num_private_stats, stats, notify);
++ LPROCFS_OBD_OP_INIT(num_private_stats, stats, health_check);
++ LPROCFS_OBD_OP_INIT(num_private_stats, stats, quotacheck);
++ LPROCFS_OBD_OP_INIT(num_private_stats, stats, quotactl);
++ LPROCFS_OBD_OP_INIT(num_private_stats, stats, quota_adjust_qunit);
++ LPROCFS_OBD_OP_INIT(num_private_stats, stats, ping);
++ LPROCFS_OBD_OP_INIT(num_private_stats, stats, register_page_removal_cb);
++ LPROCFS_OBD_OP_INIT(num_private_stats,stats,unregister_page_removal_cb);
++ LPROCFS_OBD_OP_INIT(num_private_stats, stats, register_lock_cancel_cb);
++ LPROCFS_OBD_OP_INIT(num_private_stats, stats,unregister_lock_cancel_cb);
++}
++
++void lprocfs_init_ldlm_stats(struct lprocfs_stats *ldlm_stats)
++{
++ lprocfs_counter_init(ldlm_stats,
++ LDLM_ENQUEUE - LDLM_FIRST_OPC,
++ 0, "ldlm_enqueue", "reqs");
++ lprocfs_counter_init(ldlm_stats,
++ LDLM_CONVERT - LDLM_FIRST_OPC,
++ 0, "ldlm_convert", "reqs");
++ lprocfs_counter_init(ldlm_stats,
++ LDLM_CANCEL - LDLM_FIRST_OPC,
++ 0, "ldlm_cancel", "reqs");
++ lprocfs_counter_init(ldlm_stats,
++ LDLM_BL_CALLBACK - LDLM_FIRST_OPC,
++ 0, "ldlm_bl_callback", "reqs");
++ lprocfs_counter_init(ldlm_stats,
++ LDLM_CP_CALLBACK - LDLM_FIRST_OPC,
++ 0, "ldlm_cp_callback", "reqs");
++ lprocfs_counter_init(ldlm_stats,
++ LDLM_GL_CALLBACK - LDLM_FIRST_OPC,
++ 0, "ldlm_gl_callback", "reqs");
++}
++
++int lprocfs_alloc_obd_stats(struct obd_device *obd, unsigned num_private_stats)
++{
++ struct lprocfs_stats *stats;
++ unsigned int num_stats;
++ int rc, i;
++
++ LASSERT(obd->obd_stats == NULL);
++ LASSERT(obd->obd_proc_entry != NULL);
++ LASSERT(obd->obd_cntr_base == 0);
++
++ num_stats = ((int)sizeof(*obd->obd_type->typ_ops) / sizeof(void *)) +
++ num_private_stats - 1 /* o_owner */;
++ stats = lprocfs_alloc_stats(num_stats, 0);
++ if (stats == NULL)
++ return -ENOMEM;
++
++ lprocfs_init_ops_stats(num_private_stats, stats);
++
++ for (i = num_private_stats; i < num_stats; i++) {
++ /* If this LBUGs, it is likely that an obd
++ * operation was added to struct obd_ops in
++ * <obd.h>, and that the corresponding line item
++ * LPROCFS_OBD_OP_INIT(.., .., opname)
++ * is missing from the list above. */
++ LASSERTF(stats->ls_percpu[0]->lp_cntr[i].lc_name != NULL,
++ "Missing obd_stat initializer obd_op "
++ "operation at offset %d.\n", i - num_private_stats);
++ }
++ rc = lprocfs_register_stats(obd->obd_proc_entry, "stats", stats);
++ if (rc < 0) {
++ lprocfs_free_stats(&stats);
++ } else {
++ obd->obd_stats = stats;
++ obd->obd_cntr_base = num_private_stats;
++ }
++ return rc;
++}
++
++void lprocfs_free_obd_stats(struct obd_device *obd)
++{
++ if (obd->obd_stats)
++ lprocfs_free_stats(&obd->obd_stats);
++}
++
++int lprocfs_exp_rd_nid(char *page, char **start, off_t off, int count,
++ int *eof, void *data)
++{
++ struct obd_export *exp = (struct obd_export*)data;
++ LASSERT(exp != NULL);
++ *eof = 1;
++ return snprintf(page, count, "%s\n", obd_export_nid2str(exp));
++}
++
++struct exp_uuid_cb_data {
++ char *page;
++ int count;
++ int *eof;
++ int *len;
++};
++
++static void
++lprocfs_exp_rd_cb_data_init(struct exp_uuid_cb_data *cb_data, char *page,
++ int count, int *eof, int *len)
++{
++ cb_data->page = page;
++ cb_data->count = count;
++ cb_data->eof = eof;
++ cb_data->len = len;
++}
++
++void lprocfs_exp_print_uuid(void *obj, void *cb_data)
++{
++ struct obd_export *exp = (struct obd_export *)obj;
++ struct exp_uuid_cb_data *data = (struct exp_uuid_cb_data *)cb_data;
++
++ if (exp->exp_nid_stats)
++ *data->len += snprintf((data->page + *data->len),
++ data->count, "%s\n",
++ obd_uuid2str(&exp->exp_client_uuid));
++}
++
++int lprocfs_exp_rd_uuid(char *page, char **start, off_t off, int count,
++ int *eof, void *data)
++{
++ struct nid_stat *stats = (struct nid_stat *)data;
++ struct exp_uuid_cb_data cb_data;
++ struct obd_device *obd = stats->nid_obd;
++ int len = 0;
++
++ *eof = 1;
++ page[0] = '\0';
++ lprocfs_exp_rd_cb_data_init(&cb_data, page, count, eof, &len);
++ lustre_hash_for_each_key(obd->obd_nid_hash, &stats->nid,
++ lprocfs_exp_print_uuid, &cb_data);
++ return (*cb_data.len);
++}
++
++void lprocfs_exp_print_hash(void *obj, void *cb_data)
++{
++ struct obd_export *exp = (struct obd_export *)obj;
++ struct exp_uuid_cb_data *data = (struct exp_uuid_cb_data *)cb_data;
++ lustre_hash_t *lh;
++
++ lh = exp->exp_lock_hash;
++ if (lh) {
++ if (!*data->len)
++ *data->len += lustre_hash_debug_header(data->page,
++ data->count);
++
++ *data->len += lustre_hash_debug_str(lh, data->page +
++ *data->len,
++ data->count);
++ }
++}
++
++int lprocfs_exp_rd_hash(char *page, char **start, off_t off, int count,
++ int *eof, void *data)
++{
++ struct nid_stat *stats = (struct nid_stat *)data;
++ struct exp_uuid_cb_data cb_data;
++ struct obd_device *obd = stats->nid_obd;
++ int len = 0;
++
++ *eof = 1;
++ page[0] = '\0';
++ lprocfs_exp_rd_cb_data_init(&cb_data, page, count, eof, &len);
++ lustre_hash_for_each_key(obd->obd_nid_hash, &stats->nid,
++ lprocfs_exp_print_hash, &cb_data);
++ return (*cb_data.len);
++}
++
++int lprocfs_nid_stats_clear_read(char *page, char **start, off_t off,
++ int count, int *eof, void *data)
++{
++ *eof = 1;
++ return snprintf(page, count, "%s\n",
++ "Write into this file to clear all nid stats and "
++ "stale nid entries");
++}
++EXPORT_SYMBOL(lprocfs_nid_stats_clear_read);
++
++void lprocfs_nid_stats_clear_write_cb(void *obj, void *data)
++{
++ struct nid_stat *stat = obj;
++ int i;
++
++ /* the object holds only the hash + iterate_all references;
++ * add/delete are blocked by the hash bucket lock */
++ CDEBUG(D_INFO,"refcnt %d\n", stat->nid_exp_ref_count);
++ if (stat->nid_exp_ref_count == 2) {
++ hlist_del_init(&stat->nid_hash);
++ stat->nid_exp_ref_count--;
++ spin_lock(&stat->nid_obd->obd_nid_lock);
++ list_del_init(&stat->nid_list);
++ spin_unlock(&stat->nid_obd->obd_nid_lock);
++ list_add(&stat->nid_list, data);
++ EXIT;
++ return;
++ }
++ /* we still hold a reference to the object - only clear its data */
++ if (stat->nid_stats)
++ lprocfs_clear_stats(stat->nid_stats);
++
++ if (stat->nid_brw_stats) {
++ for (i = 0; i < BRW_LAST; i++)
++ lprocfs_oh_clear(&stat->nid_brw_stats->hist[i]);
++ }
++ EXIT;
++ return;
++}
++
++int lprocfs_nid_stats_clear_write(struct file *file, const char *buffer,
++ unsigned long count, void *data)
++{
++ struct obd_device *obd = (struct obd_device *)data;
++ struct nid_stat *client_stat;
++ CFS_LIST_HEAD(free_list);
++
++ lustre_hash_for_each(obd->obd_nid_stats_hash,
++ lprocfs_nid_stats_clear_write_cb, &free_list);
++
++ while (!list_empty(&free_list)) {
++ client_stat = list_entry(free_list.next, struct nid_stat,
++ nid_list);
++ list_del_init(&client_stat->nid_list);
++ lprocfs_free_client_stats(client_stat);
++ }
++
++ return count;
++}
++EXPORT_SYMBOL(lprocfs_nid_stats_clear_write);
++
++int lprocfs_exp_setup(struct obd_export *exp, lnet_nid_t *nid, int *newnid)
++{
++ struct nid_stat *new_stat, *old_stat;
++ struct nid_stat_uuid *new_ns_uuid;
++ struct obd_device *obd;
++ int rc = 0;
++ ENTRY;
++
++ *newnid = 0;
++
++ if (!exp || !exp->exp_obd || !exp->exp_obd->obd_proc_exports_entry ||
++ !exp->exp_obd->obd_nid_stats_hash)
++ RETURN(-EINVAL);
++
++ /* do not test against zero because, as Eric says:
++ * you may only test a nid against another nid, or LNET_NID_ANY.
++ * Anything else is nonsense. */
++ if (!nid || *nid == LNET_NID_ANY)
++ RETURN(0);
++
++ obd = exp->exp_obd;
++
++ CDEBUG(D_CONFIG, "using hash %p\n", obd->obd_nid_stats_hash);
++
++ OBD_ALLOC_PTR(new_stat);
++ if (new_stat == NULL)
++ RETURN(-ENOMEM);
++
++ OBD_ALLOC_PTR(new_ns_uuid);
++ if (new_ns_uuid == NULL) {
++ OBD_FREE_PTR(new_stat);
++ RETURN(-ENOMEM);
++ }
++ CFS_INIT_LIST_HEAD(&new_ns_uuid->ns_uuid_list);
++ strncpy(new_ns_uuid->ns_uuid.uuid, exp->exp_client_uuid.uuid,
++ sizeof(struct obd_uuid));
++
++ CFS_INIT_LIST_HEAD(&new_stat->nid_uuid_list);
++ new_stat->nid = *nid;
++ new_stat->nid_obd = exp->exp_obd;
++ /* must stay in the hash after the export is destroyed */
++ new_stat->nid_exp_ref_count = 1;
++
++ old_stat = lustre_hash_findadd_unique(obd->obd_nid_stats_hash,
++ nid, &new_stat->nid_hash);
++ CDEBUG(D_INFO, "Found stats %p for nid %s - ref %d\n",
++ old_stat, libcfs_nid2str(*nid), new_stat->nid_exp_ref_count);
++
++ /* Return -EALREADY here so that we know the /proc
++ * entry has already been created */
++ if (old_stat != new_stat) {
++ struct nid_stat_uuid *tmp_uuid;
++ int found = 0;
++
++ exp->exp_nid_stats = old_stat;
++
++ /* We need to decrement the refcount if the uuid was
++ * already in our list */
++ spin_lock(&obd->obd_nid_lock);
++ list_for_each_entry(tmp_uuid, &old_stat->nid_uuid_list,
++ ns_uuid_list) {
++ if (tmp_uuid && obd_uuid_equals(&tmp_uuid->ns_uuid,
++ &exp->exp_client_uuid)){
++ found = 1;
++ --old_stat->nid_exp_ref_count;
++ break;
++ }
++ }
++
++ if (!found)
++ list_add(&new_ns_uuid->ns_uuid_list,
++ &old_stat->nid_uuid_list);
++ else
++ OBD_FREE_PTR(new_ns_uuid);
++ spin_unlock(&obd->obd_nid_lock);
++
++ GOTO(destroy_new, rc = -EALREADY);
++ }
++ /* not found - create */
++ new_stat->nid_proc = proc_mkdir(libcfs_nid2str(*nid),
++ obd->obd_proc_exports_entry);
++ if (!new_stat->nid_proc) {
++ CERROR("Error making export directory for"
++ " nid %s\n", libcfs_nid2str(*nid));
++ GOTO(destroy_new_ns, rc = -ENOMEM);
++ }
++
++ /* Add in uuid to our nid_stats list */
++ spin_lock(&obd->obd_nid_lock);
++ list_add(&new_ns_uuid->ns_uuid_list, &new_stat->nid_uuid_list);
++ spin_unlock(&obd->obd_nid_lock);
++
++ rc = lprocfs_add_simple(new_stat->nid_proc, "uuid",
++ lprocfs_exp_rd_uuid, NULL, new_stat);
++ if (rc) {
++ CWARN("Error adding the uuid file\n");
++ GOTO(destroy_new_ns, rc);
++ }
++
++ rc = lprocfs_add_simple(new_stat->nid_proc, "hash",
++ lprocfs_exp_rd_hash, NULL, new_stat);
++ if (rc) {
++ CWARN("Error adding the hash file\n");
++ lprocfs_remove(&new_stat->nid_proc);
++ GOTO(destroy_new_ns, rc);
++ }
++
++ exp->exp_nid_stats = new_stat;
++ *newnid = 1;
++ /* protect against concurrent list additions; no locking needed on destroy */
++ spin_lock(&obd->obd_nid_lock);
++ list_add(&new_stat->nid_list, &obd->obd_nid_stats);
++ spin_unlock(&obd->obd_nid_lock);
++
++ RETURN(rc);
++
++destroy_new_ns:
++ lustre_hash_del(obd->obd_nid_stats_hash, nid, &new_stat->nid_hash);
++ OBD_FREE_PTR(new_ns_uuid);
++
++destroy_new:
++ OBD_FREE_PTR(new_stat);
++ RETURN(rc);
++}
++
++int lprocfs_exp_cleanup(struct obd_export *exp)
++{
++ struct nid_stat *stat = exp->exp_nid_stats;
++ struct nid_stat_uuid *cursor, *tmp;
++ int found = 0;
++
++ if(!stat || !exp->exp_obd)
++ RETURN(0);
++
++ spin_lock(&exp->exp_obd->obd_nid_lock);
++ list_for_each_entry_safe(cursor, tmp,
++ &stat->nid_uuid_list,
++ ns_uuid_list) {
++ if (cursor && obd_uuid_equals(&cursor->ns_uuid,
++ &exp->exp_client_uuid)) {
++ found = 1;
++ list_del(&cursor->ns_uuid_list);
++ OBD_FREE_PTR(cursor);
++ --stat->nid_exp_ref_count;
++ CDEBUG(D_INFO, "Put stat %p - %d\n", stat,
++ stat->nid_exp_ref_count);
++ break;
++ }
++ }
++ spin_unlock(&exp->exp_obd->obd_nid_lock);
++ if (!found)
++ CERROR("obd_export's client uuid %s are not found in its "
++ "nid_stats list\n", exp->exp_client_uuid.uuid);
++
++ exp->exp_nid_stats = NULL;
++ lprocfs_free_stats(&exp->exp_ops_stats);
++
++ return 0;
++}
++
++int lprocfs_write_helper(const char *buffer, unsigned long count,
++ int *val)
++{
++ return lprocfs_write_frac_helper(buffer, count, val, 1);
++}
++
++int lprocfs_write_frac_helper(const char *buffer, unsigned long count,
++ int *val, int mult)
++{
++ char kernbuf[20], *end, *pbuf;
++
++ if (count > (sizeof(kernbuf) - 1))
++ return -EINVAL;
++
++ if (copy_from_user(kernbuf, buffer, count))
++ return -EFAULT;
++
++ kernbuf[count] = '\0';
++ pbuf = kernbuf;
++ if (*pbuf == '-') {
++ mult = -mult;
++ pbuf++;
++ }
++
++ *val = (int)simple_strtoul(pbuf, &end, 10) * mult;
++ if (pbuf == end)
++ return -EINVAL;
++
++ if (end != NULL && *end == '.') {
++ int temp_val, pow = 1;
++ int i;
++
++ pbuf = end + 1;
++ if (strlen(pbuf) > 5)
++ pbuf[5] = '\0'; /* only allow 5 fractional digits */
++
++ temp_val = (int)simple_strtoul(pbuf, &end, 10) * mult;
++
++ if (pbuf < end) {
++ for (i = 0; i < (end - pbuf); i++)
++ pow *= 10;
++
++ *val += temp_val / pow;
++ }
++ }
++ return 0;
++}
++
++int lprocfs_read_frac_helper(char *buffer, unsigned long count, long val,
++ int mult)
++{
++ long decimal_val, frac_val;
++ int prtn;
++
++ if (count < 10)
++ return -EINVAL;
++
++ decimal_val = val / mult;
++ prtn = snprintf(buffer, count, "%ld", decimal_val);
++ frac_val = val % mult;
++
++ if (prtn < (count - 4) && frac_val > 0) {
++ long temp_frac;
++ int i, temp_mult = 1, frac_bits = 0;
++
++ temp_frac = frac_val * 10;
++ buffer[prtn++] = '.';
++ while (frac_bits < 2 && (temp_frac / mult) < 1 ) {
++ /* only keep 2 fractional digits */
++ buffer[prtn++] ='0';
++ temp_frac *= 10;
++ frac_bits++;
++ }
++ /*
++ Cases to consider:
++ 1. #echo x.00 > /proc/xxx output: x
++ 2. #echo x.0x > /proc/xxx output: x.0x
++ 3. #echo x.x0 > /proc/xxx output: x.x
++ 4. #echo x.xx > /proc/xxx output: x.xx
++ Only 2 fractional digits are kept.
++ */
++ for (i = 0; i < (5 - prtn); i++)
++ temp_mult *= 10;
++
++ frac_bits = min((int)count - prtn, 3 - frac_bits);
++ prtn += snprintf(buffer + prtn, frac_bits, "%ld",
++ frac_val * temp_mult / mult);
++
++ prtn--;
++ while (buffer[prtn] < '1' || buffer[prtn] > '9') {
++ prtn--;
++ if (buffer[prtn] == '.') {
++ prtn--;
++ break;
++ }
++ }
++ prtn++;
++ }
++ buffer[prtn++] = '\n';
++ return prtn;
++}
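
Hand-tracing lprocfs_read_frac_helper confirms the four cases the in-code comment lists; the worked examples below (assuming mult == 1000) are illustrative only:

    /* Worked examples for lprocfs_read_frac_helper with mult == 1000:
     *   val = 3000 -> "3\n"      (case 1: a fraction of zero is dropped)
     *   val = 3050 -> "3.05\n"   (case 2: the leading zero is kept)
     *   val = 3500 -> "3.5\n"    (case 3: the trailing zero is stripped)
     *   val = 3550 -> "3.55\n"   (case 4: both digits are significant)
     */
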
++
++int lprocfs_write_u64_helper(const char *buffer, unsigned long count, __u64 *val)
++{
++ return lprocfs_write_frac_u64_helper(buffer, count, val, 1);
++}
++
++int lprocfs_write_frac_u64_helper(const char *buffer, unsigned long count,
++ __u64 *val, int mult)
++{
++ char kernbuf[22], *end, *pbuf;
++ __u64 whole, frac = 0, units;
++ unsigned frac_d = 1;
++
++ if (count > (sizeof(kernbuf) - 1))
++ return -EINVAL;
++
++ if (copy_from_user(kernbuf, buffer, count))
++ return -EFAULT;
++
++ kernbuf[count] = '\0';
++ pbuf = kernbuf;
++ if (*pbuf == '-') {
++ mult = -mult;
++ pbuf++;
++ }
++
++ whole = simple_strtoull(pbuf, &end, 10);
++ if (pbuf == end)
++ return -EINVAL;
++
++ if (end != NULL && *end == '.') {
++ int i;
++ pbuf = end + 1;
++
++ /* need to limit frac_d to a __u32 */
++ if (strlen(pbuf) > 10)
++ pbuf[10] = '\0';
++
++ frac = simple_strtoull(pbuf, &end, 10);
++ /* count decimal places */
++ for (i = 0; i < (end - pbuf); i++)
++ frac_d *= 10;
++ }
++
++ units = 1;
++ switch (*end) {
++ case 'p': case 'P':
++ units <<= 10; /* fall through */
++ case 't': case 'T':
++ units <<= 10; /* fall through */
++ case 'g': case 'G':
++ units <<= 10; /* fall through */
++ case 'm': case 'M':
++ units <<= 10; /* fall through */
++ case 'k': case 'K':
++ units <<= 10;
++ }
++ /* An explicit unit suffix overrides the multiplier; units is 1
++ when no suffix was given, so only override for units > 1. */
++ if (units > 1)
++ mult = mult < 0 ? -units : units;
++
++ frac *= mult;
++ do_div(frac, frac_d);
++ *val = whole * mult + frac;
++ return 0;
++}
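
The cascaded shifts above build binary unit multipliers through deliberate case fall-through. A self-contained sketch of the same idea, with the hypothetical helper name unit_multiplier:

    #include <assert.h>

    /* Stand-alone version of the suffix handling above: each case
     * falls through, accumulating one <<10 per size step. */
    static unsigned long long unit_multiplier(char suffix)
    {
            unsigned long long units = 1;

            switch (suffix) {
            case 'p': case 'P': units <<= 10; /* fall through */
            case 't': case 'T': units <<= 10; /* fall through */
            case 'g': case 'G': units <<= 10; /* fall through */
            case 'm': case 'M': units <<= 10; /* fall through */
            case 'k': case 'K': units <<= 10;
            }
            return units;
    }

    int main(void)
    {
            assert(unit_multiplier('k') == 1ULL << 10);
            assert(unit_multiplier('G') == 1ULL << 30);
            assert(unit_multiplier('P') == 1ULL << 50);
            assert(unit_multiplier('x') == 1);  /* no suffix: multiplier 1 */
            return 0;
    }
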
++
++int lprocfs_seq_create(cfs_proc_dir_entry_t *parent,
++ char *name, mode_t mode,
++ struct file_operations *seq_fops, void *data)
++{
++ struct proc_dir_entry *entry;
++ ENTRY;
++
++ entry = create_proc_entry(name, mode, parent);
++ if (entry == NULL)
++ RETURN(-ENOMEM);
++ entry->proc_fops = seq_fops;
++ entry->data = data;
++
++ RETURN(0);
++}
++EXPORT_SYMBOL(lprocfs_seq_create);
++
++__inline__ int lprocfs_obd_seq_create(struct obd_device *dev, char *name,
++ mode_t mode,
++ struct file_operations *seq_fops,
++ void *data)
++{
++ return (lprocfs_seq_create(dev->obd_proc_entry, name,
++ mode, seq_fops, data));
++}
++EXPORT_SYMBOL(lprocfs_obd_seq_create);
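
For context, a caller of lprocfs_obd_seq_create() would pair it with the usual 2.6-era seq_file boilerplate. A minimal sketch — the foo_* names are hypothetical, not part of this patch:

    #include <linux/seq_file.h>
    #include <linux/proc_fs.h>

    /* Hypothetical single-record seq_file handler. */
    static int foo_seq_show(struct seq_file *m, void *v)
    {
            struct obd_device *obd = m->private;

            seq_printf(m, "%s\n", obd->obd_name);
            return 0;
    }

    static int foo_seq_open(struct inode *inode, struct file *file)
    {
            /* entry->data set by lprocfs_seq_create() is reachable via PDE() */
            return single_open(file, foo_seq_show, PDE(inode)->data);
    }

    static struct file_operations foo_seq_fops = {
            .open    = foo_seq_open,
            .read    = seq_read,
            .llseek  = seq_lseek,
            .release = single_release,
    };

    /* ... in obd setup:
     *   lprocfs_obd_seq_create(obd, "foo", 0444, &foo_seq_fops, obd); */
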
++
++void lprocfs_oh_tally(struct obd_histogram *oh, unsigned int value)
++{
++ if (value >= OBD_HIST_MAX)
++ value = OBD_HIST_MAX - 1;
++
++ spin_lock(&oh->oh_lock);
++ oh->oh_buckets[value]++;
++ spin_unlock(&oh->oh_lock);
++}
++EXPORT_SYMBOL(lprocfs_oh_tally);
++
++void lprocfs_oh_tally_log2(struct obd_histogram *oh, unsigned int value)
++{
++ unsigned int val;
++
++ for (val = 0; ((1 << val) < value) && (val <= OBD_HIST_MAX); val++)
++ ;
++
++ lprocfs_oh_tally(oh, val);
++}
++EXPORT_SYMBOL(lprocfs_oh_tally_log2);
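
lprocfs_oh_tally_log2() picks the smallest val with (1 << val) >= value, i.e. the bucket ceil(log2(value)); a few traced examples:

    /* Bucket chosen by lprocfs_oh_tally_log2:
     *   value 1    -> bucket 0
     *   value 2    -> bucket 1
     *   value 3    -> bucket 2
     *   value 4096 -> bucket 12
     * Anything at or past OBD_HIST_MAX is clamped into the last
     * bucket by lprocfs_oh_tally(). */
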
++
++unsigned long lprocfs_oh_sum(struct obd_histogram *oh)
++{
++ unsigned long ret = 0;
++ int i;
++
++ for (i = 0; i < OBD_HIST_MAX; i++)
++ ret += oh->oh_buckets[i];
++ return ret;
++}
++EXPORT_SYMBOL(lprocfs_oh_sum);
++
++void lprocfs_oh_clear(struct obd_histogram *oh)
++{
++ spin_lock(&oh->oh_lock);
++ memset(oh->oh_buckets, 0, sizeof(oh->oh_buckets));
++ spin_unlock(&oh->oh_lock);
++}
++EXPORT_SYMBOL(lprocfs_oh_clear);
++
++int lprocfs_obd_rd_recovery_status(char *page, char **start, off_t off,
++ int count, int *eof, void *data)
++{
++ struct obd_device *obd = data;
++ int len = 0, size;
++
++ LASSERT(obd != NULL);
++ LASSERT(count >= 0);
++
++ /* Set the start of the user data returned to page + off,
++ since the user may have requested to read much less than
++ the full status output. */
++ *start = page + off;
++
++ /* We know a full page was allocated for us, and that this
++ function never needs to write more than a page, so we can
++ safely truncate at CFS_PAGE_SIZE. */
++ size = min(count + (int)off + 1, (int)CFS_PAGE_SIZE);
++
++ /* Initialize the page */
++ memset(page, 0, size);
++
++ if (lprocfs_obd_snprintf(&page, size, &len, "status: ") <= 0)
++ goto out;
++ if (obd->obd_max_recoverable_clients == 0) {
++ if (lprocfs_obd_snprintf(&page, size, &len, "INACTIVE\n") <= 0)
++ goto out;
++
++ goto fclose;
++ }
++
++ /* sampled unlocked, but really... */
++ if (obd->obd_recovering == 0) {
++ if (lprocfs_obd_snprintf(&page, size, &len, "COMPLETE\n") <= 0)
++ goto out;
++ if (lprocfs_obd_snprintf(&page, size, &len,
++ "recovery_start: %lu\n",
++ obd->obd_recovery_start) <= 0)
++ goto out;
++ if (lprocfs_obd_snprintf(&page, size, &len,
++ "recovery_duration: %lu\n",
++ obd->obd_recovery_end -
++ obd->obd_recovery_start) <= 0)
++ goto out;
++ /* Number of clients that have completed recovery */
++ if (lprocfs_obd_snprintf(&page, size, &len,
++ "completed_clients: %d/%d\n",
++ obd->obd_max_recoverable_clients -
++ obd->obd_recoverable_clients,
++ obd->obd_max_recoverable_clients) <= 0)
++ goto out;
++ if (lprocfs_obd_snprintf(&page, size, &len,
++ "replayed_requests: %d\n",
++ obd->obd_replayed_requests) <= 0)
++ goto out;
++ if (lprocfs_obd_snprintf(&page, size, &len,
++ "last_transno: "LPD64"\n",
++ obd->obd_next_recovery_transno - 1) <= 0)
++ goto out;
++ goto fclose;
++ }
++
++ if (lprocfs_obd_snprintf(&page, size, &len, "RECOVERING\n") <= 0)
++ goto out;
++ if (lprocfs_obd_snprintf(&page, size, &len, "recovery_start: %lu\n",
++ obd->obd_recovery_start) <= 0)
++ goto out;
++ if (lprocfs_obd_snprintf(&page, size, &len, "time_remaining: %lu\n",
++ cfs_time_current_sec() >= obd->obd_recovery_end ? 0 :
++ obd->obd_recovery_end - cfs_time_current_sec()) <= 0)
++ goto out;
++ if (lprocfs_obd_snprintf(&page, size, &len,"connected_clients: %d/%d\n",
++ obd->obd_connected_clients,
++ obd->obd_max_recoverable_clients) <= 0)
++ goto out;
++ /* Number of clients that have completed recovery */
++ if (lprocfs_obd_snprintf(&page, size, &len,"completed_clients: %d/%d\n",
++ obd->obd_max_recoverable_clients -
++ obd->obd_recoverable_clients,
++ obd->obd_max_recoverable_clients) <= 0)
++ goto out;
++ if (lprocfs_obd_snprintf(&page, size, &len,"replayed_requests: %d/??\n",
++ obd->obd_replayed_requests) <= 0)
++ goto out;
++ if (lprocfs_obd_snprintf(&page, size, &len, "queued_requests: %d\n",
++ obd->obd_requests_queued_for_recovery) <= 0)
++ goto out;
++ if (lprocfs_obd_snprintf(&page, size, &len, "next_transno: "LPD64"\n",
++ obd->obd_next_recovery_transno) <= 0)
++ goto out;
++
++fclose:
++ *eof = 1;
++out:
++ return min(count, len - (int)off);
++}
++EXPORT_SYMBOL(lprocfs_obd_rd_recovery_status);
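
The handler above follows the classic read_proc contract: fill the page, set *start and *eof, and return the number of bytes still owed. For output that always fits in one page, the common simpler pattern looks like this sketch (foo_read_proc is hypothetical; when *start is left untouched, the proc layer handles the offset itself):

    static int foo_read_proc(char *page, char **start, off_t off,
                             int count, int *eof, void *data)
    {
            struct obd_device *obd = data;
            int len;

            /* Produce everything in one shot and signal completion. */
            len = snprintf(page, count, "%s\n", obd->obd_name);
            *eof = 1;
            return len;
    }
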
++
++int lprocfs_obd_rd_hash(char *page, char **start, off_t off,
++ int count, int *eof, void *data)
++{
++ struct obd_device *obd = data;
++ int c = 0;
++
++ if (obd == NULL)
++ return 0;
++
++ c += lustre_hash_debug_header(page, count);
++ c += lustre_hash_debug_str(obd->obd_uuid_hash, page + c, count - c);
++ c += lustre_hash_debug_str(obd->obd_nid_hash, page + c, count - c);
++ c += lustre_hash_debug_str(obd->obd_nid_stats_hash, page+c, count-c);
++
++ return c;
++}
++EXPORT_SYMBOL(lprocfs_obd_rd_hash);
++
++#ifdef CRAY_XT3
++int lprocfs_obd_rd_recovery_maxtime(char *page, char **start, off_t off,
++ int count, int *eof, void *data)
++{
++ struct obd_device *obd = (struct obd_device *)data;
++ LASSERT(obd != NULL);
++
++ return snprintf(page, count, "%lu\n",
++ obd->obd_recovery_max_time);
++}
++EXPORT_SYMBOL(lprocfs_obd_rd_recovery_maxtime);
++
++int lprocfs_obd_wr_recovery_maxtime(struct file *file, const char *buffer,
++ unsigned long count, void *data)
++{
++ struct obd_device *obd = (struct obd_device *)data;
++ int val, rc;
++ LASSERT(obd != NULL);
++
++ rc = lprocfs_write_helper(buffer, count, &val);
++ if (rc)
++ return rc;
++
++ obd->obd_recovery_max_time = val;
++ return count;
++}
++EXPORT_SYMBOL(lprocfs_obd_wr_recovery_maxtime);
++#endif /* CRAY_XT3 */
++
++EXPORT_SYMBOL(lprocfs_register);
++EXPORT_SYMBOL(lprocfs_srch);
++EXPORT_SYMBOL(lprocfs_remove);
++EXPORT_SYMBOL(lprocfs_add_vars);
++EXPORT_SYMBOL(lprocfs_obd_setup);
++EXPORT_SYMBOL(lprocfs_obd_cleanup);
++EXPORT_SYMBOL(lprocfs_add_simple);
++EXPORT_SYMBOL(lprocfs_free_per_client_stats);
++EXPORT_SYMBOL(lprocfs_alloc_stats);
++EXPORT_SYMBOL(lprocfs_free_stats);
++EXPORT_SYMBOL(lprocfs_clear_stats);
++EXPORT_SYMBOL(lprocfs_register_stats);
++EXPORT_SYMBOL(lprocfs_init_ops_stats);
++EXPORT_SYMBOL(lprocfs_init_ldlm_stats);
++EXPORT_SYMBOL(lprocfs_alloc_obd_stats);
++EXPORT_SYMBOL(lprocfs_free_obd_stats);
++EXPORT_SYMBOL(lprocfs_exp_setup);
++EXPORT_SYMBOL(lprocfs_exp_cleanup);
++
++EXPORT_SYMBOL(lprocfs_rd_u64);
++EXPORT_SYMBOL(lprocfs_rd_atomic);
++EXPORT_SYMBOL(lprocfs_wr_atomic);
++EXPORT_SYMBOL(lprocfs_rd_uint);
++EXPORT_SYMBOL(lprocfs_wr_uint);
++EXPORT_SYMBOL(lprocfs_rd_uuid);
++EXPORT_SYMBOL(lprocfs_rd_name);
++EXPORT_SYMBOL(lprocfs_rd_fstype);
++EXPORT_SYMBOL(lprocfs_rd_server_uuid);
++EXPORT_SYMBOL(lprocfs_rd_conn_uuid);
++EXPORT_SYMBOL(lprocfs_rd_num_exports);
++EXPORT_SYMBOL(lprocfs_rd_numrefs);
++EXPORT_SYMBOL(lprocfs_at_hist_helper);
++EXPORT_SYMBOL(lprocfs_rd_import);
++EXPORT_SYMBOL(lprocfs_rd_timeouts);
++EXPORT_SYMBOL(lprocfs_rd_blksize);
++EXPORT_SYMBOL(lprocfs_rd_kbytestotal);
++EXPORT_SYMBOL(lprocfs_rd_kbytesfree);
++EXPORT_SYMBOL(lprocfs_rd_kbytesavail);
++EXPORT_SYMBOL(lprocfs_rd_filestotal);
++EXPORT_SYMBOL(lprocfs_rd_filesfree);
++
++EXPORT_SYMBOL(lprocfs_write_helper);
++EXPORT_SYMBOL(lprocfs_write_frac_helper);
++EXPORT_SYMBOL(lprocfs_read_frac_helper);
++EXPORT_SYMBOL(lprocfs_write_u64_helper);
++EXPORT_SYMBOL(lprocfs_write_frac_u64_helper);
++#endif /* LPROCFS*/
+diff -urNad lustre~/lustre/ptlrpc/service.c lustre/lustre/ptlrpc/service.c
+--- lustre~/lustre/ptlrpc/service.c 2009-03-12 10:32:27.000000000 +0100
++++ lustre/lustre/ptlrpc/service.c 2009-03-12 11:02:51.000000000 +0100
+@@ -1501,7 +1501,7 @@
+ cfs_daemonize(name);
+ exit_fs(cfs_current());
+ current->fs = fs;
+- ll_set_fs_pwd(current->fs, init_task.fs->pwdmnt, init_task.fs->pwd);
++ ll_set_fs_pwd(current->fs, cfs_fs_mnt(init_task.fs), cfs_fs_pwd(init_task.fs));
+ }
+
+ static void
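
For reference, cfs_fs_mnt()/cfs_fs_pwd() exist because kernels around 2.6.25 replaced the separate pwdmnt/pwd members of fs_struct with a single struct path. A plausible shape for the compat shims — the HAVE_FS_STRUCT_USE_PATH symbol is an assumption, not verified against this patch:

    /* Sketch of the libcfs compat accessors, assuming a configure
     * probe named HAVE_FS_STRUCT_USE_PATH detects the new layout. */
    #ifdef HAVE_FS_STRUCT_USE_PATH
    # define cfs_fs_pwd(fs) ((fs)->pwd.dentry)
    # define cfs_fs_mnt(fs) ((fs)->pwd.mnt)
    #else
    # define cfs_fs_pwd(fs) ((fs)->pwd)
    # define cfs_fs_mnt(fs) ((fs)->pwdmnt)
    #endif
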
diff --git a/debian/patches/posix_acl.dpatch b/debian/patches/posix_acl.dpatch
index 1ab99e0..c837e3b 100755
--- a/debian/patches/posix_acl.dpatch
+++ b/debian/patches/posix_acl.dpatch
@@ -5,22 +5,21 @@
## DP: Patch from Q-Leap Networks
@DPATCH@
-
-diff --git a/lustre/llite/file.c b/lustre/llite/file.c
---- a/lustre/llite/file.c
-+++ b/lustre/llite/file.c
-@@ -27,6 +27,7 @@
+diff -urNad lustre~/lustre/llite/file.c lustre/lustre/llite/file.c
+--- lustre~/lustre/llite/file.c 2009-03-12 10:33:45.000000000 +0100
++++ lustre/lustre/llite/file.c 2009-03-12 10:41:51.000000000 +0100
+@@ -45,6 +45,7 @@
#include <lustre_lite.h>
#include <linux/pagemap.h>
#include <linux/file.h>
+#include <linux/posix_acl.h>
- #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
- #include <linux/lustre_compat25.h>
- #endif
-diff --git a/lustre/llite/xattr.c b/lustre/llite/xattr.c
---- a/lustre/llite/xattr.c
-+++ b/lustre/llite/xattr.c
-@@ -23,6 +23,7 @@
+ #include "llite_internal.h"
+ #include <lustre/ll_fiemap.h>
+
+diff -urNad lustre~/lustre/llite/xattr.c lustre/lustre/llite/xattr.c
+--- lustre~/lustre/llite/xattr.c 2009-03-12 10:27:57.000000000 +0100
++++ lustre/lustre/llite/xattr.c 2009-03-12 10:40:31.000000000 +0100
+@@ -38,6 +38,7 @@
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/smp_lock.h>
@@ -28,10 +27,10 @@ diff --git a/lustre/llite/xattr.c b/lustre/llite/xattr.c
#define DEBUG_SUBSYSTEM S_LLITE
-diff --git a/lustre/mdc/mdc_locks.c b/lustre/mdc/mdc_locks.c
---- a/lustre/mdc/mdc_locks.c
-+++ b/lustre/mdc/mdc_locks.c
-@@ -32,6 +32,7 @@
+diff -urNad lustre~/lustre/mdc/mdc_locks.c lustre/lustre/mdc/mdc_locks.c
+--- lustre~/lustre/mdc/mdc_locks.c 2009-03-12 10:27:57.000000000 +0100
++++ lustre/lustre/mdc/mdc_locks.c 2009-03-12 10:40:31.000000000 +0100
+@@ -44,6 +44,7 @@
# include <linux/pagemap.h>
# include <linux/miscdevice.h>
# include <linux/init.h>
@@ -39,10 +38,10 @@ diff --git a/lustre/mdc/mdc_locks.c b/lustre/mdc/mdc_locks.c
#else
# include <liblustre.h>
#endif
-diff --git a/lustre/mdc/mdc_request.c b/lustre/mdc/mdc_request.c
---- a/lustre/mdc/mdc_request.c
-+++ b/lustre/mdc/mdc_request.c
-@@ -32,6 +32,7 @@
+diff -urNad lustre~/lustre/mdc/mdc_request.c lustre/lustre/mdc/mdc_request.c
+--- lustre~/lustre/mdc/mdc_request.c 2009-03-12 10:27:57.000000000 +0100
++++ lustre/lustre/mdc/mdc_request.c 2009-03-12 10:40:31.000000000 +0100
+@@ -44,6 +44,7 @@
# include <linux/pagemap.h>
# include <linux/miscdevice.h>
# include <linux/init.h>
@@ -50,14 +49,14 @@ diff --git a/lustre/mdc/mdc_request.c b/lustre/mdc/mdc_request.c
#else
# include <liblustre.h>
#endif
-diff --git a/lustre/mds/handler.c b/lustre/mds/handler.c
---- a/lustre/mds/handler.c
-+++ b/lustre/mds/handler.c
-@@ -40,6 +40,7 @@
+diff -urNad lustre~/lustre/mds/handler.c lustre/lustre/mds/handler.c
+--- lustre~/lustre/mds/handler.c 2009-03-12 10:40:31.000000000 +0100
++++ lustre/lustre/mds/handler.c 2009-03-12 10:42:18.000000000 +0100
+@@ -52,6 +52,7 @@
#include <linux/random.h>
#include <linux/fs.h>
#include <linux/jbd.h>
+#include <linux/posix_acl_xattr.h>
- #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
# include <linux/smp_lock.h>
# include <linux/buffer_head.h>
+ # include <linux/workqueue.h>
diff --git a/debian/patches/remove-set_tunables.dpatch b/debian/patches/remove-set_tunables.dpatch
index c7ff1c9..471e062 100755
--- a/debian/patches/remove-set_tunables.dpatch
+++ b/debian/patches/remove-set_tunables.dpatch
@@ -5,10 +5,10 @@
## DP: removed set tunables from mount.lustre since it doesn't work under etch
@DPATCH@
-diff -urNad lustre-1.6.6~/lustre/utils/mount_lustre.c lustre-1.6.6/lustre/utils/mount_lustre.c
---- lustre-1.6.6~/lustre/utils/mount_lustre.c 2008-11-26 10:50:51.000000000 +0100
-+++ lustre-1.6.6/lustre/utils/mount_lustre.c 2008-11-26 10:56:17.000000000 +0100
-@@ -305,7 +305,10 @@
+diff -urNad lustre~/lustre/utils/mount_lustre.c lustre/lustre/utils/mount_lustre.c
+--- lustre~/lustre/utils/mount_lustre.c 2009-03-12 10:32:27.000000000 +0100
++++ lustre/lustre/utils/mount_lustre.c 2009-03-12 10:44:12.000000000 +0100
+@@ -320,7 +320,10 @@
/* This is to tune the kernel for good SCSI performance.
* For that we set the value of /sys/block/{dev}/queue/max_sectors_kb
@@ -20,7 +20,7 @@ diff -urNad lustre-1.6.6~/lustre/utils/mount_lustre.c lustre-1.6.6/lustre/utils/
int set_tunables(char *source, int src_len)
{
glob_t glob_info;
-@@ -359,10 +362,10 @@
+@@ -374,10 +377,10 @@
if (rc != ENOENT)
return rc;
@@ -33,7 +33,7 @@ diff -urNad lustre-1.6.6~/lustre/utils/mount_lustre.c lustre-1.6.6/lustre/utils/
dev = real_path + src_len - 1;
while (dev > real_path) {
if (isdigit(*dev))
-@@ -434,7 +437,7 @@
+@@ -449,7 +452,7 @@
out:
globfree(&glob_info);
return rc;
@@ -42,12 +42,13 @@ diff -urNad lustre-1.6.6~/lustre/utils/mount_lustre.c lustre-1.6.6/lustre/utils/
int main(int argc, char *const argv[])
{
-@@ -567,10 +570,10 @@
+@@ -582,11 +585,11 @@
printf("mounting device %s at %s, flags=%#x options=%s\n",
source, target, flags, optcopy);
-- if (set_tunables(source, strlen(source)) && verbose)
-+ /*if (set_tunables(source, strlen(source)) && verbose)
+- if (!strstr(usource, ":/") && set_tunables(source, strlen(source)) &&
++ /*if (!strstr(usource, ":/") && set_tunables(source, strlen(source)) &&
+ verbose)
fprintf(stderr, "%s: unable to set tunables for %s"
" (may cause reduced IO performance)\n",
- argv[0], source);
--
Lustre Debian Packaging