[Pkg-lustre-svn-commit] updated: [d6e4e96] Delete old unneeded patches
Patrick Winnertz
winnie at debian.org
Fri Jun 5 13:57:57 UTC 2009
The following commit has been merged in the master branch:
commit d6e4e96bb8f1a5965216ea5a873f471ff46d3838
Author: Patrick Winnertz <winnie at debian.org>
Date: Fri Jun 5 15:56:42 2009 +0200
Delete old unneeded patches
Signed-off-by: Patrick Winnertz <winnie at debian.org>
diff --git a/debian/patches/patchless_support_2.6.24.dpatch b/debian/patches/patchless_support_2.6.24.dpatch
deleted file mode 100755
index 77832c4..0000000
--- a/debian/patches/patchless_support_2.6.24.dpatch
+++ /dev/null
@@ -1,4057 +0,0 @@
-#! /bin/sh /usr/share/dpatch/dpatch-run
-## posix_acl.patch by Patrick Winnertz <winnie at debian.org>
-##
-## All lines beginning with `## DP:' are a description of the patch.
-## DP: Patch which will enable 2.6.24 patchless support for lustre, taken from #14250
-
-@DPATCH@
-diff -urNad lustre~/lnet/autoconf/lustre-lnet.m4 lustre/lnet/autoconf/lustre-lnet.m4
---- lustre~/lnet/autoconf/lustre-lnet.m4 2008-12-30 11:23:32.000000000 +0100
-+++ lustre/lnet/autoconf/lustre-lnet.m4 2009-03-10 11:41:03.000000000 +0100
-@@ -1290,6 +1290,41 @@
- ])
- ])
-
-+# 2.6.24 asks drivers not to use real (fixed) numbers for ctl_name
-+AC_DEFUN([LN_SYSCTL_UNNUMBERED],
-+[AC_MSG_CHECKING([for CTL_UNNUMBERED])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/sysctl.h>
-+],[
-+ #ifndef CTL_UNNUMBERED
-+ #error CTL_UNNUMBERED does not exist in kernel
-+ #endif
-+],[
-+ AC_MSG_RESULT(yes)
-+ AC_DEFINE(HAVE_SYSCTL_UNNUMBERED, 1,
-+ [sysctl has CTL_UNNUMBERED])
-+],[
-+ AC_MSG_RESULT(NO)
-+])
-+])
-+
-+# 2.6.24 removed scatterlist->page
-+AC_DEFUN([LN_SCATTERLIST_SETPAGE],
-+[AC_MSG_CHECKING([if sg_set_page exists])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/scatterlist.h>
-+],[
-+ sg_set_page(NULL,NULL,0,0);
-+],[
-+ AC_MSG_RESULT(yes)
-+ AC_DEFINE(HAVE_SCATTERLIST_SETPAGE, 1,
-+ [struct scatterlist has page member])
-+],[
-+ AC_MSG_RESULT(NO)
-+])
-+])
-+
-+
- #
- # LN_PROG_LINUX
- #
-@@ -1333,6 +1368,9 @@
- LN_KMEM_CACHE
- # 2.6.23
- LN_KMEM_CACHE_CREATE_DTOR
-+# 2.6.24
-+LN_SYSCTL_UNNUMBERED
-+LN_SCATTERLIST_SETPAGE
- ])
-
- #
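The two checks added above (LN_SYSCTL_UNNUMBERED and LN_SCATTERLIST_SETPAGE) only emit the HAVE_SYSCTL_UNNUMBERED and HAVE_SCATTERLIST_SETPAGE defines; the per-driver hunks further down key their compat code off those defines. As a rough sketch of the sysctl pattern repeated there (names and numbers below are illustrative, not taken from the patch):

    #include <linux/sysctl.h>

    #ifndef HAVE_SYSCTL_UNNUMBERED
    /* pre-2.6.24: keep a stable binary sysctl number */
    #define CTL_EXAMPLE      210   /* illustrative value only */
    enum { EXAMPLE_TIMEOUT = 1 };
    #else
    /* 2.6.24+: binary numbers are deprecated, use CTL_UNNUMBERED */
    #define CTL_EXAMPLE      CTL_UNNUMBERED
    #define EXAMPLE_TIMEOUT  CTL_UNNUMBERED
    #endif

    static int example_timeout = 30;

    static struct ctl_table example_table[] = {
            {
                    .ctl_name     = EXAMPLE_TIMEOUT,
                    .procname     = "timeout",
                    .data         = &example_timeout,
                    .maxlen       = sizeof(int),
                    .mode         = 0644,
                    .proc_handler = &proc_dointvec
            },
            {0}
    };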
-diff -urNad lustre~/lnet/include/libcfs/curproc.h lustre/lnet/include/libcfs/curproc.h
---- lustre~/lnet/include/libcfs/curproc.h 2008-12-30 11:23:32.000000000 +0100
-+++ lustre/lnet/include/libcfs/curproc.h 2009-03-10 11:41:03.000000000 +0100
-@@ -72,6 +72,11 @@
- */
- cfs_kernel_cap_t cfs_curproc_cap_get(void);
- void cfs_curproc_cap_set(cfs_kernel_cap_t cap);
-+
-+typedef __u32 cfs_cap_t;
-+
-+cfs_cap_t cfs_cap_convert_from_kernel(cfs_kernel_cap_t cap);
-+
- #endif
-
- /* __LIBCFS_CURPROC_H__ */
-diff -urNad lustre~/lnet/include/libcfs/linux/linux-prim.h lustre/lnet/include/libcfs/linux/linux-prim.h
---- lustre~/lnet/include/libcfs/linux/linux-prim.h 2008-12-30 11:23:32.000000000 +0100
-+++ lustre/lnet/include/libcfs/linux/linux-prim.h 2009-03-10 11:41:03.000000000 +0100
-@@ -84,6 +84,17 @@
- #endif
- #define cfs_unregister_sysctl_table(t) unregister_sysctl_table(t)
-
-+#define DECLARE_PROC_HANDLER(name) \
-+static int \
-+LL_PROC_PROTO(name) \
-+{ \
-+ DECLARE_LL_PROC_PPOS_DECL; \
-+ \
-+ return proc_call_handler(table->data, write, \
-+ ppos, buffer, lenp, \
-+ __##name); \
-+}
-+
- /*
- * Symbol register
- */
-diff -urNad lustre~/lnet/klnds/gmlnd/gmlnd_module.c lustre/lnet/klnds/gmlnd/gmlnd_module.c
---- lustre~/lnet/klnds/gmlnd/gmlnd_module.c 2008-12-30 11:23:32.000000000 +0100
-+++ lustre/lnet/klnds/gmlnd/gmlnd_module.c 2009-03-10 11:41:03.000000000 +0100
-@@ -78,9 +78,37 @@
- };
-
- #if defined(CONFIG_SYSCTL) && !CFS_SYSFS_MODULE_PARM
-+
-+#ifndef HAVE_SYSCTL_UNNUMBERED
-+
-+#define CTL_GMLND 202
-+
-+enum {
-+ GMLND_PORT = 1,
-+ GMLND_NTX,
-+ GMLND_CREDITS,
-+ GMLND_PEERCREDITS,
-+ GMLND_NLARGE_TX_BUFS,
-+ GMLND_NRX_SMALL,
-+ GMLND_NRX_LARGE
-+};
-+
-+#else
-+#define CTL_GMLND CTL_UNNUMBERED
-+
-+#define GMLND_PORT CTL_UNNUMBERED
-+#define GMLND_NTX CTL_UNNUMBERED
-+#define GMLND_CREDITS CTL_UNNUMBERED
-+#define GMLND_PEERCREDITS CTL_UNNUMBERED
-+#define GMLND_NLARGE_TX_BUFS CTL_UNNUMBERED
-+#define GMLND_NRX_SMALL CTL_UNNUMBERED
-+#define GMLND_NRX_LARGE CTL_UNNUMBERED
-+
-+#endif
-+
- static cfs_sysctl_table_t gmnal_ctl_table[] = {
- {
-- .ctl_name = 1,
-+ .ctl_name = GMLND_PORT,
- .procname = "port",
- .data = &port,
- .maxlen = sizeof (int),
-@@ -88,7 +116,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 2,
-+ .ctl_name = GMLND_NTX,
- .procname = "ntx",
- .data = &ntx,
- .maxlen = sizeof (int),
-@@ -96,7 +124,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 3,
-+ .ctl_name = GMLND_CREDITS,
- .procname = "credits",
- .data = &credits,
- .maxlen = sizeof (int),
-@@ -104,7 +132,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 4,
-+ .ctl_name = GMLND_PEERCREDITS,
- .procname = "peer_credits",
- .data = &peer_credits,
- .maxlen = sizeof (int),
-@@ -112,7 +140,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 5,
-+ .ctl_name = GMLND_NLARGE_TX_BUFS,
- .procname = "nlarge_tx_bufs",
- .data = &nlarge_tx_bufs,
- .maxlen = sizeof (int),
-@@ -120,7 +148,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 6,
-+ .ctl_name = GMLND_NRX_SMALL,
- .procname = "nrx_small",
- .data = &nrx_small,
- .maxlen = sizeof (int),
-@@ -128,7 +156,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 7,
-+ .ctl_name = GMLND_NRX_LARGE,
- .procname = "nrx_large",
- .data = &nrx_large,
- .maxlen = sizeof (int),
-@@ -140,7 +168,7 @@
-
- static cfs_sysctl_table_t gmnal_top_ctl_table[] = {
- {
-- .ctl_name = 207,
-+ .ctl_name = CTL_GMLND,
- .procname = "gmnal",
- .data = NULL,
- .maxlen = 0,
-diff -urNad lustre~/lnet/klnds/iiblnd/iiblnd_modparams.c lustre/lnet/klnds/iiblnd/iiblnd_modparams.c
---- lustre~/lnet/klnds/iiblnd/iiblnd_modparams.c 2008-12-30 11:23:32.000000000 +0100
-+++ lustre/lnet/klnds/iiblnd/iiblnd_modparams.c 2009-03-10 11:41:03.000000000 +0100
-@@ -119,9 +119,50 @@
- * not to truncate the printout; it only needs to be the actual size of the
- * string buffer if we allow writes (and we don't) */
-
-+#ifndef HAVE_SYSCTL_UNNUMBERED
-+
-+#define CTL_IIBLND 203
-+
-+enum {
-+ IIBLND_IPIF_BASENAME = 1,
-+ IIBLND_SERVICE_NAME,
-+ IIBLND_SERVICE_NUMBER,
-+ IIBLND_RECONNECT_MIN,
-+ IIBLND_RECONNECT_MAX,
-+ IIBLND_CONCURRENT_PEERS,
-+ IIBLND_CKSUM,
-+ IIBLND_TIMEOUT,
-+ IIBLND_NTX,
-+ IIBLND_CREDITS,
-+ IIBLND_PEER_CREDITS,
-+ IIBLND_SD_RETRIES,
-+ IIBLND_KEEPALIVE,
-+ IIBLND_CONCURRENT_SENDS
-+};
-+
-+#else
-+#define CTL_IIBLND CTL_UNNUMBERED
-+
-+#define IIBLND_IPIF_BASENAME CTL_UNNUMBERED
-+#define IIBLND_SERVICE_NAME CTL_UNNUMBERED
-+#define IIBLND_SERVICE_NUMBER CTL_UNNUMBERED
-+#define IIBLND_RECONNECT_MIN CTL_UNNUMBERED
-+#define IIBLND_RECONNECT_MAX CTL_UNNUMBERED
-+#define IIBLND_CONCURRENT_PEERS CTL_UNNUMBERED
-+#define IIBLND_CKSUM CTL_UNNUMBERED
-+#define IIBLND_TIMEOUT CTL_UNNUMBERED
-+#define IIBLND_NTX CTL_UNNUMBERED
-+#define IIBLND_CREDITS CTL_UNNUMBERED
-+#define IIBLND_PEER_CREDITS CTL_UNNUMBERED
-+#define IIBLND_SD_RETRIES CTL_UNNUMBERED
-+#define IIBLND_KEEPALIVE CTL_UNNUMBERED
-+#define IIBLND_CONCURRENT_SENDS CTL_UNNUMBERED
-+
-+#endif
-+
- static cfs_sysctl_table_t kibnal_ctl_table[] = {
- {
-- .ctl_name = 1,
-+ .ctl_name = IIBLND_IPIF_BASENAME,
- .procname = "ipif_basename",
- .data = &ipif_basename,
- .maxlen = 1024,
-@@ -129,7 +170,7 @@
- .proc_handler = &proc_dostring
- },
- {
-- .ctl_name = 2,
-+ .ctl_name = IIBLND_SERVICE_NAME,
- .procname = "service_name",
- .data = &service_name,
- .maxlen = 1024,
-@@ -137,7 +178,7 @@
- .proc_handler = &proc_dostring
- },
- {
-- .ctl_name = 3,
-+ .ctl_name = IIBLND_SERVICE_NUMBER,
- .procname = "service_number",
- .data = &service_number,
- .maxlen = sizeof(int),
-@@ -145,7 +186,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 4,
-+ .ctl_name = IIBLND_RECONNECT_MIN,
- .procname = "min_reconnect_interval",
- .data = &min_reconnect_interval,
- .maxlen = sizeof(int),
-@@ -153,7 +194,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 5,
-+ .ctl_name = IIBLND_RECONNECT_MAX,
- .procname = "max_reconnect_interval",
- .data = &max_reconnect_interval,
- .maxlen = sizeof(int),
-@@ -161,7 +202,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 6,
-+ .ctl_name = IIBLND_CONCURRENT_PEERS,
- .procname = "concurrent_peers",
- .data = &concurrent_peers,
- .maxlen = sizeof(int),
-@@ -169,7 +210,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 7,
-+ .ctl_name = IIBLND_CKSUM,
- .procname = "cksum",
- .data = &cksum,
- .maxlen = sizeof(int),
-@@ -177,7 +218,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 8,
-+ .ctl_name = IIBLND_TIMEOUT,
- .procname = "timeout",
- .data = &timeout,
- .maxlen = sizeof(int),
-@@ -185,7 +226,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 9,
-+ .ctl_name = IIBLND_NTX,
- .procname = "ntx",
- .data = &ntx,
- .maxlen = sizeof(int),
-@@ -193,7 +234,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 10,
-+ .ctl_name = IIBLND_CREDITS,
- .procname = "credits",
- .data = &credits,
- .maxlen = sizeof(int),
-@@ -201,7 +242,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 11,
-+ .ctl_name = IIBLND_PEER_CREDITS,
- .procname = "peer_credits",
- .data = &peer_credits,
- .maxlen = sizeof(int),
-@@ -209,7 +250,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 12,
-+ .ctl_name = IIBLND_SD_RETRIES,
- .procname = "sd_retries",
- .data = &sd_retries,
- .maxlen = sizeof(int),
-@@ -217,7 +258,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 13,
-+ .ctl_name = IIBLND_KEEPALIVE,
- .procname = "keepalive",
- .data = &keepalive,
- .maxlen = sizeof(int),
-@@ -225,7 +266,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 14,
-+ .ctl_name = IIBLND_CONCURRENT_SENDS,
- .procname = "concurrent_sends",
- .data = &concurrent_sends,
- .maxlen = sizeof(int),
-@@ -237,7 +278,7 @@
-
- static cfs_sysctl_table_t kibnal_top_ctl_table[] = {
- {
-- .ctl_name = 203,
-+ .ctl_name = CTL_IIBLND,
- .procname = "openibnal",
- .data = NULL,
- .maxlen = 0,
-diff -urNad lustre~/lnet/klnds/o2iblnd/o2iblnd.h lustre/lnet/klnds/o2iblnd/o2iblnd.h
---- lustre~/lnet/klnds/o2iblnd/o2iblnd.h 2008-12-30 11:23:32.000000000 +0100
-+++ lustre/lnet/klnds/o2iblnd/o2iblnd.h 2009-03-10 11:41:03.000000000 +0100
-@@ -773,3 +773,13 @@
- int kiblnd_recv(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, int delayed,
- unsigned int niov, struct iovec *iov, lnet_kiov_t *kiov,
- unsigned int offset, unsigned int mlen, unsigned int rlen);
-+/* compat macros */
-+#ifndef HAVE_SCATTERLIST_SETPAGE
-+static inline void sg_set_page(struct scatterlist *sg, struct page *page,
-+ unsigned int len, unsigned int offset)
-+{
-+ sg->page = page;
-+ sg->offset = offset;
-+ sg->length = len;
-+}
-+#endif
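The inline added above backfills sg_set_page() on kernels older than 2.6.24, where struct scatterlist still exposes its page/offset/length fields directly; with it in scope, the o2iblnd_cb.c hunks below can call sg_set_page() unconditionally. A minimal usage sketch (the function name is made up for illustration):

    #include <linux/scatterlist.h>

    /* Fill consecutive scatterlist entries from an array of pages;
     * works the same whether sg_set_page() comes from the kernel
     * (2.6.24+) or from the compat inline above. */
    static void example_fill_sg(struct scatterlist *sg, struct page **pages,
                                int npages, unsigned int len)
    {
            int i;

            for (i = 0; i < npages; i++)
                    sg_set_page(&sg[i], pages[i], len, 0);
    }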
-diff -urNad lustre~/lnet/klnds/o2iblnd/o2iblnd_cb.c lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c
---- lustre~/lnet/klnds/o2iblnd/o2iblnd_cb.c 2008-12-30 11:23:32.000000000 +0100
-+++ lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c 2009-03-10 11:41:03.000000000 +0100
-@@ -643,9 +643,7 @@
- fragnob = min((int)(iov->iov_len - offset), nob);
- fragnob = min(fragnob, (int)PAGE_SIZE - page_offset);
-
-- sg->page = page;
-- sg->offset = page_offset;
-- sg->length = fragnob;
-+ sg_set_page(sg, page, fragnob, page_offset);
- sg++;
-
- if (offset + fragnob < iov->iov_len) {
-@@ -708,11 +706,10 @@
- fragnob = min((int)(kiov->kiov_len - offset), nob);
-
- memset(sg, 0, sizeof(*sg));
-- sg->page = kiov->kiov_page;
-- sg->offset = kiov->kiov_offset + offset;
-- sg->length = fragnob;
-+ sg_set_page(sg, kiov->kiov_page, fragnob,
-+ kiov->kiov_offset + offset);
- sg++;
--
-+
- offset = 0;
- kiov++;
- nkiov--;
-diff -urNad lustre~/lnet/klnds/o2iblnd/o2iblnd_modparams.c lustre/lnet/klnds/o2iblnd/o2iblnd_modparams.c
---- lustre~/lnet/klnds/o2iblnd/o2iblnd_modparams.c 2008-12-30 11:23:32.000000000 +0100
-+++ lustre/lnet/klnds/o2iblnd/o2iblnd_modparams.c 2009-03-10 11:41:03.000000000 +0100
-@@ -130,9 +130,51 @@
-
- static char ipif_basename_space[32];
-
-+#ifndef HAVE_SYSCTL_UNNUMBERED
-+
-+#define CTL_O2IBLND 205
-+
-+enum {
-+ O2IBLND_SERVICE = 1,
-+ O2IBLND_CKSUM,
-+ O2IBLND_TIMEOUT,
-+ O2IBLND_NTX,
-+ O2IBLND_CREDITS,
-+ O2IBLND_PEER_CREDITS,
-+ O2IBLND_IPIF_BASENAME,
-+ O2IBLND_RETRY_COUNT,
-+ O2IBLND_RNR_RETRY_COUNT,
-+ O2IBLND_KEEPALIVE,
-+ O2IBLND_CONCURRENT_SENDS,
-+ O2IBLND_IB_MTU,
-+ O2IBLND_FMR_POOL_SIZE,
-+ O2IBLND_FMR_FLUSH_TRIGGER,
-+ O2IBLND_FMR_CACHE
-+};
-+#else
-+#define CTL_O2IBLND CTL_UNNUMBERED
-+
-+#define O2IBLND_SERVICE CTL_UNNUMBERED
-+#define O2IBLND_CKSUM CTL_UNNUMBERED
-+#define O2IBLND_TIMEOUT CTL_UNNUMBERED
-+#define O2IBLND_NTX CTL_UNNUMBERED
-+#define O2IBLND_CREDITS CTL_UNNUMBERED
-+#define O2IBLND_PEER_CREDITS CTL_UNNUMBERED
-+#define O2IBLND_IPIF_BASENAME CTL_UNNUMBERED
-+#define O2IBLND_RETRY_COUNT CTL_UNNUMBERED
-+#define O2IBLND_RNR_RETRY_COUNT CTL_UNNUMBERED
-+#define O2IBLND_KEEPALIVE CTL_UNNUMBERED
-+#define O2IBLND_CONCURRENT_SENDS CTL_UNNUMBERED
-+#define O2IBLND_IB_MTU CTL_UNNUMBERED
-+#define O2IBLND_FMR_POOL_SIZE CTL_UNNUMBERED
-+#define O2IBLND_FMR_FLUSH_TRIGGER CTL_UNNUMBERED
-+#define O2IBLND_FMR_CACHE CTL_UNNUMBERED
-+
-+#endif
-+
- static cfs_sysctl_table_t kiblnd_ctl_table[] = {
- {
-- .ctl_name = 1,
-+ .ctl_name = O2IBLND_SERVICE,
- .procname = "service",
- .data = &service,
- .maxlen = sizeof(int),
-@@ -140,7 +182,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 2,
-+ .ctl_name = O2IBLND_CKSUM,
- .procname = "cksum",
- .data = &cksum,
- .maxlen = sizeof(int),
-@@ -148,7 +190,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 3,
-+ .ctl_name = O2IBLND_TIMEOUT,
- .procname = "timeout",
- .data = &timeout,
- .maxlen = sizeof(int),
-@@ -156,7 +198,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 4,
-+ .ctl_name = O2IBLND_NTX,
- .procname = "ntx",
- .data = &ntx,
- .maxlen = sizeof(int),
-@@ -164,7 +206,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 5,
-+ .ctl_name = O2IBLND_CREDITS,
- .procname = "credits",
- .data = &credits,
- .maxlen = sizeof(int),
-@@ -172,7 +214,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 6,
-+ .ctl_name = O2IBLND_PEER_CREDITS,
- .procname = "peer_credits",
- .data = &peer_credits,
- .maxlen = sizeof(int),
-@@ -180,7 +222,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 7,
-+ .ctl_name = O2IBLND_IPIF_BASENAME,
- .procname = "ipif_name",
- .data = ipif_basename_space,
- .maxlen = sizeof(ipif_basename_space),
-@@ -188,7 +230,7 @@
- .proc_handler = &proc_dostring
- },
- {
-- .ctl_name = 8,
-+ .ctl_name = O2IBLND_RETRY_COUNT,
- .procname = "retry_count",
- .data = &retry_count,
- .maxlen = sizeof(int),
-@@ -196,7 +238,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 9,
-+ .ctl_name = O2IBLND_RNR_RETRY_COUNT,
- .procname = "rnr_retry_count",
- .data = &rnr_retry_count,
- .maxlen = sizeof(int),
-@@ -204,7 +246,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 10,
-+ .ctl_name = O2IBLND_KEEPALIVE,
- .procname = "keepalive",
- .data = &keepalive,
- .maxlen = sizeof(int),
-@@ -212,7 +254,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 11,
-+ .ctl_name = O2IBLND_CONCURRENT_SENDS,
- .procname = "concurrent_sends",
- .data = &concurrent_sends,
- .maxlen = sizeof(int),
-@@ -220,7 +262,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 12,
-+ .ctl_name = O2IBLND_IB_MTU,
- .procname = "ib_mtu",
- .data = &ib_mtu,
- .maxlen = sizeof(int),
-@@ -229,7 +271,7 @@
- },
- #if IBLND_MAP_ON_DEMAND
- {
-- .ctl_name = 13,
-+ .ctl_name = O2IBLND_FMR_POOL_SIZE,
- .procname = "fmr_pool_size",
- .data = &fmr_pool_size,
- .maxlen = sizeof(int),
-@@ -237,7 +279,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 14,
-+ .ctl_name = O2IBLND_FMR_FLUSH_TRIGGER,
- .procname = "fmr_flush_trigger",
- .data = &fmr_flush_trigger,
- .maxlen = sizeof(int),
-@@ -245,7 +287,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 15,
-+ .ctl_name = O2IBLND_FMR_CACHE,
- .procname = "fmr_cache",
- .data = &fmr_cache,
- .maxlen = sizeof(int),
-@@ -258,7 +300,7 @@
-
- static cfs_sysctl_table_t kiblnd_top_ctl_table[] = {
- {
-- .ctl_name = 203,
-+ .ctl_name = CTL_O2IBLND,
- .procname = "o2iblnd",
- .data = NULL,
- .maxlen = 0,
-diff -urNad lustre~/lnet/klnds/openiblnd/openiblnd_modparams.c lustre/lnet/klnds/openiblnd/openiblnd_modparams.c
---- lustre~/lnet/klnds/openiblnd/openiblnd_modparams.c 2008-12-30 11:23:32.000000000 +0100
-+++ lustre/lnet/klnds/openiblnd/openiblnd_modparams.c 2009-03-10 11:41:03.000000000 +0100
-@@ -100,9 +100,42 @@
-
- #if defined(CONFIG_SYSCTL) && !CFS_SYSFS_MODULE_PARM
-
-+#ifndef HAVE_SYSCTL_UNNUMBERED
-+#define CTL_KIBNAL 203
-+enum {
-+ KIBNAL_IPIF_BASENAME = 1,
-+ KIBNAL_N_CONND,
-+ KIBNAL_RECONNECT_MIN,
-+ KIBNAL_RECONNECT_MAX,
-+ KIBNAL_CONCURRENT_PEERS,
-+ KIBNAL_CKSUM,
-+ KIBNAL_TIMEOUT,
-+ KIBNAL_NTX,
-+ KIBNAL_CREDITS,
-+ KIBNAL_PEER_CREDITS,
-+ KIBNAL_KEEPALIVE
-+};
-+#else
-+
-+#define CTL_KIBNAL CTL_UNNUMBERED
-+
-+#define KIBNAL_IPIF_BASENAME CTL_UNNUMBERED
-+#define KIBNAL_N_CONND CTL_UNNUMBERED
-+#define KIBNAL_RECONNECT_MIN CTL_UNNUMBERED
-+#define KIBNAL_RECONNECT_MAX CTL_UNNUMBERED
-+#define KIBNAL_CONCURRENT_PEERS CTL_UNNUMBERED
-+#define KIBNAL_CKSUM CTL_UNNUMBERED
-+#define KIBNAL_TIMEOUT CTL_UNNUMBERED
-+#define KIBNAL_NTX CTL_UNNUMBERED
-+#define KIBNAL_CREDITS CTL_UNNUMBERED
-+#define KIBNAL_PEER_CREDITS CTL_UNNUMBERED
-+#define KIBNAL_KEEPALIVE CTL_UNNUMBERED
-+
-+#endif
-+
- static cfs_sysctl_table_t kibnal_ctl_table[] = {
- {
-- .ctl_name = 1,
-+ .ctl_name = KIBNAL_IPIF_BASENAME,
- .procname = "ipif_basename",
- .data = &ipif_basename,
- .maxlen = 1024,
-@@ -110,7 +143,7 @@
- .proc_handler = &proc_dostring
- },
- {
-- .ctl_name = 2,
-+ .ctl_name = KIBNAL_N_CONND,
- .procname = "n_connd",
- .data = &n_connd,
- .maxlen = sizeof(int),
-@@ -118,7 +151,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 3,
-+ .ctl_name = KIBNAL_RECONNECT_MIN,
- .procname = "min_reconnect_interval",
- .data = &min_reconnect_interval,
- .maxlen = sizeof(int),
-@@ -126,7 +159,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 4,
-+ .ctl_name = KIBNAL_RECONNECT_MAX,
- .procname = "max_reconnect_interval",
- .data = &max_reconnect_interval,
- .maxlen = sizeof(int),
-@@ -134,7 +167,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 5,
-+ .ctl_name = KIBNAL_CONCURRENT_PEERS,
- .procname = "concurrent_peers",
- .data = &concurrent_peers,
- .maxlen = sizeof(int),
-@@ -142,7 +175,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 6,
-+ .ctl_name = KIBNAL_CKSUM,
- .procname = "cksum",
- .data = &cksum,
- .maxlen = sizeof(int),
-@@ -150,7 +183,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 7,
-+ .ctl_name = KIBNAL_TIMEOUT,
- .procname = "timeout",
- .data = &timeout,
- .maxlen = sizeof(int),
-@@ -158,7 +191,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 8,
-+ .ctl_name = KIBNAL_NTX,
- .procname = "ntx",
- .data = &ntx,
- .maxlen = sizeof(int),
-@@ -166,7 +199,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 9,
-+ .ctl_name = KIBNAL_CREDITS,
- .procname = "credits",
- .data = &credits,
- .maxlen = sizeof(int),
-@@ -174,7 +207,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 10,
-+ .ctl_name = KIBNAL_PEER_CREDITS,
- .procname = "peer_credits",
- .data = &peer_credits,
- .maxlen = sizeof(int),
-@@ -182,7 +215,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 11,
-+ .ctl_name = KIBNAL_KEEPALIVE,
- .procname = "keepalive",
- .data = &keepalive,
- .maxlen = sizeof(int),
-@@ -194,7 +227,7 @@
-
- static cfs_sysctl_table_t kibnal_top_ctl_table[] = {
- {
-- .ctl_name = 203,
-+ .ctl_name = CTL_KIBNAL,
- .procname = "openibnal",
- .data = NULL,
- .maxlen = 0,
-diff -urNad lustre~/lnet/klnds/ptllnd/ptllnd_modparams.c lustre/lnet/klnds/ptllnd/ptllnd_modparams.c
---- lustre~/lnet/klnds/ptllnd/ptllnd_modparams.c 2008-12-30 11:23:32.000000000 +0100
-+++ lustre/lnet/klnds/ptllnd/ptllnd_modparams.c 2009-03-10 11:41:03.000000000 +0100
-@@ -156,9 +156,54 @@
- }
- #endif
-
-+#ifndef HAVE_SYSCTL_UNNUMBERED
-+
-+#define CTL_PTLLND 207
-+
-+enum {
-+ KPTLLND_NTX = 1,
-+ KPTLLND_MAX_NODES,
-+ KPTLLND_MAX_PROC_PER_NODE,
-+ KPTLLND_CHECKSUM,
-+ KPTLLND_TIMEOUT,
-+ KPTLLND_PORTAL,
-+ KPTLLND_PID,
-+ KPTLLND_RXB_PAGES,
-+ KPTLLND_CREDITS,
-+ KPTLLND_PEERCREDITS,
-+ KPTLLND_MAX_MSG_SIZE,
-+ KPTLLND_PEER_HASH_SIZE,
-+ KPTLLND_RESHEDULE_LOOPS,
-+ KPTLLND_ACK_PUTS,
-+ KPTLLND_TRACETIMEOUT,
-+ KPTLLND_TRACEBASENAME,
-+ KPTLLND_SIMULATION_BITMAP
-+};
-+#else
-+#define CTL_PTLLND CTL_UNNUMBERED
-+
-+#define KPTLLND_NTX CTL_UNNUMBERED
-+#define KPTLLND_MAX_NODES CTL_UNNUMBERED
-+#define KPTLLND_MAX_PROC_PER_NODE CTL_UNNUMBERED
-+#define KPTLLND_CHECKSUM CTL_UNNUMBERED
-+#define KPTLLND_TIMEOUT CTL_UNNUMBERED
-+#define KPTLLND_PORTAL CTL_UNNUMBERED
-+#define KPTLLND_PID CTL_UNNUMBERED
-+#define KPTLLND_RXB_PAGES CTL_UNNUMBERED
-+#define KPTLLND_CREDITS CTL_UNNUMBERED
-+#define KPTLLND_PEERCREDITS CTL_UNNUMBERED
-+#define KPTLLND_MAX_MSG_SIZE CTL_UNNUMBERED
-+#define KPTLLND_PEER_HASH_SIZE CTL_UNNUMBERED
-+#define KPTLLND_RESHEDULE_LOOPS CTL_UNNUMBERED
-+#define KPTLLND_ACK_PUTS CTL_UNNUMBERED
-+#define KPTLLND_TRACETIMEOUT CTL_UNNUMBERED
-+#define KPTLLND_TRACEBASENAME CTL_UNNUMBERED
-+#define KPTLLND_SIMULATION_BITMAP CTL_UNNUMBERED
-+#endif
-+
- static cfs_sysctl_table_t kptllnd_ctl_table[] = {
- {
-- .ctl_name = 1,
-+ .ctl_name = KPTLLND_NTX,
- .procname = "ntx",
- .data = &ntx,
- .maxlen = sizeof(int),
-@@ -166,15 +211,15 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 2,
-+ .ctl_name = KPTLLND_MAX_NODES,
- .procname = "max_nodes",
- .data = &max_nodes,
- .maxlen = sizeof(int),
- .mode = 0444,
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 3,
-+ .ctl_name = KPTLLND_MAX_PROC_PER_NODE,
- .procname = "max_procs_per_node",
- .data = &max_procs_per_node,
- .maxlen = sizeof(int),
-@@ -182,7 +227,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 4,
-+ .ctl_name = KPTLLND_CHECKSUM,
- .procname = "checksum",
- .data = &checksum,
- .maxlen = sizeof(int),
-@@ -190,7 +235,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 5,
-+ .ctl_name = KPTLLND_TIMEOUT,
- .procname = "timeout",
- .data = &timeout,
- .maxlen = sizeof(int),
-@@ -198,7 +243,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 6,
-+ .ctl_name = KPTLLND_PORTAL,
- .procname = "portal",
- .data = &portal,
- .maxlen = sizeof(int),
-@@ -206,7 +251,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 7,
-+ .ctl_name = KPTLLND_PID,
- .procname = "pid",
- .data = &pid,
- .maxlen = sizeof(int),
-@@ -214,7 +259,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 8,
-+ .ctl_name = KPTLLND_RXB_PAGES,
- .procname = "rxb_npages",
- .data = &rxb_npages,
- .maxlen = sizeof(int),
-@@ -222,7 +267,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 9,
-+ .ctl_name = KPTLLND_CREDITS,
- .procname = "credits",
- .data = &credits,
- .maxlen = sizeof(int),
-@@ -230,7 +275,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 10,
-+ .ctl_name = KPTLLND_PEERCREDITS,
- .procname = "peercredits",
- .data = &peercredits,
- .maxlen = sizeof(int),
-@@ -238,7 +283,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 11,
-+ .ctl_name = KPTLLND_MAX_MSG_SIZE,
- .procname = "max_msg_size",
- .data = &max_msg_size,
- .maxlen = sizeof(int),
-@@ -246,7 +291,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 12,
-+ .ctl_name = KPTLLND_PEER_HASH_SIZE,
- .procname = "peer_hash_table_size",
- .data = &peer_hash_table_size,
- .maxlen = sizeof(int),
-@@ -254,7 +299,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 13,
-+ .ctl_name = KPTLLND_RESHEDULE_LOOPS,
- .procname = "reschedule_loops",
- .data = &reschedule_loops,
- .maxlen = sizeof(int),
-@@ -262,7 +307,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 14,
-+ .ctl_name = KPTLLND_ACK_PUTS,
- .procname = "ack_puts",
- .data = &ack_puts,
- .maxlen = sizeof(int),
-@@ -271,7 +316,7 @@
- },
- #ifdef CRAY_XT3
- {
-- .ctl_name = 15,
-+ .ctl_name = KPTLLND_TRACETIMEOUT,
- .procname = "ptltrace_on_timeout",
- .data = &ptltrace_on_timeout,
- .maxlen = sizeof(int),
-@@ -279,7 +324,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 16,
-+ .ctl_name = KPTLLND_TRACEBASENAME,
- .procname = "ptltrace_basename",
- .data = ptltrace_basename_space,
- .maxlen = sizeof(ptltrace_basename_space),
-@@ -290,7 +335,7 @@
- #endif
- #ifdef PJK_DEBUGGING
- {
-- .ctl_name = 17,
-+ .ctl_name = KPTLLND_SIMULATION_BITMAP,
- .procname = "simulation_bitmap",
- .data = &simulation_bitmap,
- .maxlen = sizeof(int),
-@@ -304,7 +349,7 @@
-
- static cfs_sysctl_table_t kptllnd_top_ctl_table[] = {
- {
-- .ctl_name = 203,
-+ .ctl_name = CTL_PTLLND,
- .procname = "ptllnd",
- .data = NULL,
- .maxlen = 0,
-diff -urNad lustre~/lnet/klnds/qswlnd/qswlnd_modparams.c lustre/lnet/klnds/qswlnd/qswlnd_modparams.c
---- lustre~/lnet/klnds/qswlnd/qswlnd_modparams.c 2008-12-30 11:23:32.000000000 +0100
-+++ lustre/lnet/klnds/qswlnd/qswlnd_modparams.c 2009-03-10 11:41:03.000000000 +0100
-@@ -87,9 +87,45 @@
- };
-
- #if defined(CONFIG_SYSCTL) && !CFS_SYSFS_MODULE_PARM
-+
-+#ifndef HAVE_SYSCTL_UNNUMBERED
-+
-+#define CTL_KQSWNAL 207
-+
-+enum {
-+ KQSWNAL_TX_MAXCONTIG = 1,
-+ KQSWNAL_NTXMSG,
-+ KQSWNAL_CREDITS,
-+ KQSWNAL_PEERCREDITS,
-+ KQSWNAL_NRXMSGS_LARGE,
-+ KQSWNAL_EP_ENVELOPES_LARGE,
-+ KQSWNAL_NRXMSGS_SMALL,
-+ KQSWNAL_EP_ENVELOPES_SMALL,
-+ KQSWNAL_OPTIMIZED_PUTS,
-+ KQSWNAL_OPTIMIZED_GETS,
-+ KQSWNAL_INJECT_CSUM_ERROR
-+};
-+#else
-+
-+#define CTL_KQSWNAL CTL_UNNUMBERED
-+
-+#define KQSWNAL_TX_MAXCONTIG CTL_UNNUMBERED
-+#define KQSWNAL_NTXMSG CTL_UNNUMBERED
-+#define KQSWNAL_CREDITS CTL_UNNUMBERED
-+#define KQSWNAL_PEERCREDITS CTL_UNNUMBERED
-+#define KQSWNAL_NRXMSGS_LARGE CTL_UNNUMBERED
-+#define KQSWNAL_EP_ENVELOPES_LARGE CTL_UNNUMBERED
-+#define KQSWNAL_NRXMSGS_SMALL CTL_UNNUMBERED
-+#define KQSWNAL_EP_ENVELOPES_SMALL CTL_UNNUMBERED
-+#define KQSWNAL_OPTIMIZED_PUTS CTL_UNNUMBERED
-+#define KQSWNAL_OPTIMIZED_GETS CTL_UNNUMBERED
-+#define KQSWNAL_INJECT_CSUM_ERROR CTL_UNNUMBERED
-+
-+#endif
-+
- static cfs_sysctl_table_t kqswnal_ctl_table[] = {
- {
-- .ctl_name = 1,
-+ .ctl_name = KQSWNAL_TX_MAXCONTIG,
- .procname = "tx_maxcontig",
- .data = &tx_maxcontig,
- .maxlen = sizeof (int),
-@@ -97,7 +133,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 2,
-+ .ctl_name = KQSWNAL_NTXMSG,
- .procname = "ntxmsgs",
- .data = &ntxmsgs,
- .maxlen = sizeof (int),
-@@ -105,7 +141,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 3,
-+ .ctl_name = KQSWNAL_CREDITS,
- .procname = "credits",
- .data = &credits,
- .maxlen = sizeof (int),
-@@ -113,7 +149,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 4,
-+ .ctl_name = KQSWNAL_PEERCREDITS,
- .procname = "peer_credits",
- .data = &peer_credits,
- .maxlen = sizeof (int),
-@@ -121,7 +157,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 5,
-+ .ctl_name = KQSWNAL_NRXMSGS_LARGE,
- .procname = "nrxmsgs_large",
- .data = &nrxmsgs_large,
- .maxlen = sizeof (int),
-@@ -129,7 +165,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 6,
-+ .ctl_name = KQSWNAL_EP_ENVELOPES_LARGE,
- .procname = "ep_envelopes_large",
- .data = &ep_envelopes_large,
- .maxlen = sizeof (int),
-@@ -137,7 +173,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 7,
-+ .ctl_name = KQSWNAL_NRXMSGS_SMALL,
- .procname = "nrxmsgs_small",
- .data = &nrxmsgs_small,
- .maxlen = sizeof (int),
-@@ -145,7 +181,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 8,
-+ .ctl_name = KQSWNAL_EP_ENVELOPES_SMALL,
- .procname = "ep_envelopes_small",
- .data = &ep_envelopes_small,
- .maxlen = sizeof (int),
-@@ -153,7 +189,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 9,
-+ .ctl_name = KQSWNAL_OPTIMIZED_PUTS,
- .procname = "optimized_puts",
- .data = &optimized_puts,
- .maxlen = sizeof (int),
-@@ -161,7 +197,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 10,
-+ .ctl_name = KQSWNAL_OPTIMIZED_GETS,
- .procname = "optimized_gets",
- .data = &optimized_gets,
- .maxlen = sizeof (int),
-@@ -170,7 +206,7 @@
- },
- #if KQSW_CKSUM
- {
-- .ctl_name = 11,
-+ .ctl_name = KQSWNAL_INJECT_CSUM_ERROR,
- .procname = "inject_csum_error",
- .data = &inject_csum_error,
- .maxlen = sizeof (int),
-@@ -183,7 +219,7 @@
-
- static cfs_sysctl_table_t kqswnal_top_ctl_table[] = {
- {
-- .ctl_name = 201,
-+ .ctl_name = CTL_KQSWNAL,
- .procname = "qswnal",
- .data = NULL,
- .maxlen = 0,
-diff -urNad lustre~/lnet/klnds/ralnd/ralnd_modparams.c lustre/lnet/klnds/ralnd/ralnd_modparams.c
---- lustre~/lnet/klnds/ralnd/ralnd_modparams.c 2008-12-30 11:23:32.000000000 +0100
-+++ lustre/lnet/klnds/ralnd/ralnd_modparams.c 2009-03-10 11:41:03.000000000 +0100
-@@ -89,9 +89,37 @@
- };
-
- #if CONFIG_SYSCTL && !CFS_SYSFS_MODULE_PARM
-+
-+#ifndef HAVE_SYSCTL_UNNUMBERED
-+#define CTL_KRANAL 202
-+enum {
-+ KRANAL_N_CONND = 1,
-+ KRANAL_RECONNECT_MIN,
-+ KRANAL_RECONNECT_MAX,
-+ KRANAL_NTX,
-+ KRANAL_CREDITS,
-+ KRANAL_PEERCREDITS,
-+ KRANAL_FMA_CQ_SIZE,
-+ KRANAL_TIMEOUT,
-+ KRANAL_IMMEDIATE_MAX
-+};
-+#else
-+#define CTL_KRANAL CTL_UNNUMBERED
-+
-+#define KRANAL_N_CONND CTL_UNNUMBERED
-+#define KRANAL_RECONNECT_MIN CTL_UNNUMBERED
-+#define KRANAL_RECONNECT_MAX CTL_UNNUMBERED
-+#define KRANAL_NTX CTL_UNNUMBERED
-+#define KRANAL_CREDITS CTL_UNNUMBERED
-+#define KRANAL_PEERCREDITS CTL_UNNUMBERED
-+#define KRANAL_FMA_CQ_SIZE CTL_UNNUMBERED
-+#define KRANAL_TIMEOUT CTL_UNNUMBERED
-+#define KRANAL_IMMEDIATE_MAX CTL_UNNUMBERED
-+#endif
-+
- static cfs_sysctl_table_t kranal_ctl_table[] = {
- {
-- .ctl_name = 1,
-+ .ctl_name = KRANAL_N_CONND,
- .procname = "n_connd",
- .data = &n_connd,
- .maxlen = sizeof(int),
-@@ -99,7 +127,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 2,
-+ .ctl_name = KRANAL_RECONNECT_MIN,
- .procname = "min_reconnect_interval",
- .data = &min_reconnect_interval,
- .maxlen = sizeof(int),
-@@ -107,7 +135,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 3,
-+ .ctl_name = KRANAL_RECONNECT_MAX,
- .procname = "max_reconnect_interval",
- .data = &max_reconnect_interval,
- .maxlen = sizeof(int),
-@@ -115,7 +143,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 4,
-+ .ctl_name = KRANAL_NTX,
- .procname = "ntx",
- .data = &ntx,
- .maxlen = sizeof(int),
-@@ -123,7 +151,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 5,
-+ .ctl_name = KRANAL_CREDITS,
- .procname = "credits",
- .data = &credits,
- .maxlen = sizeof(int),
-@@ -131,7 +159,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 6,
-+ .ctl_name = KRANAL_PEERCREDITS,
- .procname = "peer_credits",
- .data = &peer_credits,
- .maxlen = sizeof(int),
-@@ -139,7 +167,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 7,
-+ .ctl_name = KRANAL_FMA_CQ_SIZE,
- .procname = "fma_cq_size",
- .data = &fma_cq_size,
- .maxlen = sizeof(int),
-@@ -147,7 +175,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 8,
-+ .ctl_name = KRANAL_TIMEOUT,
- .procname = "timeout",
- .data = &timeout,
- .maxlen = sizeof(int),
-@@ -155,7 +183,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 9,
-+ .ctl_name = KRANAL_IMMEDIATE_MAX,
- .procname = "max_immediate",
- .data = &max_immediate,
- .maxlen = sizeof(int),
-@@ -167,7 +195,7 @@
-
- static cfs_sysctl_table_t kranal_top_ctl_table[] = {
- {
-- .ctl_name = 202,
-+ .ctl_name = CTL_KRANAL,
- .procname = "ranal",
- .data = NULL,
- .maxlen = 0,
-diff -urNad lustre~/lnet/klnds/socklnd/socklnd_lib-linux.c lustre/lnet/klnds/socklnd/socklnd_lib-linux.c
---- lustre~/lnet/klnds/socklnd/socklnd_lib-linux.c 2008-12-30 11:23:32.000000000 +0100
-+++ lustre/lnet/klnds/socklnd/socklnd_lib-linux.c 2009-03-10 11:41:03.000000000 +0100
-@@ -37,197 +37,244 @@
- #include "socklnd.h"
-
- # if defined(CONFIG_SYSCTL) && !CFS_SYSFS_MODULE_PARM
--static cfs_sysctl_table_t ksocknal_ctl_table[21];
-
--cfs_sysctl_table_t ksocknal_top_ctl_table[] = {
-- {
-- .ctl_name = 200,
-- .procname = "socknal",
-- .data = NULL,
-- .maxlen = 0,
-- .mode = 0555,
-- .child = ksocknal_ctl_table
-- },
-- { 0 }
-+#ifndef HAVE_SYSCTL_UNNUMBERED
-+
-+#define CTL_SOCKLND 209
-+
-+enum {
-+ SOCKLND_TIMEOUT = 1,
-+ SOCKLND_CREDITS,
-+ SOCKLND_PEER_CREDITS,
-+ SOCKLND_NCONNDS,
-+ SOCKLND_RECONNECTS_MIN,
-+ SOCKLND_RECONNECTS_MAX,
-+ SOCKLND_EAGER_ACK,
-+ SOCKLND_ZERO_COPY,
-+ SOCKLND_TYPED,
-+ SOCKLND_BULK_MIN,
-+ SOCKLND_RX_BUFFER_SIZE,
-+ SOCKLND_TX_BUFFER_SIZE,
-+ SOCKLND_NAGLE,
-+ SOCKLND_IRQ_AFFINITY,
-+ SOCKLND_KEEPALIVE_IDLE,
-+ SOCKLND_KEEPALIVE_COUNT,
-+ SOCKLND_KEEPALIVE_INTVL,
-+ SOCKLND_BACKOFF_INIT,
-+ SOCKLND_BACKOFF_MAX,
-+ SOCKLND_PROTOCOL
- };
-+#else
-+#define CTL_SOCKLND CTL_UNNUMBERED
-
--int
--ksocknal_lib_tunables_init ()
--{
-- int i = 0;
-- int j = 1;
-+#define SOCKLND_TIMEOUT CTL_UNNUMBERED
-+#define SOCKLND_CREDITS CTL_UNNUMBERED
-+#define SOCKLND_PEER_CREDITS CTL_UNNUMBERED
-+#define SOCKLND_NCONNDS CTL_UNNUMBERED
-+#define SOCKLND_RECONNECTS_MIN CTL_UNNUMBERED
-+#define SOCKLND_RECONNECTS_MAX CTL_UNNUMBERED
-+#define SOCKLND_EAGER_ACK CTL_UNNUMBERED
-+#define SOCKLND_ZERO_COPY CTL_UNNUMBERED
-+#define SOCKLND_TYPED CTL_UNNUMBERED
-+#define SOCKLND_BULK_MIN CTL_UNNUMBERED
-+#define SOCKLND_RX_BUFFER_SIZE CTL_UNNUMBERED
-+#define SOCKLND_TX_BUFFER_SIZE CTL_UNNUMBERED
-+#define SOCKLND_NAGLE CTL_UNNUMBERED
-+#define SOCKLND_IRQ_AFFINITY CTL_UNNUMBERED
-+#define SOCKLND_KEEPALIVE_IDLE CTL_UNNUMBERED
-+#define SOCKLND_KEEPALIVE_COUNT CTL_UNNUMBERED
-+#define SOCKLND_KEEPALIVE_INTVL CTL_UNNUMBERED
-+#define SOCKLND_BACKOFF_INIT CTL_UNNUMBERED
-+#define SOCKLND_BACKOFF_MAX CTL_UNNUMBERED
-+#define SOCKLND_PROTOCOL CTL_UNNUMBERED
-+#endif
-
-- ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
-- .ctl_name = j++,
-+static cfs_sysctl_table_t ksocknal_ctl_table[] = {
-+ {
-+ .ctl_name = SOCKLND_TIMEOUT,
- .procname = "timeout",
- .data = ksocknal_tunables.ksnd_timeout,
- .maxlen = sizeof (int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
-- };
-- ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
-- .ctl_name = j++,
-+ },
-+ {
-+ .ctl_name = SOCKLND_CREDITS,
- .procname = "credits",
- .data = ksocknal_tunables.ksnd_credits,
- .maxlen = sizeof (int),
- .mode = 0444,
- .proc_handler = &proc_dointvec
-- };
-- ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
-- .ctl_name = j++,
-+ },
-+ {
-+ .ctl_name = SOCKLND_PEER_CREDITS,
- .procname = "peer_credits",
- .data = ksocknal_tunables.ksnd_peercredits,
- .maxlen = sizeof (int),
- .mode = 0444,
- .proc_handler = &proc_dointvec
-- };
-- ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
-- .ctl_name = j++,
-+ },
-+ {
-+ .ctl_name = SOCKLND_NCONNDS,
- .procname = "nconnds",
- .data = ksocknal_tunables.ksnd_nconnds,
- .maxlen = sizeof (int),
- .mode = 0444,
- .proc_handler = &proc_dointvec
-- };
-- ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
-- .ctl_name = j++,
-+ },
-+ {
-+ .ctl_name = SOCKLND_RECONNECTS_MIN,
- .procname = "min_reconnectms",
- .data = ksocknal_tunables.ksnd_min_reconnectms,
- .maxlen = sizeof (int),
- .mode = 0444,
- .proc_handler = &proc_dointvec
-- };
-- ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
-- .ctl_name = j++,
-+ },
-+ {
-+ .ctl_name = SOCKLND_RECONNECTS_MAX,
- .procname = "max_reconnectms",
- .data = ksocknal_tunables.ksnd_max_reconnectms,
- .maxlen = sizeof (int),
- .mode = 0444,
- .proc_handler = &proc_dointvec
-- };
-- ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
-- .ctl_name = j++,
-+ },
-+ {
-+ .ctl_name = SOCKLND_EAGER_ACK,
- .procname = "eager_ack",
- .data = ksocknal_tunables.ksnd_eager_ack,
- .maxlen = sizeof (int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
-- };
-- ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
-- .ctl_name = j++,
-+ },
-+ {
-+ .ctl_name = SOCKLND_ZERO_COPY,
- .procname = "zero_copy",
- .data = ksocknal_tunables.ksnd_zc_min_frag,
- .maxlen = sizeof (int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
-- };
-- ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
-- .ctl_name = j++,
-+ },
-+ {
-+ .ctl_name = SOCKLND_TYPED,
- .procname = "typed",
- .data = ksocknal_tunables.ksnd_typed_conns,
- .maxlen = sizeof (int),
- .mode = 0444,
- .proc_handler = &proc_dointvec
-- };
-- ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
-- .ctl_name = j++,
-+ },
-+ {
-+ .ctl_name = SOCKLND_BULK_MIN,
- .procname = "min_bulk",
- .data = ksocknal_tunables.ksnd_min_bulk,
- .maxlen = sizeof (int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
-- };
-- ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
-- .ctl_name = j++,
-+ },
-+ {
-+ .ctl_name = SOCKLND_RX_BUFFER_SIZE,
- .procname = "rx_buffer_size",
- .data = ksocknal_tunables.ksnd_rx_buffer_size,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
-- };
-- ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
-- .ctl_name = j++,
-+ },
-+ {
-+ .ctl_name = SOCKLND_TX_BUFFER_SIZE,
- .procname = "tx_buffer_size",
- .data = ksocknal_tunables.ksnd_tx_buffer_size,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
-- };
-- ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
-- .ctl_name = j++,
-+ },
-+ {
-+ .ctl_name = SOCKLND_NAGLE,
- .procname = "nagle",
- .data = ksocknal_tunables.ksnd_nagle,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
-- };
-+ },
- #ifdef CPU_AFFINITY
-- ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
-- .ctl_name = j++,
-+ {
-+ .ctl_name = SOCKLND_IRQ_AFFINITY,
- .procname = "irq_affinity",
- .data = ksocknal_tunables.ksnd_irq_affinity,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
-- };
-+ },
- #endif
-- ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
-- .ctl_name = j++,
-+ {
-+ .ctl_name = SOCKLND_KEEPALIVE_IDLE,
- .procname = "keepalive_idle",
- .data = ksocknal_tunables.ksnd_keepalive_idle,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
-- };
-- ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
-- .ctl_name = j++,
-+ },
-+ {
-+ .ctl_name = SOCKLND_KEEPALIVE_COUNT,
- .procname = "keepalive_count",
- .data = ksocknal_tunables.ksnd_keepalive_count,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
-- };
-- ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
-- .ctl_name = j++,
-+ },
-+ {
-+ .ctl_name = SOCKLND_KEEPALIVE_INTVL,
- .procname = "keepalive_intvl",
- .data = ksocknal_tunables.ksnd_keepalive_intvl,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
-- };
-+ },
- #ifdef SOCKNAL_BACKOFF
-- ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
-- .ctl_name = j++,
-+ {
-+ .ctl_name = SOCKLND_BACKOFF_INIT,
- .procname = "backoff_init",
- .data = ksocknal_tunables.ksnd_backoff_init,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
-- };
-- ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
-- .ctl_name = j++,
-+ },
-+ {
-+ .ctl_name = SOCKLND_BACKOFF_MAX,
- .procname = "backoff_max",
- .data = ksocknal_tunables.ksnd_backoff_max,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
-- };
-+ },
- #endif
- #if SOCKNAL_VERSION_DEBUG
-- ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
-- .ctl_name = j++,
-+ {
-+ .ctl_name = SOCKLND_PROTOCOL,
- .procname = "protocol",
- .data = ksocknal_tunables.ksnd_protocol,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
-- };
-+ },
- #endif
-- ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) { 0 };
-+ {0}
-+};
-
-- LASSERT (j == i);
-- LASSERT (i <= sizeof(ksocknal_ctl_table)/sizeof(ksocknal_ctl_table[0]));
-
-+cfs_sysctl_table_t ksocknal_top_ctl_table[] = {
-+ {
-+ .ctl_name = CTL_SOCKLND,
-+ .procname = "socknal",
-+ .data = NULL,
-+ .maxlen = 0,
-+ .mode = 0555,
-+ .child = ksocknal_ctl_table
-+ },
-+ { 0 }
-+};
-+
-+int
-+ksocknal_lib_tunables_init ()
-+{
- ksocknal_tunables.ksnd_sysctl =
- cfs_register_sysctl_table(ksocknal_top_ctl_table, 0);
-
-diff -urNad lustre~/lnet/klnds/viblnd/viblnd_modparams.c lustre/lnet/klnds/viblnd/viblnd_modparams.c
---- lustre~/lnet/klnds/viblnd/viblnd_modparams.c 2008-12-30 11:23:32.000000000 +0100
-+++ lustre/lnet/klnds/viblnd/viblnd_modparams.c 2009-03-10 11:41:03.000000000 +0100
-@@ -142,6 +142,56 @@
- #endif
- };
-
-+#ifndef HAVE_SYSCTL_UNNUMBERED
-+
-+#define CTL_VIBLND 209
-+
-+enum {
-+ VIBLND_SERVICE = 1,
-+ VIBLND_RECONNECT_MIN,
-+ VIBLND_RECONNECT_MAX,
-+ VIBLND_CONCURRENT_PEERS,
-+ VIBLND_CHKSUM,
-+ VIBLND_TIMEOUT,
-+ VIBLND_NTX,
-+ VIBLND_CREDITS,
-+ VIBLND_PEER_CREDITS,
-+ VIBLND_ARP_RETRIES,
-+ VIBLND_HCA_BASENAME,
-+ VIBLND_IPIF_BASENAME,
-+ VIBLND_LOCAL_ACK_TIMEOUT,
-+ VIBLND_RETRY_CNT,
-+ VIBLND_RNR_CNT,
-+ VIBLND_RNR_NAK_TIMER,
-+ VIBLND_KEEPALIVE,
-+ VIBLND_CONCURRENT_SENDS,
-+ VIBLND_FMR_REMAPS
-+};
-+#else
-+#define CTL_VIBLND CTL_UNNUMBERED
-+
-+#define VIBLND_SERVICE CTL_UNNUMBERED
-+#define VIBLND_RECONNECT_MIN CTL_UNNUMBERED
-+#define VIBLND_RECONNECT_MAX CTL_UNNUMBERED
-+#define VIBLND_CONCURRENT_PEERS CTL_UNNUMBERED
-+#define VIBLND_CHKSUM CTL_UNNUMBERED
-+#define VIBLND_TIMEOUT CTL_UNNUMBERED
-+#define VIBLND_NTX CTL_UNNUMBERED
-+#define VIBLND_CREDITS CTL_UNNUMBERED
-+#define VIBLND_PEER_CREDITS CTL_UNNUMBERED
-+#define VIBLND_ARP_RETRIES CTL_UNNUMBERED
-+#define VIBLND_HCA_BASENAME CTL_UNNUMBERED
-+#define VIBLND_IPIF_BASENAME CTL_UNNUMBERED
-+#define VIBLND_LOCAL_ACK_TIMEOUT CTL_UNNUMBERED
-+#define VIBLND_RETRY_CNT CTL_UNNUMBERED
-+#define VIBLND_RNR_CNT CTL_UNNUMBERED
-+#define VIBLND_RNR_NAK_TIMER CTL_UNNUMBERED
-+#define VIBLND_KEEPALIVE CTL_UNNUMBERED
-+#define VIBLND_CONCURRENT_SENDS CTL_UNNUMBERED
-+#define VIBLND_FMR_REMAPS CTL_UNNUMBERED
-+
-+#endif
-+
- #if defined(CONFIG_SYSCTL) && !CFS_SYSFS_MODULE_PARM
-
- static char hca_basename_space[32];
-@@ -149,7 +199,7 @@
-
- static cfs_sysctl_table_t kibnal_ctl_table[] = {
- {
-- .ctl_name = 1,
-+ .ctl_name = VIBLND_SERVICE,
- .procname = "service_number",
- .data = &service_number,
- .maxlen = sizeof(int),
-@@ -157,7 +207,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 2,
-+ .ctl_name = VIBLND_RECONNECT_MIN,
- .procname = "min_reconnect_interval",
- .data = &min_reconnect_interval,
- .maxlen = sizeof(int),
-@@ -165,7 +215,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 3,
-+ .ctl_name = VIBLND_RECONNECT_MAX,
- .procname = "max_reconnect_interval",
- .data = &max_reconnect_interval,
- .maxlen = sizeof(int),
-@@ -173,7 +223,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 4,
-+ .ctl_name = VIBLND_CONCURRENT_PEERS,
- .procname = "concurrent_peers",
- .data = &concurrent_peers,
- .maxlen = sizeof(int),
-@@ -181,7 +231,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 5,
-+ .ctl_name = VIBLND_CHKSUM,
- .procname = "cksum",
- .data = &cksum,
- .maxlen = sizeof(int),
-@@ -189,7 +239,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 6,
-+ .ctl_name = VIBLND_TIMEOUT,
- .procname = "timeout",
- .data = &timeout,
- .maxlen = sizeof(int),
-@@ -197,7 +247,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 7,
-+ .ctl_name = VIBLND_NTX,
- .procname = "ntx",
- .data = &ntx,
- .maxlen = sizeof(int),
-@@ -205,7 +255,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 8,
-+ .ctl_name = VIBLND_CREDITS,
- .procname = "credits",
- .data = &credits,
- .maxlen = sizeof(int),
-@@ -213,7 +263,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 9,
-+ .ctl_name = VIBLND_PEER_CREDITS,
- .procname = "peer_credits",
- .data = &peer_credits,
- .maxlen = sizeof(int),
-@@ -221,7 +271,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 10,
-+ .ctl_name = VIBLND_ARP_RETRIES,
- .procname = "arp_retries",
- .data = &arp_retries,
- .maxlen = sizeof(int),
-@@ -229,7 +279,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 11,
-+ .ctl_name = VIBLND_HCA_BASENAME,
- .procname = "hca_basename",
- .data = hca_basename_space,
- .maxlen = sizeof(hca_basename_space),
-@@ -237,7 +287,7 @@
- .proc_handler = &proc_dostring
- },
- {
-- .ctl_name = 12,
-+ .ctl_name = VIBLND_IPIF_BASENAME,
- .procname = "ipif_basename",
- .data = ipif_basename_space,
- .maxlen = sizeof(ipif_basename_space),
-@@ -245,7 +295,7 @@
- .proc_handler = &proc_dostring
- },
- {
-- .ctl_name = 13,
-+ .ctl_name = VIBLND_LOCAL_ACK_TIMEOUT,
- .procname = "local_ack_timeout",
- .data = &local_ack_timeout,
- .maxlen = sizeof(int),
-@@ -253,7 +303,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 14,
-+ .ctl_name = VIBLND_RETRY_CNT,
- .procname = "retry_cnt",
- .data = &retry_cnt,
- .maxlen = sizeof(int),
-@@ -261,7 +311,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 15,
-+ .ctl_name = VIBLND_RNR_CNT,
- .procname = "rnr_cnt",
- .data = &rnr_cnt,
- .maxlen = sizeof(int),
-@@ -269,7 +319,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 16,
-+ .ctl_name = VIBLND_RNR_NAK_TIMER,
- .procname = "rnr_nak_timer",
- .data = &rnr_nak_timer,
- .maxlen = sizeof(int),
-@@ -277,7 +327,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 17,
-+ .ctl_name = VIBLND_KEEPALIVE,
- .procname = "keepalive",
- .data = &keepalive,
- .maxlen = sizeof(int),
-@@ -285,7 +335,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 18,
-+ .ctl_name = VIBLND_CONCURRENT_SENDS,
- .procname = "concurrent_sends",
- .data = &concurrent_sends,
- .maxlen = sizeof(int),
-@@ -294,7 +344,7 @@
- },
- #if IBNAL_USE_FMR
- {
-- .ctl_name = 19,
-+ .ctl_name = VIBLND_FMR_REMAPS,
- .procname = "fmr_remaps",
- .data = &fmr_remaps,
- .maxlen = sizeof(int),
-@@ -307,7 +357,7 @@
-
- static cfs_sysctl_table_t kibnal_top_ctl_table[] = {
- {
-- .ctl_name = 203,
-+ .ctl_name = CTL_VIBLND,
- .procname = "vibnal",
- .data = NULL,
- .maxlen = 0,
-diff -urNad lustre~/lnet/libcfs/linux/linux-curproc.c lustre/lnet/libcfs/linux/linux-curproc.c
---- lustre~/lnet/libcfs/linux/linux-curproc.c 2008-12-30 11:23:32.000000000 +0100
-+++ lustre/lnet/libcfs/linux/linux-curproc.c 2009-03-10 11:41:03.000000000 +0100
-@@ -131,6 +131,19 @@
- current->cap_effective = cap;
- }
-
-+cfs_cap_t cfs_cap_convert_from_kernel(cfs_kernel_cap_t cap)
-+{
-+#if _LINUX_CAPABILITY_VERSION_3 || _LINUX_CAPABILITY_VERSION == 0x20071026
-+ /* XXX lost high byte */
-+ return cap.cap[0];
-+#elif _LINUX_CAPABILITY_VERSION == 0x19980330
-+ return cap;
-+#else
-+ #error "need correct _LINUX_CAPABILITY_VERSION "
-+#endif
-+}
-+
-+
- EXPORT_SYMBOL(cfs_curproc_uid);
- EXPORT_SYMBOL(cfs_curproc_pid);
- EXPORT_SYMBOL(cfs_curproc_gid);
-@@ -143,7 +156,7 @@
- EXPORT_SYMBOL(cfs_curproc_is_in_groups);
- EXPORT_SYMBOL(cfs_curproc_cap_get);
- EXPORT_SYMBOL(cfs_curproc_cap_set);
--
-+EXPORT_SYMBOL(cfs_cap_convert_from_kernel);
- /*
- * Local variables:
- * c-indentation-style: "K&R"
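For the capability change above: newer kernels turned kernel_cap_t into a structure holding an array of __u32 words, so the patch introduces cfs_cap_convert_from_kernel() to flatten it back into the 32-bit cfs_cap_t declared in libcfs/curproc.h earlier in this diff. A hedged sketch of how a caller might combine it with cfs_curproc_cap_get() (the wrapper name below is illustrative, not from the patch):

    /* Illustrative helper: return the current process's capabilities
     * as the 32-bit cfs_cap_t used by Lustre code. */
    static inline cfs_cap_t example_curproc_cap(void)
    {
            return cfs_cap_convert_from_kernel(cfs_curproc_cap_get());
    }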
-diff -urNad lustre~/lnet/libcfs/linux/linux-proc.c lustre/lnet/libcfs/linux/linux-proc.c
---- lustre~/lnet/libcfs/linux/linux-proc.c 2008-12-30 11:23:32.000000000 +0100
-+++ lustre/lnet/libcfs/linux/linux-proc.c 2009-03-10 11:41:03.000000000 +0100
-@@ -79,7 +79,8 @@
- static cfs_sysctl_table_header_t *lnet_table_header = NULL;
- extern char lnet_upcall[1024];
-
--#define PSDEV_LNET (0x100)
-+#ifndef HAVE_SYSCTL_UNNUMBERED
-+#define CTL_LNET (0x100)
- enum {
- PSDEV_DEBUG = 1, /* control debugging */
- PSDEV_SUBSYSTEM_DEBUG, /* control debugging */
-@@ -98,8 +99,27 @@
- PSDEV_LNET_DAEMON_FILE, /* spool kernel debug buffer to file */
- PSDEV_LNET_DEBUG_MB, /* size of debug buffer */
- };
-+#else
-+#define CTL_LNET CTL_UNNUMBERED
-+#define PSDEV_DEBUG CTL_UNNUMBERED
-+#define PSDEV_SUBSYSTEM_DEBUG CTL_UNNUMBERED
-+#define PSDEV_PRINTK CTL_UNNUMBERED
-+#define PSDEV_CONSOLE_RATELIMIT CTL_UNNUMBERED
-+#define PSDEV_CONSOLE_MAX_DELAY_CS CTL_UNNUMBERED
-+#define PSDEV_CONSOLE_MIN_DELAY_CS CTL_UNNUMBERED
-+#define PSDEV_CONSOLE_BACKOFF CTL_UNNUMBERED
-+#define PSDEV_DEBUG_PATH CTL_UNNUMBERED
-+#define PSDEV_DEBUG_DUMP_PATH CTL_UNNUMBERED
-+#define PSDEV_LNET_UPCALL CTL_UNNUMBERED
-+#define PSDEV_LNET_MEMUSED CTL_UNNUMBERED
-+#define PSDEV_LNET_CATASTROPHE CTL_UNNUMBERED
-+#define PSDEV_LNET_PANIC_ON_LBUG CTL_UNNUMBERED
-+#define PSDEV_LNET_DUMP_KERNEL CTL_UNNUMBERED
-+#define PSDEV_LNET_DAEMON_FILE CTL_UNNUMBERED
-+#define PSDEV_LNET_DEBUG_MB CTL_UNNUMBERED
-+#endif
-
--static int
-+int
- proc_call_handler(void *data, int write,
- loff_t *ppos, void *buffer, size_t *lenp,
- int (*handler)(void *data, int write,
-@@ -118,17 +138,7 @@
- }
- return 0;
- }
--
--#define DECLARE_PROC_HANDLER(name) \
--static int \
--LL_PROC_PROTO(name) \
--{ \
-- DECLARE_LL_PROC_PPOS_DECL; \
-- \
-- return proc_call_handler(table->data, write, \
-- ppos, buffer, lenp, \
-- __##name); \
--}
-+EXPORT_SYMBOL(proc_call_handler);
-
- static int __proc_dobitmasks(void *data, int write,
- loff_t pos, void *buffer, int nob)
-@@ -200,11 +210,12 @@
-
- DECLARE_PROC_HANDLER(proc_daemon_file)
-
-+char tmpstr[32];
-+
- static int __proc_debug_mb(void *data, int write,
- loff_t pos, void *buffer, int nob)
- {
- if (!write) {
-- char tmpstr[32];
- int len = snprintf(tmpstr, sizeof(tmpstr), "%d",
- trace_get_debug_mb());
-
-@@ -319,7 +330,8 @@
- .data = &libcfs_debug,
- .maxlen = sizeof(int),
- .mode = 0644,
-- .proc_handler = &proc_dobitmasks
-+ .proc_handler = &proc_dobitmasks,
-+ .strategy = &sysctl_intvec,
- },
- {
- .ctl_name = PSDEV_SUBSYSTEM_DEBUG,
-@@ -327,7 +339,8 @@
- .data = &libcfs_subsystem_debug,
- .maxlen = sizeof(int),
- .mode = 0644,
-- .proc_handler = &proc_dobitmasks
-+ .proc_handler = &proc_dobitmasks,
-+ .strategy = &sysctl_intvec,
- },
- {
- .ctl_name = PSDEV_PRINTK,
-@@ -335,7 +348,8 @@
- .data = &libcfs_printk,
- .maxlen = sizeof(int),
- .mode = 0644,
-- .proc_handler = &proc_dobitmasks
-+ .proc_handler = &proc_dobitmasks,
-+ .strategy = &sysctl_intvec,
- },
- {
- .ctl_name = PSDEV_CONSOLE_RATELIMIT,
-@@ -343,30 +357,36 @@
- .data = &libcfs_console_ratelimit,
- .maxlen = sizeof(int),
- .mode = 0644,
-- .proc_handler = &proc_dointvec
-+ .proc_handler = &proc_dointvec,
-+ .strategy = &sysctl_intvec,
- },
- {
- .ctl_name = PSDEV_CONSOLE_MAX_DELAY_CS,
- .procname = "console_max_delay_centisecs",
-+ .data = &libcfs_console_max_delay,
- .maxlen = sizeof(int),
- .mode = 0644,
-- .proc_handler = &proc_console_max_delay_cs
-+ .proc_handler = &proc_console_max_delay_cs,
-+ .strategy = &sysctl_intvec,
- },
- {
- .ctl_name = PSDEV_CONSOLE_MIN_DELAY_CS,
- .procname = "console_min_delay_centisecs",
- .maxlen = sizeof(int),
-+ .data = &libcfs_console_min_delay,
- .mode = 0644,
-- .proc_handler = &proc_console_min_delay_cs
-+ .proc_handler = &proc_console_min_delay_cs,
-+ .strategy = &sysctl_intvec,
- },
- {
- .ctl_name = PSDEV_CONSOLE_BACKOFF,
- .procname = "console_backoff",
- .maxlen = sizeof(int),
-+ .data = &libcfs_console_backoff,
- .mode = 0644,
-- .proc_handler = &proc_console_backoff
-+ .proc_handler = &proc_console_backoff,
-+ .strategy = &sysctl_intvec,
- },
--
- {
- .ctl_name = PSDEV_DEBUG_PATH,
- .procname = "debug_path",
-@@ -374,8 +394,8 @@
- .maxlen = sizeof(debug_file_path_arr),
- .mode = 0644,
- .proc_handler = &proc_dostring,
-+ .strategy = &sysctl_string,
- },
--
- {
- .ctl_name = PSDEV_LNET_UPCALL,
- .procname = "upcall",
-@@ -383,6 +403,7 @@
- .maxlen = sizeof(lnet_upcall),
- .mode = 0644,
- .proc_handler = &proc_dostring,
-+ .strategy = &sysctl_string,
- },
- {
- .ctl_name = PSDEV_LNET_MEMUSED,
-@@ -390,7 +411,8 @@
- .data = (int *)&libcfs_kmemory.counter,
- .maxlen = sizeof(int),
- .mode = 0444,
-- .proc_handler = &proc_dointvec
-+ .proc_handler = &proc_dointvec,
-+ .strategy = &sysctl_intvec,
- },
- {
- .ctl_name = PSDEV_LNET_CATASTROPHE,
-@@ -398,7 +420,8 @@
- .data = &libcfs_catastrophe,
- .maxlen = sizeof(int),
- .mode = 0444,
-- .proc_handler = &proc_dointvec
-+ .proc_handler = &proc_dointvec,
-+ .strategy = &sysctl_intvec,
- },
- {
- .ctl_name = PSDEV_LNET_PANIC_ON_LBUG,
-@@ -406,39 +429,52 @@
- .data = &libcfs_panic_on_lbug,
- .maxlen = sizeof(int),
- .mode = 0644,
-- .proc_handler = &proc_dointvec
-+ .proc_handler = &proc_dointvec,
-+ .strategy = &sysctl_intvec,
- },
- {
- .ctl_name = PSDEV_LNET_DUMP_KERNEL,
- .procname = "dump_kernel",
-+ .data = tmpstr,
-+ .maxlen = 256,
- .mode = 0200,
- .proc_handler = &proc_dump_kernel,
-+ .strategy = &sysctl_string,
- },
- {
- .ctl_name = PSDEV_LNET_DAEMON_FILE,
- .procname = "daemon_file",
-+ .data = tmpstr,
- .mode = 0644,
-+ .maxlen = 256,
- .proc_handler = &proc_daemon_file,
-+ .strategy = &sysctl_string,
- },
- {
- .ctl_name = PSDEV_LNET_DEBUG_MB,
- .procname = "debug_mb",
-+ .data = tmpstr,
-+ .maxlen = sizeof(tmpstr),
- .mode = 0644,
- .proc_handler = &proc_debug_mb,
-+ .strategy = &sysctl_string,
- },
- {0}
- };
-
--static cfs_sysctl_table_t top_table[2] = {
-+static cfs_sysctl_table_t top_table[] = {
- {
-- .ctl_name = PSDEV_LNET,
-+ .ctl_name = CTL_LNET,
- .procname = "lnet",
-- .data = NULL,
-- .maxlen = 0,
- .mode = 0555,
-- .child = lnet_table
-+ .child = lnet_table,
-+#ifdef HAVE_PARENT_IN_CTLTABLE
-+ .parent = NULL,
-+#endif
- },
-- {0}
-+ {
-+ .ctl_name = 0
-+ }
- };
-
- int insert_proc(void)
-diff -urNad lustre~/lnet/lnet/router_proc.c lustre/lnet/lnet/router_proc.c
---- lustre~/lnet/lnet/router_proc.c 2008-12-30 11:23:32.000000000 +0100
-+++ lustre/lnet/lnet/router_proc.c 2009-03-10 11:41:03.000000000 +0100
-@@ -32,12 +32,13 @@
-
- /* this is really lnet_proc.c */
-
--#define LNET_PROC_STATS "sys/lnet/stats"
--#define LNET_PROC_ROUTES "sys/lnet/routes"
--#define LNET_PROC_ROUTERS "sys/lnet/routers"
--#define LNET_PROC_PEERS "sys/lnet/peers"
--#define LNET_PROC_BUFFERS "sys/lnet/buffers"
--#define LNET_PROC_NIS "sys/lnet/nis"
-+#define LNET_PROC_ROOT "sys/lnet"
-+#define LNET_PROC_STATS LNET_PROC_ROOT"/stats"
-+#define LNET_PROC_ROUTES LNET_PROC_ROOT"/routes"
-+#define LNET_PROC_ROUTERS LNET_PROC_ROOT"/routers"
-+#define LNET_PROC_PEERS LNET_PROC_ROOT"/peers"
-+#define LNET_PROC_BUFFERS LNET_PROC_ROOT"/buffers"
-+#define LNET_PROC_NIS LNET_PROC_ROOT"/nis"
-
- static int
- lnet_router_proc_stats_read (char *page, char **start, off_t off,
-@@ -1007,6 +1008,12 @@
- {
- struct proc_dir_entry *pde;
-
-+ pde = proc_mkdir(LNET_PROC_ROOT, NULL);
-+ if (pde == NULL) {
-+ CERROR("couldn't create "LNET_PROC_ROOT"\n");
-+ return;
-+ }
-+
- /* Initialize LNET_PROC_STATS */
- pde = create_proc_entry (LNET_PROC_STATS, 0644, NULL);
- if (pde == NULL) {
-@@ -1078,6 +1085,7 @@
- remove_proc_entry(LNET_PROC_PEERS, 0);
- remove_proc_entry(LNET_PROC_BUFFERS, 0);
- remove_proc_entry(LNET_PROC_NIS, 0);
-+ remove_proc_entry(LNET_PROC_ROOT, 0);
- }
-
- #else
-diff -urNad lustre~/lustre/autoconf/lustre-core.m4 lustre/lustre/autoconf/lustre-core.m4
---- lustre~/lustre/autoconf/lustre-core.m4 2008-12-30 11:23:32.000000000 +0100
-+++ lustre/lustre/autoconf/lustre-core.m4 2009-03-10 11:46:22.000000000 +0100
-@@ -1105,20 +1105,79 @@
- ])
- ])
-
-+# Older kernels (2.6.18) don't know about .parent in
-+# ctl_table
-+AC_DEFUN([CLT_TABLE_HAS_PARENT],
-+[AC_MSG_CHECKING([sysctl has .parent])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/module.h>
-+ #include <linux/sysctl.h>
-+],[
-+ struct ctl_table random_table[] = {
-+ {
-+ .ctl_name = 1,
-+ .procname = "poolsize",
-+ .data = NULL,
-+ .maxlen = sizeof(int),
-+ .parent = NULL,
-+ },
-+ { .ctl_name = 0 }
-+ };
-+],[
-+ AC_MSG_RESULT(yes)
-+ AC_DEFINE(HAVE_PARENT_IN_CTLTABLE, 1,
-+ [ctl_table knows .parent])
-+],[
-+ AC_MSG_RESULT(NO)
-+])
-+])
-+
-+AC_DEFUN([IGET_CALL_IS_PRESENT],
-+[AC_MSG_CHECKING([iget() call is available])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/fs.h>
-+],[
-+ iget(NULL,0);
-+],[
-+ AC_MSG_RESULT(yes)
-+ AC_DEFINE(HAVE_IGET_CALL, 1,
-+ [iget call is available, which is removed in 2.6.26])
-+],[
-+ AC_MSG_RESULT(NO)
-+])
-+])
-+
- # RHEL5 PageChecked and SetPageChecked defined
- AC_DEFUN([LC_PAGE_CHECKED],
- [AC_MSG_CHECKING([kernel has PageChecked and SetPageChecked])
- LB_LINUX_TRY_COMPILE([
-- #include <linux/mm.h>
-- #include <linux/page-flags.h>
-+ #include <linux/page-flags.h>
-+ #include <linux/autoconf.h>
-+ #include <linux/mm_types.h>
- ],[
-+ struct page *p;
-+
-+ /* 2.6.26 use function instead of define for it */
-+ SetPageChecked(p);
-+ PageChecked(p);
-+],[
-+ AC_MSG_RESULT(yes)
-+ AC_DEFINE(HAVE_PAGE_CHECKED, 1,
-+ [does kernel have PageChecked and SetPageChecked])
-+],[
-+ AC_MSG_CHECKING([kernel has PageChecked and SetPageChecked defined])
-+ LB_LINUX_TRY_COMPILE([
-+ #include <linux/page-flags.h>
-+ #include <linux/autoconf.h>
-+ #include <linux/mm.h>
-+ ],[
- #ifndef PageChecked
- #error PageChecked not defined in kernel
- #endif
- #ifndef SetPageChecked
- #error SetPageChecked not defined in kernel
- #endif
--],[
-+ ],[
- AC_MSG_RESULT(yes)
- AC_DEFINE(HAVE_PAGE_CHECKED, 1,
- [does kernel have PageChecked and SetPageChecked])
-@@ -1126,6 +1185,7 @@
- AC_MSG_RESULT(NO)
- ])
- ])
-+])
-
- AC_DEFUN([LC_EXPORT_TRUNCATE_COMPLETE],
- [LB_CHECK_SYMBOL_EXPORT([truncate_complete_page],
-@@ -1271,11 +1331,170 @@
-
- # 2.6.23 extract nfs export related data into exportfs.h
- AC_DEFUN([LC_HAVE_EXPORTFS_H],
--[
--tmpfl="$CFLAGS"
--CFLAGS="$CFLAGS -I$LINUX_OBJ/include"
--AC_CHECK_HEADERS([linux/exportfs.h])
--CFLAGS="$tmpfl"
-+[LB_CHECK_FILE([$LINUX/include/linux/exportfs.h], [
-+ AC_DEFINE(HAVE_LINUX_EXPORTFS_H, 1,
-+ [kernel has include/exportfs.h])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+# 2.6.23 has a new page fault handling API
-+AC_DEFUN([LC_VM_OP_FAULT],
-+[AC_MSG_CHECKING([if kernel has .fault in vm_operation_struct])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/mm.h>
-+],[
-+ struct vm_operations_struct op;
-+
-+ op.fault = NULL;
-+], [
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_VM_OP_FAULT, 1,
-+ [if kernel has .fault in vm_operation_struct])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+# 2.6.23 has new shrinker API
-+AC_DEFUN([LC_REGISTER_SHRINKER],
-+[AC_MSG_CHECKING([if kernel has register_shrinker])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/mm.h>
-+],[
-+ register_shrinker(NULL);
-+], [
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_REGISTER_SHRINKER, 1,
-+ [if kernel has register_shrinker])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+# 2.6.24 has bio_endio with 2 args
-+AC_DEFUN([LC_BIO_ENDIO_2ARG],
-+[AC_MSG_CHECKING([if kernel has bio_endio with 2 args])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/bio.h>
-+],[
-+ bio_endio(NULL, 0);
-+], [
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_BIO_ENDIO_2ARG, 1,
-+ [if kernel has bio_endio with 2 args])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+# 2.6.24 has new members in exports struct.
-+AC_DEFUN([LC_FH_TO_DENTRY],
-+[AC_MSG_CHECKING([if kernel has .fh_to_dentry member in export_operations struct])
-+LB_LINUX_TRY_COMPILE([
-+#ifdef HAVE_LINUX_EXPORTFS_H
-+ #include <linux/exportfs.h>
-+#else
-+ #include <linux/fs.h>
-+#endif
-+],[
-+ struct export_operations exp;
-+
-+ exp.fh_to_dentry = NULL;
-+], [
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_FH_TO_DENTRY, 1,
-+ [kernel has .fh_to_dentry member in export_operations struct])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+# 2.6.24 removes the long-aged ->deleted member from the procfs entry struct
-+AC_DEFUN([LC_PROCFS_DELETED],
-+[AC_MSG_CHECKING([if kernel has deleted member in procfs entry struct])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/proc_fs.h>
-+],[
-+ struct proc_dir_entry pde;
-+
-+ pde.deleted = NULL;
-+], [
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_PROCFS_DELETED, 1,
-+ [kernel has deleted member in procfs entry struct])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+# 2.6.26 doesn't export set_fs_pwd and changes the pwd member of fs_struct to a struct path
-+AC_DEFUN([LC_FS_STRUCT_USE_PATH],
-+[AC_MSG_CHECKING([fs_struct use path structure])
-+LB_LINUX_TRY_COMPILE([
-+ #include <asm/atomic.h>
-+ #include <linux/spinlock.h>
-+ #include <linux/fs_struct.h>
-+],[
-+ struct path path;
-+ struct fs_struct fs;
-+
-+ fs.pwd = path;
-+], [
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_FS_STRUCT_USE_PATH, 1,
-+ [fs_struct use path structure])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+# 2.6.26 drop list_for_each_safe_rcu
-+AC_DEFUN([LC_RCU_LIST_SAFE],
-+[AC_MSG_CHECKING([if list_for_each_safe_rcu exist])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/list.h>
-+],[
-+ #ifndef list_for_each_safe_rcu
-+ #error list_for_each_safe_rcu does not exist
-+ #endif
-+],[
-+ AC_DEFINE(HAVE_RCU_LIST_SAFE, 1, [list_for_each_safe_rcu exist])
-+ AC_MSG_RESULT([yes])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+# 2.6.26 removes path_release; use path_put instead
-+AC_DEFUN([LC_PATH_RELEASE],
-+[AC_MSG_CHECKING([if path_release exist])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/dcache.h>
-+ #include <linux/namei.h>
-+],[
-+ path_release(NULL);
-+],[
-+ AC_DEFINE(HAVE_PATH_RELEASE, 1, [path_release exist])
-+ AC_MSG_RESULT([yes])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+# blk_put_queue is replaced in 2.6.25-rc5 by blk_cleanup_queue
-+AC_DEFUN([LC_BLK_CLEANUP_QUEUE],
-+[AC_MSG_CHECKING([if blk_cleanup_queue exists])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/blkdev.h>
-+],[
-+ blk_cleanup_queue(NULL);
-+],[
-+ AC_DEFINE(HAVE_BLK_CLEANUP_QUEUE, 1, [blk_cleanup_queue exists])
-+ AC_MSG_RESULT([yes])
-+],[
-+ AC_MSG_RESULT([no])
-+])
- ])
-
- #
-@@ -1377,6 +1596,20 @@
- LC_UNREGISTER_BLKDEV_RETURN_INT
- LC_KERNEL_SPLICE_READ
- LC_HAVE_EXPORTFS_H
-+ LC_VM_OP_FAULT
-+ LC_REGISTER_SHRINKER
-+
-+ # 2.6.24
-+ LC_BIO_ENDIO_2ARG
-+ LC_FH_TO_DENTRY
-+ LC_PROCFS_DELETED
-+
-+ # 2.6.26
-+ LC_FS_STRUCT_USE_PATH
-+ LC_RCU_LIST_SAFE
-+ LC_PATH_RELEASE
-+ IGET_CALL_IS_PRESENT
-+ LC_BLK_CLEANUP_QUEUE
- ])
-
- #
-@@ -1609,6 +1842,7 @@
- ],[
- AC_MSG_RESULT([no])
- ])
-+
- ],[
- AC_MSG_RESULT([no])
- ])
-diff -urNad lustre~/lustre/include/liblustre.h lustre/lustre/include/liblustre.h
---- lustre~/lustre/include/liblustre.h 2008-12-30 11:23:32.000000000 +0100
-+++ lustre/lustre/include/liblustre.h 2009-03-10 11:41:03.000000000 +0100
-@@ -743,11 +743,13 @@
- struct _cap_struct;
- typedef struct _cap_struct *cap_t;
- typedef int cap_value_t;
-+
- typedef enum {
- CAP_EFFECTIVE=0,
- CAP_PERMITTED=1,
- CAP_INHERITABLE=2
- } cap_flag_t;
-+
- typedef enum {
- CAP_CLEAR=0,
- CAP_SET=1
-@@ -757,11 +759,33 @@
- #define CAP_DAC_READ_SEARCH 2
- #define CAP_FOWNER 3
- #define CAP_FSETID 4
--#define CAP_SYS_ADMIN 21
-+#define CAP_SYS_ADMIN 21
-+#define CAP_SYS_RESOURCE 24
-+
-+#define cap_raise(c, flag) do {} while(0)
-+
-
- cap_t cap_get_proc(void);
- int cap_get_flag(cap_t, cap_value_t, cap_flag_t, cap_flag_value_t *);
-
-+/* XXX should be moved into libcfs */
-+typedef __u32 cfs_cap_t;
-+
-+static inline cfs_kernel_cap_t cfs_curproc_cap_get(void)
-+{
-+ return current->cap_effective;
-+}
-+
-+static inline void cfs_curproc_cap_set(cfs_kernel_cap_t cap)
-+{
-+ current->cap_effective = cap;
-+}
-+
-+static inline cfs_cap_t cfs_cap_convert_from_kernel(cfs_kernel_cap_t data)
-+{
-+ return (cfs_cap_t)data;
-+}
-+
- static inline void libcfs_run_lbug_upcall(char *file, const char *fn,
- const int l){}
-
-diff -urNad lustre~/lustre/include/linux/lustre_compat25.h lustre/lustre/include/linux/lustre_compat25.h
---- lustre~/lustre/include/linux/lustre_compat25.h 2008-12-30 11:23:32.000000000 +0100
-+++ lustre/lustre/include/linux/lustre_compat25.h 2009-03-10 11:41:03.000000000 +0100
-@@ -57,6 +57,28 @@
- #endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,14) */
-
- #ifndef HAVE_SET_FS_PWD
-+
-+#ifdef HAVE_FS_STRUCT_USE_PATH
-+static inline void ll_set_fs_pwd(struct fs_struct *fs, struct vfsmount *mnt,
-+ struct dentry *dentry)
-+{
-+ struct path path;
-+ struct path old_pwd;
-+
-+ path.mnt = mnt;
-+ path.dentry = dentry;
-+ write_lock(&fs->lock);
-+ old_pwd = fs->pwd;
-+ path_get(&path);
-+ fs->pwd = path;
-+ write_unlock(&fs->lock);
-+
-+ if (old_pwd.dentry)
-+ path_put(&old_pwd);
-+}
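-+/* Unlike the pre-2.6.26 variant below, this version takes and drops
-+ * whole-path references via path_get()/path_put() instead of separate
-+ * dget()/mntget() and dput()/mntput() calls. */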
-+
-+#else
-+
- static inline void ll_set_fs_pwd(struct fs_struct *fs, struct vfsmount *mnt,
- struct dentry *dentry)
- {
-@@ -75,6 +97,7 @@
- mntput(old_pwdmnt);
- }
- }
-+#endif
- #else
- #define ll_set_fs_pwd set_fs_pwd
- #endif /* HAVE_SET_FS_PWD */
-@@ -590,5 +613,56 @@
- vfs_rename(old,old_dir,new,new_dir)
- #endif
-
-+#ifdef HAVE_REGISTER_SHRINKER
-+typedef int (*shrinker_t)(int nr_to_scan, gfp_t gfp_mask);
-+
-+static inline
-+struct shrinker *set_shrinker(int seek, shrinker_t func)
-+{
-+ struct shrinker *s;
-+
-+ s = kmalloc(sizeof(*s), GFP_KERNEL);
-+ if (s == NULL)
-+ return (NULL);
-+
-+ s->shrink = func;
-+ s->seeks = seek;
-+
-+ register_shrinker(s);
-+
-+ return s;
-+}
-+
-+static inline
-+void remove_shrinker(struct shrinker *shrinker)
-+{
-+ if (shrinker == NULL)
-+ return;
-+
-+ unregister_shrinker(shrinker);
-+ kfree(shrinker);
-+}
-+#endif
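-+/* sketch of how callers use the wrappers above (my_cache_shrink is a
-+ * hypothetical callback name):
-+ *     struct shrinker *s = set_shrinker(DEFAULT_SEEKS, my_cache_shrink);
-+ *     ...
-+ *     remove_shrinker(s);
-+ */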
-+
-+#ifdef HAVE_BIO_ENDIO_2ARG
-+#define cfs_bio_io_error(a,b) bio_io_error((a))
-+#define cfs_bio_endio(a,b,c) bio_endio((a),(c))
-+#else
-+#define cfs_bio_io_error(a,b) bio_io_error((a),(b))
-+#define cfs_bio_endio(a,b,c) bio_endio((a),(b),(c))
-+#endif
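-+/* callers keep the legacy three-argument form, e.g. lloop.c calls
-+ * cfs_bio_endio(bio, bio->bi_size, ret); on 2.6.24+ kernels the size
-+ * argument is simply dropped */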
-+
-+#ifdef HAVE_FS_STRUCT_USE_PATH
-+#define cfs_fs_pwd(fs) ((fs)->pwd.dentry)
-+#define cfs_fs_mnt(fs) ((fs)->pwd.mnt)
-+#else
-+#define cfs_fs_pwd(fs) ((fs)->pwd)
-+#define cfs_fs_mnt(fs) ((fs)->pwdmnt)
-+#endif
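-+/* cfs_fs_pwd()/cfs_fs_mnt() hide the 2.6.26 fs_struct layout change;
-+ * see the updated callers in lvfs_linux.c, mgc_request.c and
-+ * ptlrpc/service.c further down in this patch */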
-+
-+#ifndef HAVE_RCU_LIST_SAFE
-+#define list_for_each_safe_rcu(a,b,c) list_for_each_rcu(b, c)
-+#endif
-+
- #endif /* __KERNEL__ */
- #endif /* _COMPAT25_H */
-diff -urNad lustre~/lustre/include/linux/lvfs.h lustre/lustre/include/linux/lvfs.h
---- lustre~/lustre/include/linux/lvfs.h 2008-12-30 11:23:32.000000000 +0100
-+++ lustre/lustre/include/linux/lvfs.h 2009-03-10 11:41:03.000000000 +0100
-@@ -64,7 +64,7 @@
- struct upcall_cache_entry *luc_uce;
- __u32 luc_fsuid;
- __u32 luc_fsgid;
-- __u32 luc_cap;
-+ cfs_kernel_cap_t luc_cap;
- __u32 luc_suppgid1;
- __u32 luc_suppgid2;
- __u32 luc_umask;
-diff -urNad lustre~/lustre/include/lprocfs_status.h lustre/lustre/include/lprocfs_status.h
---- lustre~/lustre/include/lprocfs_status.h 2008-12-30 11:23:32.000000000 +0100
-+++ lustre/lustre/include/lprocfs_status.h 2009-03-10 11:41:03.000000000 +0100
-@@ -509,6 +509,8 @@
- #define LPROCFS_EXIT() do { \
- up_read(&_lprocfs_lock); \
- } while(0)
-+
-+#ifdef HAVE_PROCFS_DELETED
- #define LPROCFS_ENTRY_AND_CHECK(dp) do { \
- typecheck(struct proc_dir_entry *, dp); \
- LPROCFS_ENTRY(); \
-@@ -517,6 +519,13 @@
- return -ENODEV; \
- } \
- } while(0)
-+#define LPROCFS_CHECK_DELETED(dp) ((dp)->deleted)
-+#else
-+#define LPROCFS_ENTRY_AND_CHECK(dp) \
-+ LPROCFS_ENTRY();
-+#define LPROCFS_CHECK_DELETED(dp) (0)
-+#endif
-+
- #define LPROCFS_WRITE_ENTRY() do { \
- down_write(&_lprocfs_lock); \
- } while(0)
-diff -urNad lustre~/lustre/include/lustre_log.h lustre/lustre/include/lustre_log.h
---- lustre~/lustre/include/lustre_log.h 2008-12-30 11:23:32.000000000 +0100
-+++ lustre/lustre/include/lustre_log.h 2009-03-10 11:41:03.000000000 +0100
-@@ -238,14 +238,6 @@
- void *llog_proc_cb;
- };
-
--#ifndef __KERNEL__
--
--#define cap_raise(c, flag) do {} while(0)
--
--#define CAP_SYS_RESOURCE 24
--
--#endif /* !__KERNEL__ */
--
- static inline void llog_gen_init(struct llog_ctxt *ctxt)
- {
- struct obd_device *obd = ctxt->loc_exp->exp_obd;
-@@ -349,7 +341,7 @@
- int numcookies, void *buf, int idx)
- {
- struct llog_operations *lop;
-- __u32 cap;
-+ cfs_kernel_cap_t cap;
- int rc, buflen;
- ENTRY;
-
-@@ -366,10 +358,10 @@
- buflen = rec->lrh_len;
- LASSERT(size_round(buflen) == buflen);
-
-- cap = current->cap_effective;
-- cap_raise(current->cap_effective, CAP_SYS_RESOURCE);
-+ cap = current->cap_effective;
-+ cap_raise(current->cap_effective, CAP_SYS_RESOURCE);
- rc = lop->lop_write_rec(handle, rec, logcookies, numcookies, buf, idx);
-- current->cap_effective = cap;
-+ current->cap_effective = cap;
- RETURN(rc);
- }
-
-@@ -465,7 +457,7 @@
- struct llog_logid *logid, char *name)
- {
- struct llog_operations *lop;
-- __u32 cap;
-+ cfs_kernel_cap_t cap;
- int rc;
- ENTRY;
-
-@@ -475,10 +467,10 @@
- if (lop->lop_create == NULL)
- RETURN(-EOPNOTSUPP);
-
-- cap = current->cap_effective;
-+ cap = current->cap_effective;
- cap_raise(current->cap_effective, CAP_SYS_RESOURCE);
- rc = lop->lop_create(ctxt, res, logid, name);
-- current->cap_effective = cap;
-+ current->cap_effective = cap;
- RETURN(rc);
- }
-
-diff -urNad lustre~/lustre/include/lustre_mds.h lustre/lustre/include/lustre_mds.h
---- lustre~/lustre/include/lustre_mds.h 2008-12-30 11:23:32.000000000 +0100
-+++ lustre/lustre/include/lustre_mds.h 2009-03-10 11:41:03.000000000 +0100
-@@ -209,7 +209,7 @@
- struct page *, struct ptlrpc_request **);
- int mdc_create(struct obd_export *exp, struct mdc_op_data *op_data,
- const void *data, int datalen, int mode, __u32 uid, __u32 gid,
-- __u32 cap_effective, __u64 rdev,struct ptlrpc_request **request);
-+ cfs_kernel_cap_t cap_effective, __u64 rdev,struct ptlrpc_request **request);
- int mdc_unlink(struct obd_export *exp, struct mdc_op_data *data,
- struct ptlrpc_request **request);
- int mdc_link(struct obd_export *exp, struct mdc_op_data *data,
-diff -urNad lustre~/lustre/llite/file.c lustre/lustre/llite/file.c
---- lustre~/lustre/llite/file.c 2009-03-10 11:41:02.000000000 +0100
-+++ lustre/lustre/llite/file.c 2009-03-10 11:41:03.000000000 +0100
-@@ -1820,9 +1820,17 @@
- /*
- * Send file content (through pagecache) somewhere with helper
- */
--#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
--static ssize_t ll_file_sendfile(struct file *in_file, loff_t *ppos,size_t count,
-- read_actor_t actor, void *target)
-+/* change based on
-+ * http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commit;h=f0930fffa99e7fe0a0c4b6c7d9a244dc88288c27
-+ */
-+#ifdef HAVE_KERNEL_SPLICE_READ
-+static ssize_t ll_file_splice_read(struct file *in_file, loff_t *ppos,
-+ struct pipe_inode_info *pipe, size_t count,
-+ unsigned int flags)
-+#else
-+static ssize_t ll_file_sendfile(struct file *in_file, loff_t *ppos,
-+ size_t count, read_actor_t actor, void *target)
-+#endif
- {
- struct inode *inode = in_file->f_dentry->d_inode;
- struct ll_inode_info *lli = ll_i2info(inode);
-@@ -1831,8 +1839,7 @@
- struct ll_lock_tree_node *node;
- struct ost_lvb lvb;
- struct ll_ra_read bead;
-- int rc;
-- ssize_t retval;
-+ ssize_t rc;
- __u64 kms;
- ENTRY;
- CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),size="LPSZ",offset=%Ld\n",
-@@ -1848,8 +1855,14 @@
- in_file->f_ra.ra_pages = 0;
-
- /* File with no objects, nothing to lock */
-- if (!lsm)
-- RETURN(generic_file_sendfile(in_file, ppos, count, actor, target));
-+ if (!lsm) {
-+#ifdef HAVE_KERNEL_SPLICE_READ
-+ rc = generic_file_splice_read(in_file, ppos, pipe, count, flags);
-+#else
-+ rc = generic_file_sendfile(in_file, ppos, count, actor, target);
-+#endif
-+ RETURN(rc);
-+ }
-
- node = ll_node_from_inode(inode, *ppos, *ppos + count - 1, LCK_PR);
- if (IS_ERR(node))
-@@ -1889,8 +1902,8 @@
- /* A glimpse is necessary to determine whether we return a
- * short read (B) or some zeroes at the end of the buffer (C) */
- ll_inode_size_unlock(inode, 1);
-- retval = ll_glimpse_size(inode, LDLM_FL_BLOCK_GRANTED);
-- if (retval)
-+ rc = ll_glimpse_size(inode, LDLM_FL_BLOCK_GRANTED);
-+ if (rc)
- goto out;
- } else {
- /* region is within kms and, hence, within real file size (A) */
-@@ -1906,14 +1919,17 @@
- ll_ra_read_in(in_file, &bead);
- /* BUG: 5972 */
- file_accessed(in_file);
-- retval = generic_file_sendfile(in_file, ppos, count, actor, target);
-+#ifdef HAVE_KERNEL_SPLICE_READ
-+ rc = generic_file_splice_read(in_file, ppos, pipe, count, flags);
-+#else
-+ rc = generic_file_sendfile(in_file, ppos, count, actor, target);
-+#endif
- ll_ra_read_ex(in_file, &bead);
-
- out:
- ll_tree_unlock(&tree);
-- RETURN(retval);
-+ RETURN(rc);
- }
--#endif
-
- static int ll_lov_recreate_obj(struct inode *inode, struct file *file,
- unsigned long arg)
-@@ -3179,7 +3195,9 @@
- .release = ll_file_release,
- .mmap = ll_file_mmap,
- .llseek = ll_file_seek,
--#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
-+#ifdef HAVE_KERNEL_SPLICE_READ
-+ .splice_read = ll_file_splice_read,
-+#else
- .sendfile = ll_file_sendfile,
- #endif
- .fsync = ll_fsync,
-@@ -3203,7 +3221,9 @@
- .release = ll_file_release,
- .mmap = ll_file_mmap,
- .llseek = ll_file_seek,
--#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
-+#ifdef HAVE_KERNEL_SPLICE_READ
-+ .splice_read = ll_file_splice_read,
-+#else
- .sendfile = ll_file_sendfile,
- #endif
- .fsync = ll_fsync,
-@@ -3232,7 +3252,9 @@
- .release = ll_file_release,
- .mmap = ll_file_mmap,
- .llseek = ll_file_seek,
--#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
-+#ifdef HAVE_KERNEL_SPLICE_READ
-+ .splice_read = ll_file_splice_read,
-+#else
- .sendfile = ll_file_sendfile,
- #endif
- .fsync = ll_fsync,
-diff -urNad lustre~/lustre/llite/llite_internal.h lustre/lustre/llite/llite_internal.h
---- lustre~/lustre/llite/llite_internal.h 2008-12-30 11:23:32.000000000 +0100
-+++ lustre/lustre/llite/llite_internal.h 2009-03-10 11:41:03.000000000 +0100
-@@ -748,9 +748,6 @@
- /* llite/llite_nfs.c */
- extern struct export_operations lustre_export_operations;
- __u32 get_uuid2int(const char *name, int len);
--struct dentry *ll_fh_to_dentry(struct super_block *sb, __u32 *data, int len,
-- int fhtype, int parent);
--int ll_dentry_to_fh(struct dentry *, __u32 *datap, int *lenp, int need_parent);
-
- /* llite/special.c */
- extern struct inode_operations ll_special_inode_operations;
-diff -urNad lustre~/lustre/llite/llite_lib.c lustre/lustre/llite/llite_lib.c
---- lustre~/lustre/llite/llite_lib.c 2008-12-30 11:23:32.000000000 +0100
-+++ lustre/lustre/llite/llite_lib.c 2009-03-10 11:41:03.000000000 +0100
-@@ -1373,7 +1373,7 @@
- rc = vmtruncate(inode, new_size);
- clear_bit(LLI_F_SRVLOCK, &lli->lli_flags);
- if (rc != 0) {
-- LASSERT(atomic_read(&lli->lli_size_sem.count) <= 0);
-+// LASSERT(atomic_read(&lli->lli_size_sem.count) <= 0);
- ll_inode_size_unlock(inode, 0);
- }
- }
-diff -urNad lustre~/lustre/llite/llite_mmap.c lustre/lustre/llite/llite_mmap.c
---- lustre~/lustre/llite/llite_mmap.c 2008-12-30 11:23:32.000000000 +0100
-+++ lustre/lustre/llite/llite_mmap.c 2009-03-10 11:41:03.000000000 +0100
-@@ -53,9 +53,6 @@
- #include <linux/mm.h>
- #include <linux/pagemap.h>
- #include <linux/smp_lock.h>
--#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
--#include <linux/iobuf.h>
--#endif
-
- #define DEBUG_SUBSYSTEM S_LLITE
-
-@@ -84,8 +81,7 @@
- int lt_get_mmap_locks(struct ll_lock_tree *tree,
- unsigned long addr, size_t count);
-
--struct page *ll_nopage(struct vm_area_struct *vma, unsigned long address,
-- int *type);
-+static struct vm_operations_struct ll_file_vm_ops;
-
- struct ll_lock_tree_node * ll_node_from_inode(struct inode *inode, __u64 start,
- __u64 end, ldlm_mode_t mode)
-@@ -311,7 +307,7 @@
- spin_lock(&mm->page_table_lock);
- for(vma = find_vma(mm, addr);
- vma != NULL && vma->vm_start < (addr + count); vma = vma->vm_next) {
-- if (vma->vm_ops && vma->vm_ops->nopage == ll_nopage &&
-+ if (vma->vm_ops && vma->vm_ops == &ll_file_vm_ops &&
- vma->vm_flags & VM_SHARED) {
- ret = vma;
- break;
-@@ -363,44 +359,30 @@
- }
- RETURN(0);
- }
--/**
-- * Page fault handler.
-- *
-- * \param vma - is virtiual area struct related to page fault
-- * \param address - address when hit fault
-- * \param type - of fault
-- *
-- * \return allocated and filled page for address
-- * \retval NOPAGE_SIGBUS if page not exist on this address
-- * \retval NOPAGE_OOM not have memory for allocate new page
-- */
--struct page *ll_nopage(struct vm_area_struct *vma, unsigned long address,
-- int *type)
-+
-+
-+static int ll_get_extent_lock(struct vm_area_struct *vma, unsigned long pgoff,
-+ int *save_flags, struct lustre_handle *lockh)
- {
- struct file *filp = vma->vm_file;
- struct ll_file_data *fd = LUSTRE_FPRIVATE(filp);
- struct inode *inode = filp->f_dentry->d_inode;
-- struct lustre_handle lockh = { 0 };
- ldlm_policy_data_t policy;
- ldlm_mode_t mode;
-- struct page *page = NULL;
- struct ll_inode_info *lli = ll_i2info(inode);
-- struct lov_stripe_md *lsm;
- struct ost_lvb lvb;
- __u64 kms, old_mtime;
-- unsigned long pgoff, size, rand_read, seq_read;
-- int rc = 0;
-- ENTRY;
-+ unsigned long size;
-
- if (lli->lli_smd == NULL) {
- CERROR("No lsm on fault?\n");
-- RETURN(NOPAGE_SIGBUS);
-+ RETURN(0);
- }
-
- ll_clear_file_contended(inode);
-
- /* start and end the lock on the first and last bytes in the page */
-- policy_from_vma(&policy, vma, address, CFS_PAGE_SIZE);
-+ policy_from_vma(&policy, vma, pgoff, CFS_PAGE_SIZE);
-
- CDEBUG(D_MMAP, "nopage vma %p inode %lu, locking ["LPU64", "LPU64"]\n",
- vma, inode->i_ino, policy.l_extent.start, policy.l_extent.end);
-@@ -408,26 +390,24 @@
- mode = mode_from_vma(vma);
- old_mtime = LTIME_S(inode->i_mtime);
-
-- lsm = lli->lli_smd;
-- rc = ll_extent_lock(fd, inode, lsm, mode, &policy,
-- &lockh, LDLM_FL_CBPENDING | LDLM_FL_NO_LRU);
-- if (rc != 0)
-- RETURN(NOPAGE_SIGBUS);
-+ if(ll_extent_lock(fd, inode, lli->lli_smd, mode, &policy,
-+ lockh, LDLM_FL_CBPENDING | LDLM_FL_NO_LRU))
-+ RETURN(0);
-
- if (vma->vm_flags & VM_EXEC && LTIME_S(inode->i_mtime) != old_mtime)
- CWARN("binary changed. inode %lu\n", inode->i_ino);
-
-- lov_stripe_lock(lsm);
-+ lov_stripe_lock(lli->lli_smd);
- inode_init_lvb(inode, &lvb);
-- obd_merge_lvb(ll_i2obdexp(inode), lsm, &lvb, 1);
-+ obd_merge_lvb(ll_i2obdexp(inode), lli->lli_smd, &lvb, 1);
- kms = lvb.lvb_size;
-
-- pgoff = ((address - vma->vm_start) >> CFS_PAGE_SHIFT) + vma->vm_pgoff;
- size = (kms + CFS_PAGE_SIZE - 1) >> CFS_PAGE_SHIFT;
-
- if (pgoff >= size) {
-- lov_stripe_unlock(lsm);
-+ lov_stripe_unlock(lli->lli_smd);
- ll_glimpse_size(inode, LDLM_FL_BLOCK_GRANTED);
-+ lov_stripe_lock(lli->lli_smd);
- } else {
- /* XXX change inode size without ll_inode_size_lock() held!
- * there is a race condition with truncate path. (see
-@@ -449,29 +429,59 @@
- CDEBUG(D_INODE, "ino=%lu, updating i_size %llu\n",
- inode->i_ino, i_size_read(inode));
- }
-- lov_stripe_unlock(lsm);
- }
-
- /* If mapping is writeable, adjust kms to cover this page,
- * but do not extend kms beyond actual file size.
- * policy.l_extent.end is set to the end of the page by policy_from_vma
- * bug 10919 */
-- lov_stripe_lock(lsm);
- if (mode == LCK_PW)
-- obd_adjust_kms(ll_i2obdexp(inode), lsm,
-+ obd_adjust_kms(ll_i2obdexp(inode), lli->lli_smd,
- min_t(loff_t, policy.l_extent.end + 1,
- i_size_read(inode)), 0);
-- lov_stripe_unlock(lsm);
-+ lov_stripe_unlock(lli->lli_smd);
-
- /* disable VM_SEQ_READ and use VM_RAND_READ to make sure that
- * the kernel will not read other pages not covered by ldlm in
- * filemap_nopage. we do our readahead in ll_readpage.
- */
-- rand_read = vma->vm_flags & VM_RAND_READ;
-- seq_read = vma->vm_flags & VM_SEQ_READ;
-+ *save_flags = vma->vm_flags & (VM_RAND_READ | VM_SEQ_READ);
- vma->vm_flags &= ~ VM_SEQ_READ;
- vma->vm_flags |= VM_RAND_READ;
-
-+ RETURN(1);
-+}
-+
-+static void ll_put_extent_lock(struct vm_area_struct *vma, int save_flags,
-+ struct lustre_handle *lockh)
-+{
-+ struct file *filp = vma->vm_file;
-+ struct ll_file_data *fd = LUSTRE_FPRIVATE(filp);
-+ struct inode *inode = filp->f_dentry->d_inode;
-+ ldlm_mode_t mode;
-+
-+ mode = mode_from_vma(vma);
-+ vma->vm_flags &= ~(VM_RAND_READ | VM_SEQ_READ);
-+ vma->vm_flags |= save_flags;
-+
-+ ll_extent_unlock(fd, inode, ll_i2info(inode)->lli_smd, mode, lockh);
-+}
-+
-+#ifndef HAVE_VM_OP_FAULT
-+struct page *ll_nopage(struct vm_area_struct *vma, unsigned long address,
-+ int *type) {
-+ struct lustre_handle lockh = { 0 };
-+ int save_flags;
-+ unsigned long pgoff;
-+ struct page *page;
-+ ENTRY;
-+
-+ pgoff = ((address - vma->vm_start) >> CFS_PAGE_SHIFT) + vma->vm_pgoff;
-+ if (!ll_get_extent_lock(vma, pgoff, &save_flags, &lockh))
-+ RETURN(NOPAGE_SIGBUS);
-+
- page = filemap_nopage(vma, address, type);
- if (page != NOPAGE_SIGBUS && page != NOPAGE_OOM)
- LL_CDEBUG_PAGE(D_PAGE, page, "got addr %lu type %lx\n", address,
-@@ -480,12 +490,30 @@
- CDEBUG(D_PAGE, "got addr %lu type %lx - SIGBUS\n", address,
- (long)type);
-
-- vma->vm_flags &= ~VM_RAND_READ;
-- vma->vm_flags |= (rand_read | seq_read);
-+ ll_put_extent_lock(vma, save_flags, &lockh);
-
-- ll_extent_unlock(fd, inode, ll_i2info(inode)->lli_smd, mode, &lockh);
- RETURN(page);
- }
-+#else
-+/* New fault() API*/
-+int ll_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
-+{
-+ struct lustre_handle lockh = { 0 };
-+ int save_flags;
-+ int rc;
-+ ENTRY;
-+
-+ if (!ll_get_extent_lock(vma, vmf->pgoff, &save_flags, &lockh))
-+ RETURN(VM_FAULT_SIGBUS);
-+
-+ rc = filemap_fault(vma, vmf);
-+ LL_CDEBUG_PAGE(D_PAGE, vmf->page, "got addr %p type NOPAGE\n",
-+ vmf->virtual_address);
-+ ll_put_extent_lock(vma, save_flags, &lockh);
-+
-+ RETURN(rc);
-+}
-+#endif
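-+/* ll_fault() (or ll_nopage() on older kernels) is wired into
-+ * ll_file_vm_ops below, selected by HAVE_VM_OP_FAULT */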
-
- /* To avoid cancel the locks covering mmapped region for lock cache pressure,
- * we track the mapped vma count by lli_mmap_cnt.
-@@ -551,6 +579,7 @@
- }
- }
-
-+#ifndef HAVE_VM_OP_FAULT
- #ifndef HAVE_FILEMAP_POPULATE
- static int (*filemap_populate)(struct vm_area_struct * area, unsigned long address, unsigned long len, pgprot_t prot, unsigned long pgoff, int nonblock);
- #endif
-@@ -565,6 +594,7 @@
- rc = filemap_populate(area, address, len, prot, pgoff, 1);
- RETURN(rc);
- }
-+#endif
-
- /* return the user space pointer that maps to a file offset via a vma */
- static inline unsigned long file_to_user(struct vm_area_struct *vma, __u64 byte)
-@@ -591,10 +621,14 @@
- }
-
- static struct vm_operations_struct ll_file_vm_ops = {
-- .nopage = ll_nopage,
- .open = ll_vm_open,
- .close = ll_vm_close,
-+#ifdef HAVE_VM_OP_FAULT
-+ .fault = ll_fault,
-+#else
-+ .nopage = ll_nopage,
- .populate = ll_populate,
-+#endif
- };
-
- int ll_file_mmap(struct file * file, struct vm_area_struct * vma)
-@@ -605,8 +639,7 @@
- ll_stats_ops_tally(ll_i2sbi(file->f_dentry->d_inode), LPROC_LL_MAP, 1);
- rc = generic_file_mmap(file, vma);
- if (rc == 0) {
--#if !defined(HAVE_FILEMAP_POPULATE) && \
-- (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
-+#if !defined(HAVE_FILEMAP_POPULATE) && !defined(HAVE_VM_OP_FAULT)
- if (!filemap_populate)
- filemap_populate = vma->vm_ops->populate;
- #endif
-diff -urNad lustre~/lustre/llite/llite_nfs.c lustre/lustre/llite/llite_nfs.c
---- lustre~/lustre/llite/llite_nfs.c 2008-12-30 11:23:32.000000000 +0100
-+++ lustre/lustre/llite/llite_nfs.c 2009-03-10 11:41:03.000000000 +0100
-@@ -57,11 +57,7 @@
- return (key0 << 1);
- }
-
--#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
--static int ll_nfs_test_inode(struct inode *inode, unsigned long ino, void *opaque)
--#else
- static int ll_nfs_test_inode(struct inode *inode, void *opaque)
--#endif
- {
- struct ll_fid *iid = opaque;
-
-@@ -73,12 +69,10 @@
-
- static struct inode * search_inode_for_lustre(struct super_block *sb,
- unsigned long ino,
-- unsigned long generation,
-- int mode)
-+ unsigned long generation)
- {
- struct ptlrpc_request *req = NULL;
- struct ll_sb_info *sbi = ll_s2sbi(sb);
-- struct ll_fid fid;
- unsigned long valid = 0;
- int eadatalen = 0, rc;
- struct inode *inode = NULL;
-@@ -89,17 +83,15 @@
-
- if (inode)
- RETURN(inode);
-- if (S_ISREG(mode)) {
-- rc = ll_get_max_mdsize(sbi, &eadatalen);
-- if (rc)
-- RETURN(ERR_PTR(rc));
-- valid |= OBD_MD_FLEASIZE;
-- }
-- fid.id = (__u64)ino;
-- fid.generation = generation;
-- fid.f_type = mode;
-
-- rc = mdc_getattr(sbi->ll_mdc_exp, &fid, valid, eadatalen, &req);
-+ rc = ll_get_max_mdsize(sbi, &eadatalen);
-+ if (rc)
-+ RETURN(ERR_PTR(rc));
-+
-+ valid |= OBD_MD_FLEASIZE;
-+
-+ /* mds_fid2dentry ignores f_type */
-+ rc = mdc_getattr(sbi->ll_mdc_exp, &iid, valid, eadatalen, &req);
- if (rc) {
- CERROR("failure %d inode %lu\n", rc, ino);
- RETURN(ERR_PTR(rc));
-@@ -115,20 +107,23 @@
- RETURN(inode);
- }
-
--static struct dentry *ll_iget_for_nfs(struct super_block *sb, unsigned long ino,
-- __u32 generation, umode_t mode)
-+extern struct dentry_operations ll_d_ops;
-+
-+#ifdef HAVE_FH_TO_DENTRY
-+static struct inode *ll_iget_for_nfs(struct super_block *sb, __u64 ino,
-+ __u32 generation)
-+#else
-+static struct inode *ll_iget_for_nfs(struct super_block *sb, unsigned long ino,
-+ __u32 generation)
-+#endif
- {
- struct inode *inode;
-- struct dentry *result;
--#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
-- struct list_head *lp;
--#endif
- ENTRY;
-
- if (ino == 0)
- RETURN(ERR_PTR(-ESTALE));
-
-- inode = search_inode_for_lustre(sb, ino, generation, mode);
-+ inode = search_inode_for_lustre(sb, ino, generation);
- if (IS_ERR(inode)) {
- RETURN(ERR_PTR(PTR_ERR(inode)));
- }
-@@ -142,40 +137,24 @@
- iput(inode);
- RETURN(ERR_PTR(-ESTALE));
- }
-+ RETURN(inode);
-+}
-+
-+static struct dentry *ll_nfs_get_dentry(struct super_block *sb, void *data)
-+{
-+ __u32 *inump = (__u32*)data;
-+ struct inode *inode;
-+ struct dentry *result;
-+
-+ inode = ll_iget_for_nfs(sb, inump[0], inump[1]);
-+ if (IS_ERR(inode))
-+ RETURN((struct dentry *)inode);
-
--#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
- result = d_alloc_anon(inode);
- if (!result) {
- iput(inode);
- RETURN(ERR_PTR(-ENOMEM));
- }
--#else
-- /* now to find a dentry.
-- * If possible, get a well-connected one
-- */
-- spin_lock(&dcache_lock);
-- for (lp = inode->i_dentry.next; lp != &inode->i_dentry ; lp=lp->next) {
-- result = list_entry(lp,struct dentry, d_alias);
-- lock_dentry(result);
-- if (!(result->d_flags & DCACHE_DISCONNECTED)) {
-- dget_locked(result);
-- ll_set_dflags(result, DCACHE_REFERENCED);
-- unlock_dentry(result);
-- spin_unlock(&dcache_lock);
-- iput(inode);
-- RETURN(result);
-- }
-- unlock_dentry(result);
-- }
-- spin_unlock(&dcache_lock);
-- result = d_alloc_root(inode);
-- if (result == NULL) {
-- iput(inode);
-- RETURN(ERR_PTR(-ENOMEM));
-- }
-- result->d_flags |= DCACHE_DISCONNECTED;
--
--#endif
- ll_set_dd(result);
-
- lock_dentry(result);
-@@ -190,80 +169,63 @@
- }
-
- RETURN(result);
-+
- }
-
--struct dentry *ll_fh_to_dentry(struct super_block *sb, __u32 *data, int len,
-- int fhtype, int parent)
-+#ifdef HAVE_FH_TO_DENTRY
-+
-+static struct dentry *ll_fh_to_dentry(struct super_block *sb, struct fid *fid,
-+ int fh_len, int fh_type)
- {
-- switch (fhtype) {
-- case 2:
-- if (len < 5)
-- break;
-- if (parent)
-- return ll_iget_for_nfs(sb, data[3], 0, data[4]);
-- case 1:
-- if (len < 3)
-- break;
-- if (parent)
-- break;
-- return ll_iget_for_nfs(sb, data[0], data[1], data[2]);
-- default: break;
-+ struct dentry *result;
-+
-+ result = generic_fh_to_dentry(sb, fid, fh_len, fh_type,
-+ ll_iget_for_nfs);
-+ if(!IS_ERR(result)) {
-+ ll_set_dd(result);
-+ result->d_op = &ll_d_ops;
- }
-- return ERR_PTR(-EINVAL);
-+ RETURN(result);
- }
-
--int ll_dentry_to_fh(struct dentry *dentry, __u32 *datap, int *lenp,
-- int need_parent)
-+static struct dentry *ll_fh_to_parent(struct super_block *sb, struct fid *fid,
-+ int fh_len, int fh_type)
- {
-- if (*lenp < 3)
-- return 255;
-- *datap++ = dentry->d_inode->i_ino;
-- *datap++ = dentry->d_inode->i_generation;
-- *datap++ = (__u32)(S_IFMT & dentry->d_inode->i_mode);
--
-- if (*lenp == 3 || S_ISDIR(dentry->d_inode->i_mode)) {
-- *lenp = 3;
-- return 1;
-- }
-- if (dentry->d_parent) {
-- *datap++ = dentry->d_parent->d_inode->i_ino;
-- *datap++ = (__u32)(S_IFMT & dentry->d_parent->d_inode->i_mode);
-+ struct dentry *result;
-
-- *lenp = 5;
-- return 2;
-+ result = generic_fh_to_parent(sb, fid, fh_len, fh_type,
-+ ll_iget_for_nfs);
-+ if(!IS_ERR(result)) {
-+ ll_set_dd(result);
-+ result->d_op = &ll_d_ops;
- }
-- *lenp = 3;
-- return 1;
-+ RETURN(result);
- }
--
--#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
--struct dentry *ll_get_dentry(struct super_block *sb, void *data)
-+#else
-+static struct dentry *ll_get_dentry(struct super_block *sb, void *data)
- {
-- __u32 *inump = (__u32*)data;
-- return ll_iget_for_nfs(sb, inump[0], inump[1], S_IFREG);
-+ return ll_nfs_get_dentry(sb, data);
- }
-
--struct dentry *ll_get_parent(struct dentry *dchild)
-+#endif
-+
-+static struct dentry *ll_get_parent(struct dentry *dchild)
- {
- struct ptlrpc_request *req = NULL;
- struct inode *dir = dchild->d_inode;
-- struct ll_sb_info *sbi;
- struct dentry *result = NULL;
- struct ll_fid fid;
- struct mds_body *body;
- char dotdot[] = "..";
-+ __u32 idata[2];
- int rc = 0;
- ENTRY;
-
- LASSERT(dir && S_ISDIR(dir->i_mode));
--
-- sbi = ll_s2sbi(dir->i_sb);
-
- fid.id = (__u64)dir->i_ino;
- fid.generation = dir->i_generation;
-- fid.f_type = S_IFDIR;
--
-- rc = mdc_getattr_name(sbi->ll_mdc_exp, &fid, dotdot, strlen(dotdot) + 1,
-+ rc = mdc_getattr_name(ll_s2sbi(dir->i_sb)->ll_mdc_exp, &fid, dotdot, sizeof(dotdot),
- 0, 0, &req);
- if (rc) {
- CERROR("failure %d inode %lu get parent\n", rc, dir->i_ino);
-@@ -273,8 +235,9 @@
-
- LASSERT((body->valid & OBD_MD_FLGENER) && (body->valid & OBD_MD_FLID));
-
-- result = ll_iget_for_nfs(dir->i_sb, body->ino, body->generation, S_IFDIR);
--
-+ idata[0] = body->ino;
-+ idata[1] = body->generation;
-+ result = ll_nfs_get_dentry(dir->i_sb, &idata);
- if (IS_ERR(result))
- rc = PTR_ERR(result);
-
-@@ -285,7 +248,11 @@
- }
-
- struct export_operations lustre_export_operations = {
-- .get_parent = ll_get_parent,
-- .get_dentry = ll_get_dentry,
--};
-+#ifdef HAVE_FH_TO_DENTRY
-+ .fh_to_dentry = ll_fh_to_dentry,
-+ .fh_to_parent = ll_fh_to_parent,
-+#else
-+ .get_dentry = ll_get_dentry,
- #endif
-+ .get_parent = ll_get_parent,
-+};
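-+/* generic_fh_to_dentry()/generic_fh_to_parent() decode the default
-+ * ino/generation file handles and call ll_iget_for_nfs() back with
-+ * (ino, generation); the kernel's default encode_fh is relied on for
-+ * the encoding side */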
-diff -urNad lustre~/lustre/llite/lloop.c lustre/lustre/llite/lloop.c
---- lustre~/lustre/llite/lloop.c 2008-12-30 11:23:32.000000000 +0100
-+++ lustre/lustre/llite/lloop.c 2009-03-10 11:48:26.000000000 +0100
-@@ -312,7 +312,7 @@
- if (atomic_dec_and_test(&lo->lo_pending))
- up(&lo->lo_bh_mutex);
- out:
-- bio_io_error(old_bio, old_bio->bi_size);
-+ cfs_bio_io_error(old_bio, old_bio->bi_size);
- return 0;
- inactive:
- spin_unlock_irq(&lo->lo_lock);
-@@ -334,7 +334,7 @@
- {
- int ret;
- ret = do_bio_filebacked(lo, bio);
-- bio_endio(bio, bio->bi_size, ret);
-+ cfs_bio_endio(bio, bio->bi_size, ret);
- }
-
- /*
-@@ -736,7 +736,11 @@
-
- out_mem4:
- while (i--)
-+#ifndef HAVE_BLK_CLEANUP_QUEUE
- blk_put_queue(loop_dev[i].lo_queue);
-+#else
-+ blk_cleanup_queue(loop_dev[i].lo_queue);
-+#endif
- i = max_loop;
- out_mem3:
- while (i--)
-@@ -758,7 +762,11 @@
- ll_iocontrol_unregister(ll_iocontrol_magic);
- for (i = 0; i < max_loop; i++) {
- del_gendisk(disks[i]);
-+#ifndef HAVE_BLK_CLEANUP_QUEUE
- blk_put_queue(loop_dev[i].lo_queue);
-+#else
-+ blk_cleanup_queue(loop_dev[i].lo_queue);
-+#endif
- put_disk(disks[i]);
- }
- if (ll_unregister_blkdev(lloop_major, "lloop"))
-diff -urNad lustre~/lustre/llite/namei.c lustre/lustre/llite/namei.c
---- lustre~/lustre/llite/namei.c 2008-12-30 11:23:32.000000000 +0100
-+++ lustre/lustre/llite/namei.c 2009-03-10 11:41:03.000000000 +0100
-@@ -901,7 +901,7 @@
-
- err = mdc_create(sbi->ll_mdc_exp, &op_data, tgt, tgt_len,
- mode, current->fsuid, current->fsgid,
-- current->cap_effective, rdev, &request);
-+ cfs_curproc_cap_get(), rdev, &request);
- if (err)
- GOTO(err_exit, err);
-
-diff -urNad lustre~/lustre/llite/rw.c lustre/lustre/llite/rw.c
---- lustre~/lustre/llite/rw.c 2008-12-30 11:23:32.000000000 +0100
-+++ lustre/lustre/llite/rw.c 2009-03-10 11:41:03.000000000 +0100
-@@ -186,7 +186,7 @@
- GOTO(out_unlock, 0);
- }
-
-- LASSERT(atomic_read(&lli->lli_size_sem.count) <= 0);
-+// LASSERT(atomic_read(&lli->lli_size_sem.count) <= 0);
-
- if (!srvlock) {
- struct ost_lvb lvb;
-diff -urNad lustre~/lustre/llite/symlink.c lustre/lustre/llite/symlink.c
---- lustre~/lustre/llite/symlink.c 2008-12-30 11:23:32.000000000 +0100
-+++ lustre/lustre/llite/symlink.c 2009-03-10 11:41:03.000000000 +0100
-@@ -171,8 +171,12 @@
- rc = ll_readlink_internal(inode, &request, &symname);
- up(&lli->lli_size_sem);
- if (rc) {
-+#ifdef HAVE_PATH_RELEASE
- path_release(nd); /* Kernel assumes that ->follow_link()
- releases nameidata on error */
-+#else
-+ path_put(&nd->path);
-+#endif
- GOTO(out, rc);
- }
-
-diff -urNad lustre~/lustre/lvfs/lvfs_linux.c lustre/lustre/lvfs/lvfs_linux.c
---- lustre~/lustre/lvfs/lvfs_linux.c 2008-12-30 11:23:32.000000000 +0100
-+++ lustre/lustre/lvfs/lvfs_linux.c 2009-03-10 11:41:03.000000000 +0100
-@@ -148,10 +148,10 @@
- */
-
- save->fs = get_fs();
-- LASSERT(atomic_read(¤t->fs->pwd->d_count));
-+ LASSERT(atomic_read(&cfs_fs_pwd(current->fs)->d_count));
- LASSERT(atomic_read(&new_ctx->pwd->d_count));
-- save->pwd = dget(current->fs->pwd);
-- save->pwdmnt = mntget(current->fs->pwdmnt);
-+ save->pwd = dget(cfs_fs_pwd(current->fs));
-+ save->pwdmnt = mntget(cfs_fs_mnt(current->fs));
- save->luc.luc_umask = current->fs->umask;
-
- LASSERT(save->pwd);
-@@ -162,11 +162,11 @@
- if (uc) {
- save->luc.luc_fsuid = current->fsuid;
- save->luc.luc_fsgid = current->fsgid;
-- save->luc.luc_cap = current->cap_effective;
-+ save->luc.luc_cap = cfs_curproc_cap_get();
-
- current->fsuid = uc->luc_fsuid;
- current->fsgid = uc->luc_fsgid;
-- current->cap_effective = uc->luc_cap;
-+ cfs_curproc_cap_set(uc->luc_cap);
- push_group_info(save, uc->luc_uce);
- }
- current->fs->umask = 0; /* umask already applied on client */
-@@ -205,10 +205,10 @@
- atomic_read(¤t->fs->pwdmnt->mnt_count));
- */
-
-- LASSERTF(current->fs->pwd == new_ctx->pwd, "%p != %p\n",
-- current->fs->pwd, new_ctx->pwd);
-- LASSERTF(current->fs->pwdmnt == new_ctx->pwdmnt, "%p != %p\n",
-- current->fs->pwdmnt, new_ctx->pwdmnt);
-+ LASSERTF(cfs_fs_pwd(current->fs) == new_ctx->pwd, "%p != %p\n",
-+ cfs_fs_pwd(current->fs), new_ctx->pwd);
-+ LASSERTF(cfs_fs_mnt(current->fs) == new_ctx->pwdmnt, "%p != %p\n",
-+ cfs_fs_mnt(current->fs), new_ctx->pwdmnt);
-
- set_fs(saved->fs);
- ll_set_fs_pwd(current->fs, saved->pwdmnt, saved->pwd);
-@@ -219,7 +219,7 @@
- if (uc) {
- current->fsuid = saved->luc.luc_fsuid;
- current->fsgid = saved->luc.luc_fsgid;
-- current->cap_effective = saved->luc.luc_cap;
-+ cfs_curproc_cap_set(saved->luc.luc_cap);
- pop_group_info(saved, uc->luc_uce);
- }
-
-diff -urNad lustre~/lustre/mdc/mdc_internal.h lustre/lustre/mdc/mdc_internal.h
---- lustre~/lustre/mdc/mdc_internal.h 2008-12-30 11:23:32.000000000 +0100
-+++ lustre/lustre/mdc/mdc_internal.h 2009-03-10 11:41:03.000000000 +0100
-@@ -60,7 +60,7 @@
- void *ea2, int ea2len);
- void mdc_create_pack(struct ptlrpc_request *req, int offset,
- struct mdc_op_data *op_data, const void *data, int datalen,
-- __u32 mode, __u32 uid, __u32 gid, __u32 cap_effective,
-+ __u32 mode, __u32 uid, __u32 gid, cfs_kernel_cap_t cap_effective,
- __u64 rdev);
- void mdc_open_pack(struct ptlrpc_request *req, int offset,
- struct mdc_op_data *op_data, __u32 mode, __u64 rdev,
-diff -urNad lustre~/lustre/mdc/mdc_lib.c lustre/lustre/mdc/mdc_lib.c
---- lustre~/lustre/mdc/mdc_lib.c 2008-12-30 11:23:32.000000000 +0100
-+++ lustre/lustre/mdc/mdc_lib.c 2009-03-10 11:41:03.000000000 +0100
-@@ -58,7 +58,7 @@
- b = lustre_msg_buf(req->rq_reqmsg, offset, sizeof(*b));
- b->fsuid = current->fsuid;
- b->fsgid = current->fsgid;
-- b->capability = current->cap_effective;
-+ b->capability = cfs_cap_convert_from_kernel(cfs_curproc_cap_get());
- b->fid1 = *fid;
- b->size = pg_off; /* !! */
- b->suppgid = -1;
-@@ -71,7 +71,7 @@
-
- b->fsuid = current->fsuid;
- b->fsgid = current->fsgid;
-- b->capability = current->cap_effective;
-+ b->capability = cfs_cap_convert_from_kernel(cfs_curproc_cap_get());
- }
-
- void mdc_pack_req_body(struct ptlrpc_request *req, int offset,
-@@ -90,7 +90,7 @@
- /* packing of MDS records */
- void mdc_create_pack(struct ptlrpc_request *req, int offset,
- struct mdc_op_data *op_data, const void *data, int datalen,
-- __u32 mode, __u32 uid, __u32 gid, __u32 cap_effective,
-+ __u32 mode, __u32 uid, __u32 gid, cfs_kernel_cap_t cap_effective,
- __u64 rdev)
- {
- struct mds_rec_create *rec;
-@@ -100,7 +100,7 @@
- rec->cr_opcode = REINT_CREATE;
- rec->cr_fsuid = uid;
- rec->cr_fsgid = gid;
-- rec->cr_cap = cap_effective;
-+ rec->cr_cap = cfs_cap_convert_from_kernel(cap_effective);
- rec->cr_fid = op_data->fid1;
- memset(&rec->cr_replayfid, 0, sizeof(rec->cr_replayfid));
- rec->cr_mode = mode;
-@@ -168,7 +168,7 @@
- rec->cr_opcode = REINT_OPEN;
- rec->cr_fsuid = current->fsuid;
- rec->cr_fsgid = current->fsgid;
-- rec->cr_cap = current->cap_effective;
-+ rec->cr_cap = cfs_cap_convert_from_kernel(cfs_curproc_cap_get());
- rec->cr_fid = op_data->fid1;
- memset(&rec->cr_replayfid, 0, sizeof(rec->cr_replayfid));
- rec->cr_mode = mode;
-@@ -242,7 +242,7 @@
- rec->sa_opcode = REINT_SETATTR;
- rec->sa_fsuid = current->fsuid;
- rec->sa_fsgid = current->fsgid;
-- rec->sa_cap = current->cap_effective;
-+ rec->sa_cap = cfs_cap_convert_from_kernel(cfs_curproc_cap_get());
- rec->sa_fid = data->fid1;
- rec->sa_suppgid = -1;
-
-@@ -286,7 +286,7 @@
- rec->ul_opcode = REINT_UNLINK;
- rec->ul_fsuid = current->fsuid;
- rec->ul_fsgid = current->fsgid;
-- rec->ul_cap = current->cap_effective;
-+ rec->ul_cap = cfs_cap_convert_from_kernel(cfs_curproc_cap_get());
- rec->ul_mode = data->create_mode;
- rec->ul_suppgid = data->suppgids[0];
- rec->ul_fid1 = data->fid1;
-@@ -309,7 +309,7 @@
- rec->lk_opcode = REINT_LINK;
- rec->lk_fsuid = current->fsuid;
- rec->lk_fsgid = current->fsgid;
-- rec->lk_cap = current->cap_effective;
-+ rec->lk_cap = cfs_cap_convert_from_kernel(cfs_curproc_cap_get());
- rec->lk_suppgid1 = data->suppgids[0];
- rec->lk_suppgid2 = data->suppgids[1];
- rec->lk_fid1 = data->fid1;
-@@ -333,7 +333,7 @@
- rec->rn_opcode = REINT_RENAME;
- rec->rn_fsuid = current->fsuid;
- rec->rn_fsgid = current->fsgid;
-- rec->rn_cap = current->cap_effective;
-+ rec->rn_cap = cfs_cap_convert_from_kernel(cfs_curproc_cap_get());
- rec->rn_suppgid1 = data->suppgids[0];
- rec->rn_suppgid2 = data->suppgids[1];
- rec->rn_fid1 = data->fid1;
-@@ -357,7 +357,7 @@
-
- b->fsuid = current->fsuid;
- b->fsgid = current->fsgid;
-- b->capability = current->cap_effective;
-+ b->capability = cfs_cap_convert_from_kernel(cfs_curproc_cap_get());
- b->valid = valid;
- b->flags = flags | MDS_BFLAG_EXT_FLAGS;
- /* skip MDS_BFLAG_EXT_FLAGS to verify the "client < 1.4.7" case
-diff -urNad lustre~/lustre/mdc/mdc_reint.c lustre/lustre/mdc/mdc_reint.c
---- lustre~/lustre/mdc/mdc_reint.c 2008-12-30 11:23:32.000000000 +0100
-+++ lustre/lustre/mdc/mdc_reint.c 2009-03-10 11:41:03.000000000 +0100
-@@ -176,7 +176,7 @@
-
- int mdc_create(struct obd_export *exp, struct mdc_op_data *op_data,
- const void *data, int datalen, int mode, __u32 uid, __u32 gid,
-- __u32 cap_effective, __u64 rdev, struct ptlrpc_request **request)
-+ cfs_kernel_cap_t cap_effective, __u64 rdev, struct ptlrpc_request **request)
- {
- CFS_LIST_HEAD(cancels);
- struct obd_device *obd = exp->exp_obd;
-diff -urNad lustre~/lustre/mgc/mgc_request.c lustre/lustre/mgc/mgc_request.c
---- lustre~/lustre/mgc/mgc_request.c 2008-12-30 11:23:32.000000000 +0100
-+++ lustre/lustre/mgc/mgc_request.c 2009-03-10 11:41:03.000000000 +0100
-@@ -410,7 +410,7 @@
- obd->obd_lvfs_ctxt.fs = get_ds();
-
- push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
-- dentry = lookup_one_len(MOUNT_CONFIGS_DIR, current->fs->pwd,
-+ dentry = lookup_one_len(MOUNT_CONFIGS_DIR, cfs_fs_pwd(current->fs),
- strlen(MOUNT_CONFIGS_DIR));
- pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
- if (IS_ERR(dentry)) {
-diff -urNad lustre~/lustre/obdclass/linux/linux-module.c lustre/lustre/obdclass/linux/linux-module.c
---- lustre~/lustre/obdclass/linux/linux-module.c 2008-12-30 11:23:32.000000000 +0100
-+++ lustre/lustre/obdclass/linux/linux-module.c 2009-03-10 11:41:03.000000000 +0100
-@@ -418,7 +418,7 @@
- ENTRY;
-
- obd_sysctl_init();
-- proc_lustre_root = proc_mkdir("lustre", proc_root_fs);
-+ proc_lustre_root = proc_mkdir("fs/lustre", NULL);
- if (!proc_lustre_root) {
- printk(KERN_ERR
- "LustreError: error registering /proc/fs/lustre\n");
-diff -urNad lustre~/lustre/obdclass/linux/linux-sysctl.c lustre/lustre/obdclass/linux/linux-sysctl.c
---- lustre~/lustre/obdclass/linux/linux-sysctl.c 2008-12-30 11:23:32.000000000 +0100
-+++ lustre/lustre/obdclass/linux/linux-sysctl.c 2009-03-10 11:41:03.000000000 +0100
-@@ -59,7 +59,9 @@
-
- cfs_sysctl_table_header_t *obd_table_header = NULL;
-
--#define OBD_SYSCTL 300
-+#ifndef HAVE_SYSCTL_UNNUMBERED
-+
-+#define CTL_LUSTRE 300
-
- enum {
- OBD_FAIL_LOC = 1, /* control test failures instrumentation */
-@@ -77,6 +79,23 @@
- OBD_ALLOC_FAIL_RATE, /* memory allocation random failure rate */
- OBD_MAX_DIRTY_PAGES, /* maximum dirty pages */
- };
-+#else
-+#define CTL_LUSTRE CTL_UNNUMBERED
-+#define OBD_FAIL_LOC CTL_UNNUMBERED
-+#define OBD_FAIL_VAL CTL_UNNUMBERED
-+#define OBD_TIMEOUT CTL_UNNUMBERED
-+#define OBD_DUMP_ON_TIMEOUT CTL_UNNUMBERED
-+#define OBD_MEMUSED CTL_UNNUMBERED
-+#define OBD_PAGESUSED CTL_UNNUMBERED
-+#define OBD_MAXMEMUSED CTL_UNNUMBERED
-+#define OBD_MAXPAGESUSED CTL_UNNUMBERED
-+#define OBD_SYNCFILTER CTL_UNNUMBERED
-+#define OBD_LDLM_TIMEOUT CTL_UNNUMBERED
-+#define OBD_DUMP_ON_EVICTION CTL_UNNUMBERED
-+#define OBD_DEBUG_PEER_ON_TIMEOUT CTL_UNNUMBERED
-+#define OBD_ALLOC_FAIL_RATE CTL_UNNUMBERED
-+#define OBD_MAX_DIRTY_PAGES CTL_UNNUMBERED
-+#endif
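-+/* with CTL_UNNUMBERED all ctl_name values above collapse to the same
-+ * constant, so 2.6.24+ kernels look these entries up by procname only
-+ * (the binary sysctl(2) interface no longer reaches them) */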
-
- int LL_PROC_PROTO(proc_fail_loc)
- {
-@@ -123,7 +142,8 @@
- obd_max_dirty_pages = 4 << (20 - CFS_PAGE_SHIFT);
- }
- } else {
-- char buf[21];
-+ char buf[22];
-+ struct ctl_table dummy;
- int len;
-
- len = lprocfs_read_frac_helper(buf, sizeof(buf),
-@@ -132,7 +152,13 @@
- if (len > *lenp)
- len = *lenp;
- buf[len] = '\0';
-- if (copy_to_user(buffer, buf, len))
-+
-+ dummy = *table;
-+ dummy.data = buf;
-+ dummy.maxlen = sizeof(buf);
-+
-+ rc = ll_proc_dostring(&dummy,write,filp,buffer,lenp, ppos);
-+ if (rc)
- return -EFAULT;
- *lenp = len;
- }
-@@ -175,98 +201,107 @@
-
- int LL_PROC_PROTO(proc_memory_alloc)
- {
-- char buf[22];
- int len;
-+ char buf[22];
-+ struct ctl_table dummy;
- DECLARE_LL_PROC_PPOS_DECL;
-
-- if (!*lenp || (*ppos && !write)) {
-+ if (write)
-+ return -EINVAL;
-+
-+ if (!*lenp || *ppos) {
- *lenp = 0;
- return 0;
- }
-- if (write)
-- return -EINVAL;
-
-- len = snprintf(buf, sizeof(buf), LPU64"\n", obd_memory_sum());
-+ dummy = *table;
-+ dummy.data = buf;
-+ dummy.maxlen = sizeof(buf);
-+
-+ len = snprintf(buf, sizeof(buf), LPU64,
-+ obd_memory_sum());
-+
- if (len > *lenp)
- len = *lenp;
- buf[len] = '\0';
-- if (copy_to_user(buffer, buf, len))
-- return -EFAULT;
-- *lenp = len;
-- *ppos += *lenp;
-- return 0;
-+ return ll_proc_dostring(&dummy,write,filp,buffer,lenp, ppos);
- }
-
- int LL_PROC_PROTO(proc_pages_alloc)
- {
-- char buf[22];
- int len;
-+ char buf[22];
-+ struct ctl_table dummy;
- DECLARE_LL_PROC_PPOS_DECL;
-
-- if (!*lenp || (*ppos && !write)) {
-+ if (write)
-+ return -EINVAL;
-+
-+ if (!*lenp || *ppos) {
- *lenp = 0;
- return 0;
- }
-- if (write)
-- return -EINVAL;
-+ dummy = *table;
-+ dummy.data = buf;
-+ dummy.maxlen = sizeof(buf);
-
-- len = snprintf(buf, sizeof(buf), LPU64"\n", obd_pages_sum());
-+ len = snprintf(buf, sizeof(buf), LPU64,
-+ obd_pages_sum());
- if (len > *lenp)
- len = *lenp;
- buf[len] = '\0';
-- if (copy_to_user(buffer, buf, len))
-- return -EFAULT;
-- *lenp = len;
-- *ppos += *lenp;
-- return 0;
-+ return ll_proc_dostring(&dummy,write,filp,buffer,lenp, ppos);
- }
-
- int LL_PROC_PROTO(proc_mem_max)
- {
-- char buf[22];
- int len;
-+ char buf[22];
-+ struct ctl_table dummy;
- DECLARE_LL_PROC_PPOS_DECL;
-
-- if (!*lenp || (*ppos && !write)) {
-- *lenp = 0;
-- return 0;
-- }
- if (write)
- return -EINVAL;
-
-- len = snprintf(buf, sizeof(buf), LPU64"\n", obd_memory_max());
-+ if (!*lenp || *ppos) {
-+ *lenp = 0;
-+ return 0;
-+ }
-+ dummy = *table;
-+ dummy.data = buf;
-+ dummy.maxlen = sizeof(buf);
-+
-+ len = snprintf(buf, sizeof(buf), LPU64,
-+ obd_memory_max());
- if (len > *lenp)
- len = *lenp;
- buf[len] = '\0';
-- if (copy_to_user(buffer, buf, len))
-- return -EFAULT;
-- *lenp = len;
-- *ppos += *lenp;
-- return 0;
-+ return ll_proc_dostring(&dummy,write,filp,buffer,lenp, ppos);
- }
-
- int LL_PROC_PROTO(proc_pages_max)
- {
- char buf[22];
-+ struct ctl_table dummy;
- int len;
- DECLARE_LL_PROC_PPOS_DECL;
-
-- if (!*lenp || (*ppos && !write)) {
-- *lenp = 0;
-- return 0;
-- }
- if (write)
- return -EINVAL;
-
-- len = snprintf(buf, sizeof(buf), LPU64"\n", obd_pages_max());
-+ if (!*lenp || *ppos) {
-+ *lenp = 0;
-+ return 0;
-+ }
-+ dummy = *table;
-+ dummy.data = buf;
-+ dummy.maxlen = sizeof(buf);
-+ len = snprintf(buf, sizeof(buf), LPU64,
-+ obd_pages_max());
- if (len > *lenp)
- len = *lenp;
- buf[len] = '\0';
-- if (copy_to_user(buffer, buf, len))
-- return -EFAULT;
-- *lenp = len;
-- *ppos += *lenp;
-- return 0;
-+ return ll_proc_dostring(&dummy,write,filp,buffer,lenp, ppos);
- }
-
- static cfs_sysctl_table_t obd_table[] = {
-@@ -284,7 +319,8 @@
- .data = &obd_fail_val,
- .maxlen = sizeof(int),
- .mode = 0644,
-- .proc_handler = &proc_dointvec
-+ .proc_handler = &proc_dointvec,
-+ .strategy = &sysctl_intvec,
- },
- {
- .ctl_name = OBD_TIMEOUT,
-@@ -300,7 +336,7 @@
- .data = &obd_debug_peer_on_timeout,
- .maxlen = sizeof(int),
- .mode = 0644,
-- .proc_handler = &proc_dointvec
-+ .proc_handler = &proc_dointvec,
- },
- {
- .ctl_name = OBD_DUMP_ON_TIMEOUT,
-@@ -308,7 +344,7 @@
- .data = &obd_dump_on_timeout,
- .maxlen = sizeof(int),
- .mode = 0644,
-- .proc_handler = &proc_dointvec
-+ .proc_handler = &proc_dointvec,
- },
- {
- .ctl_name = OBD_DUMP_ON_EVICTION,
-@@ -316,7 +352,7 @@
- .data = &obd_dump_on_eviction,
- .maxlen = sizeof(int),
- .mode = 0644,
-- .proc_handler = &proc_dointvec
-+ .proc_handler = &proc_dointvec,
- },
- {
- .ctl_name = OBD_MEMUSED,
-@@ -324,7 +360,8 @@
- .data = NULL,
- .maxlen = 0,
- .mode = 0444,
-- .proc_handler = &proc_memory_alloc
-+ .proc_handler = &proc_memory_alloc,
-+// .strategy = &sysctl_memory_alloc,
- },
- {
- .ctl_name = OBD_PAGESUSED,
-@@ -332,7 +369,8 @@
- .data = NULL,
- .maxlen = 0,
- .mode = 0444,
-- .proc_handler = &proc_pages_alloc
-+ .proc_handler = &proc_pages_alloc,
-+// .strategy = &sysctl_pages_alloc,
- },
- {
- .ctl_name = OBD_MAXMEMUSED,
-@@ -340,7 +378,8 @@
- .data = NULL,
- .maxlen = 0,
- .mode = 0444,
-- .proc_handler = &proc_mem_max
-+ .proc_handler = &proc_mem_max,
-+// .strategy = &sysctl_mem_max,
- },
- {
- .ctl_name = OBD_MAXPAGESUSED,
-@@ -348,7 +387,8 @@
- .data = NULL,
- .maxlen = 0,
- .mode = 0444,
-- .proc_handler = &proc_pages_max
-+ .proc_handler = &proc_pages_max,
-+// .strategy = &sysctl_pages_max,
- },
- {
- .ctl_name = OBD_LDLM_TIMEOUT,
-@@ -380,15 +420,13 @@
- };
-
- static cfs_sysctl_table_t parent_table[] = {
-- {
-- .ctl_name = OBD_SYSCTL,
-- .procname = "lustre",
-- .data = NULL,
-- .maxlen = 0,
-- .mode = 0555,
-- .child = obd_table
-- },
-- {0}
-+ {
-+ .ctl_name = CTL_LUSTRE,
-+ .procname = "lustre",
-+ .mode = 0555,
-+ .child = obd_table
-+ },
-+ { 0 }
- };
-
- void obd_sysctl_init (void)
-diff -urNad lustre~/lustre/obdclass/llog_obd.c lustre/lustre/obdclass/llog_obd.c
---- lustre~/lustre/obdclass/llog_obd.c 2008-12-30 11:23:32.000000000 +0100
-+++ lustre/lustre/obdclass/llog_obd.c 2009-03-10 11:41:03.000000000 +0100
-@@ -203,7 +203,7 @@
- struct lov_stripe_md *lsm, struct llog_cookie *logcookies,
- int numcookies)
- {
-- __u32 cap;
-+ cfs_kernel_cap_t cap;
- int rc;
- ENTRY;
-
-@@ -213,10 +213,10 @@
- }
-
- CTXT_CHECK_OP(ctxt, add, -EOPNOTSUPP);
-- cap = current->cap_effective;
-+ cap = current->cap_effective;
- cap_raise(current->cap_effective, CAP_SYS_RESOURCE);
- rc = CTXTP(ctxt, add)(ctxt, rec, lsm, logcookies, numcookies);
-- current->cap_effective = cap;
-+ current->cap_effective = cap;
- RETURN(rc);
- }
- EXPORT_SYMBOL(llog_add);
-diff -urNad lustre~/lustre/obdclass/lprocfs_status.c lustre/lustre/obdclass/lprocfs_status.c
---- lustre~/lustre/obdclass/lprocfs_status.c 2008-12-30 11:23:32.000000000 +0100
-+++ lustre/lustre/obdclass/lprocfs_status.c 2009-03-10 11:41:03.000000000 +0100
-@@ -132,6 +132,8 @@
- proc->read_proc = read_proc;
- proc->write_proc = write_proc;
- proc->data = data;
-+ proc->owner = THIS_MODULE;
-+
- return 0;
- }
-
-@@ -151,7 +153,7 @@
-
- LPROCFS_ENTRY();
- OBD_FAIL_TIMEOUT(OBD_FAIL_LPROC_REMOVE, 10);
-- if (!dp->deleted && dp->read_proc)
-+ if (!LPROCFS_CHECK_DELETED(dp) && dp->read_proc)
- rc = dp->read_proc(page, &start, *ppos, PAGE_SIZE,
- &eof, dp->data);
- LPROCFS_EXIT();
-@@ -190,7 +192,7 @@
- int rc = -EIO;
-
- LPROCFS_ENTRY();
-- if (!dp->deleted && dp->write_proc)
-+ if (!LPROCFS_CHECK_DELETED(dp) && dp->write_proc)
- rc = dp->write_proc(f, buf, size, dp->data);
- LPROCFS_EXIT();
- return rc;
-diff -urNad lustre~/lustre/ptlrpc/service.c lustre/lustre/ptlrpc/service.c
---- lustre~/lustre/ptlrpc/service.c 2008-12-30 11:23:32.000000000 +0100
-+++ lustre/lustre/ptlrpc/service.c 2009-03-10 11:41:03.000000000 +0100
-@@ -1268,7 +1268,7 @@
- cfs_daemonize(name);
- exit_fs(cfs_current());
- current->fs = fs;
-- ll_set_fs_pwd(current->fs, init_task.fs->pwdmnt, init_task.fs->pwd);
-+ ll_set_fs_pwd(current->fs, cfs_fs_mnt(init_task.fs), cfs_fs_pwd(init_task.fs));
- }
-
- static void
-diff -urNad lustre~/lustre/quota/quotacheck_test.c lustre/lustre/quota/quotacheck_test.c
---- lustre~/lustre/quota/quotacheck_test.c 2008-12-30 11:23:32.000000000 +0100
-+++ lustre/lustre/quota/quotacheck_test.c 2009-03-10 11:41:03.000000000 +0100
-@@ -97,7 +97,14 @@
- if (ext3_test_bit(index, bitmap_bh->b_data)) {
- CERROR("i: %d, ino: %lu\n", index, ino);
- ll_sleep(1);
-+#ifdef HAVE_IGET_CALL
- inode = iget(sb, ino);
-+#else
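-+ /* iget_locked() may return a not-yet-initialised inode flagged
-+ * I_NEW; unlock_new_inode() is needed to make it usable */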
-+ inode = iget_locked(sb, ino);
-+ if (inode && (inode->i_state & I_NEW)) {
-+ unlock_new_inode(inode);
-+ }
-+#endif
- }
-
- return inode;
diff --git a/debian/patches/patchless_support_2.6.26.dpatch b/debian/patches/patchless_support_2.6.26.dpatch
deleted file mode 100755
index 0d5c413..0000000
--- a/debian/patches/patchless_support_2.6.26.dpatch
+++ /dev/null
@@ -1,16962 +0,0 @@
-#! /bin/sh /usr/share/dpatch/dpatch-run
-## posix_acl.patch by Patrick Winnertz <winnie at debian.org>
-##
-## All lines beginning with `## DP:' are a description of the patch.
-## DP: Patch which will enable 2.6.26 patchless support for lustre, taken from #14250
-
- at DPATCH@
-diff -urNad lustre~/lnet/autoconf/lustre-lnet.m4 lustre/lnet/autoconf/lustre-lnet.m4
---- lustre~/lnet/autoconf/lustre-lnet.m4 2009-03-12 10:32:27.000000000 +0100
-+++ lustre/lnet/autoconf/lustre-lnet.m4 2009-03-13 09:45:02.000000000 +0100
-@@ -1362,6 +1362,22 @@
- ])
- ])
-
-+# 2.6.27 adds a second argument to sock_map_fd
-+AC_DEFUN([LN_SOCK_MAP_FD_2ARG],
-+[AC_MSG_CHECKING([sock_map_fd has a second argument])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/net.h>
-+],[
-+ sock_map_fd(NULL, 0);
-+],[
-+ AC_MSG_RESULT(yes)
-+ AC_DEFINE(HAVE_SOCK_MAP_FD_2ARG, 1,
-+ [sock_map_fd has a second argument])
-+],[
-+ AC_MSG_RESULT(NO)
-+])
-+])
-+
- #
- # LN_PROG_LINUX
- #
-@@ -1410,6 +1426,8 @@
- LN_SCATTERLIST_SETPAGE
- # 2.6.26
- LN_SEM_COUNT
-+# 2.6.27
-+LN_SOCK_MAP_FD_2ARG
- ])
-
- #
-diff -urNad lustre~/lnet/libcfs/linux/linux-prim.c lustre/lnet/libcfs/linux/linux-prim.c
---- lustre~/lnet/libcfs/linux/linux-prim.c 2008-08-07 11:51:06.000000000 +0200
-+++ lustre/lnet/libcfs/linux/linux-prim.c 2009-03-13 09:45:02.000000000 +0100
-@@ -49,7 +49,7 @@
- void cfs_enter_debugger(void)
- {
- #if defined(CONFIG_KGDB)
-- BREAKPOINT();
-+// BREAKPOINT();
- #elif defined(__arch_um__)
- asm("int $3");
- #else
-diff -urNad lustre~/lnet/libcfs/linux/linux-tcpip.c lustre/lnet/libcfs/linux/linux-tcpip.c
---- lustre~/lnet/libcfs/linux/linux-tcpip.c 2008-08-07 11:51:07.000000000 +0200
-+++ lustre/lnet/libcfs/linux/linux-tcpip.c 2009-03-13 09:45:02.000000000 +0100
-@@ -63,7 +63,11 @@
- return rc;
- }
-
-+#ifdef HAVE_SOCK_MAP_FD_2ARG
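-+ /* the second argument added in 2.6.27 carries fd flags such as
-+ * O_CLOEXEC; passing 0 keeps the old behaviour */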
-+ fd = sock_map_fd(sock,0);
-+#else
- fd = sock_map_fd(sock);
-+#endif
- if (fd < 0) {
- rc = fd;
- sock_release(sock);
-diff -urNad lustre~/lnet/lnet/api-ni.c lustre/lnet/lnet/api-ni.c
---- lustre~/lnet/lnet/api-ni.c 2009-03-12 10:21:27.000000000 +0100
-+++ lustre/lnet/lnet/api-ni.c 2009-03-13 09:45:02.000000000 +0100
-@@ -1032,7 +1032,7 @@
- #ifdef __KERNEL__
- if (lnd == NULL) {
- LNET_MUTEX_UP(&the_lnet.ln_lnd_mutex);
-- rc = request_module(libcfs_lnd2modname(lnd_type));
-+ rc = request_module("%s", libcfs_lnd2modname(lnd_type));
- LNET_MUTEX_DOWN(&the_lnet.ln_lnd_mutex);
-
- lnd = lnet_find_lnd_by_type(lnd_type);
-diff -urNad lustre~/lustre/autoconf/lustre-core.m4 lustre/lustre/autoconf/lustre-core.m4
---- lustre~/lustre/autoconf/lustre-core.m4 2009-03-12 10:32:27.000000000 +0100
-+++ lustre/lustre/autoconf/lustre-core.m4 2009-03-13 09:45:02.000000000 +0100
-@@ -1106,15 +1106,20 @@
- AC_DEFUN([LC_PAGE_CHECKED],
- [AC_MSG_CHECKING([kernel has PageChecked and SetPageChecked])
- LB_LINUX_TRY_COMPILE([
-- #include <linux/mm.h>
-- #include <linux/page-flags.h>
-+ #include <linux/autoconf.h>
-+#ifdef HAVE_LINUX_MMTYPES_H
-+ #include <linux/mm_types.h>
-+#endif
-+ #include <linux/page-flags.h>
- ],[
-- #ifndef PageChecked
-- #error PageChecked not defined in kernel
-- #endif
-- #ifndef SetPageChecked
-- #error SetPageChecked not defined in kernel
-- #endif
-+ struct page *p;
-+
-+ /* before 2.6.26 this was a define */
-+ #ifndef PageChecked
-+ /* 2.6.26 use function instead of define for it */
-+ SetPageChecked(p);
-+ PageChecked(p);
-+ #endif
- ],[
- AC_MSG_RESULT(yes)
- AC_DEFINE(HAVE_PAGE_CHECKED, 1,
-@@ -1232,6 +1237,9 @@
- ])
- ])
-
-+# 2.6.18
-+
-+
- # 2.6.23 have return type 'void' for unregister_blkdev
- AC_DEFUN([LC_UNREGISTER_BLKDEV_RETURN_INT],
- [AC_MSG_CHECKING([if unregister_blkdev return int])
-@@ -1249,6 +1257,25 @@
- ])
-
- # 2.6.23 change .sendfile to .splice_read
-+# RHEL4 (-92 kernel) has both the .sendfile and .splice_read APIs
-+AC_DEFUN([LC_KERNEL_SENDFILE],
-+[AC_MSG_CHECKING([if kernel has .sendfile])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/fs.h>
-+],[
-+ struct file_operations file;
-+
-+ file.sendfile = NULL;
-+], [
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_KERNEL_SENDFILE, 1,
-+ [kernel has .sendfile])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+# 2.6.23 change .sendfile to .splice_read
- AC_DEFUN([LC_KERNEL_SPLICE_READ],
- [AC_MSG_CHECKING([if kernel has .splice_read])
- LB_LINUX_TRY_COMPILE([
-@@ -1268,11 +1295,240 @@
-
- # 2.6.23 extract nfs export related data into exportfs.h
- AC_DEFUN([LC_HAVE_EXPORTFS_H],
--[
--tmpfl="$CFLAGS"
--CFLAGS="$CFLAGS -I$LINUX_OBJ/include"
--AC_CHECK_HEADERS([linux/exportfs.h])
--CFLAGS="$tmpfl"
-+[LB_CHECK_FILE([$LINUX/include/linux/exportfs.h], [
-+ AC_DEFINE(HAVE_LINUX_EXPORTFS_H, 1,
-+ [kernel has include/exportfs.h])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+# 2.6.23 has a new page fault handling API
-+AC_DEFUN([LC_VM_OP_FAULT],
-+[AC_MSG_CHECKING([if kernel has .fault in vm_operation_struct])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/mm.h>
-+],[
-+ struct vm_operations_struct op;
-+
-+ op.fault = NULL;
-+], [
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_VM_OP_FAULT, 1,
-+ [if kernel has .fault in vm_operation_struct])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+#2.6.23 has new shrinker API
-+AC_DEFUN([LC_REGISTER_SHRINKER],
-+[AC_MSG_CHECKING([if kernel has register_shrinker])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/mm.h>
-+],[
-+ register_shrinker(NULL);
-+], [
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_REGISTER_SHRINKER, 1,
-+ [if kernel has register_shrinker])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+# 2.6.24 has bio_endio with 2 args
-+AC_DEFUN([LC_BIO_ENDIO_2ARG],
-+[AC_MSG_CHECKING([if kernel has bio_endio with 2 args])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/bio.h>
-+],[
-+ bio_endio(NULL, 0);
-+], [
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_BIO_ENDIO_2ARG, 1,
-+ [if kernel has bio_endio with 2 args])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+# 2.6.24 has new members in the export_operations struct.
-+AC_DEFUN([LC_FH_TO_DENTRY],
-+[AC_MSG_CHECKING([if kernel has .fh_to_dentry member in export_operations struct])
-+LB_LINUX_TRY_COMPILE([
-+#ifdef HAVE_LINUX_EXPORTFS_H
-+ #include <linux/exportfs.h>
-+#else
-+ #include <linux/fs.h>
-+#endif
-+],[
-+ struct export_operations exp;
-+
-+ exp.fh_to_dentry = NULL;
-+], [
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_FH_TO_DENTRY, 1,
-+ [kernel has .fh_to_dentry member in export_operations struct])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+# 2.6.24 need linux/mm_types.h included
-+AC_DEFUN([LC_HAVE_MMTYPES_H],
-+[LB_CHECK_FILE([$LINUX/include/linux/mm_types.h], [
-+ AC_DEFINE(HAVE_LINUX_MMTYPES_H, 1,
-+ [kernel has include/mm_types.h])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+# 2.6.24 removed the long-lived 'deleted' member from the procfs entry struct
-+AC_DEFUN([LC_PROCFS_DELETED],
-+[AC_MSG_CHECKING([if kernel has deleted member in procfs entry struct])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/proc_fs.h>
-+],[
-+ struct proc_dir_entry pde;
-+
-+ pde.deleted = NULL;
-+], [
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_PROCFS_DELETED, 1,
-+ [kernel has deleted member in procfs entry struct])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+# 2.6.25 changed the define to an inline function
-+AC_DEFUN([LC_MAPPING_CAP_WRITEBACK_DIRTY],
-+[AC_MSG_CHECKING([if kernel has mapping_cap_writeback_dirty])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/backing-dev.h>
-+],[
-+ #ifndef mapping_cap_writeback_dirty
-+ mapping_cap_writeback_dirty(NULL);
-+ #endif
-+],[
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_MAPPING_CAP_WRITEBACK_DIRTY, 1,
-+        [kernel has mapping_cap_writeback_dirty])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+
-+
-+# 2.6.26 does not export set_fs_pwd and changes the pwd parameter in fs_struct
-+AC_DEFUN([LC_FS_STRUCT_USE_PATH],
-+[AC_MSG_CHECKING([fs_struct use path structure])
-+LB_LINUX_TRY_COMPILE([
-+ #include <asm/atomic.h>
-+ #include <linux/spinlock.h>
-+ #include <linux/fs_struct.h>
-+],[
-+ struct path path;
-+ struct fs_struct fs;
-+
-+ fs.pwd = path;
-+], [
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_FS_STRUCT_USE_PATH, 1,
-+ [fs_struct use path structure])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+# 2.6.26 remove path_release and use path_put instead
-+AC_DEFUN([LC_PATH_RELEASE],
-+[AC_MSG_CHECKING([if path_release exist])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/dcache.h>
-+ #include <linux/namei.h>
-+],[
-+ path_release(NULL);
-+],[
-+ AC_DEFINE(HAVE_PATH_RELEASE, 1, [path_release exist])
-+ AC_MSG_RESULT([yes])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+#2.6.27
-+AC_DEFUN([LC_INODE_PERMISION_2ARGS],
-+[AC_MSG_CHECKING([if inode_operations->permission has two args])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/fs.h>
-+],[
-+ struct inode *inode;
-+
-+ inode->i_op->permission(NULL,0);
-+],[
-+ AC_DEFINE(HAVE_INODE_PERMISION_2ARGS, 1,
-+        [inode_operations->permission has two args])
-+ AC_MSG_RESULT([yes])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+# 2.6.27 has file_remove_suid instead of remove_suid
-+AC_DEFUN([LC_FILE_REMOVE_SUID],
-+[AC_MSG_CHECKING([if kernel has file_remove_suid])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/fs.h>
-+],[
-+ file_remove_suid(NULL);
-+],[
-+ AC_DEFINE(HAVE_FILE_REMOVE_SUID, 1,
-+        [kernel has file_remove_suid])
-+ AC_MSG_RESULT([yes])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+# 2.6.27 has a new page locking API
-+AC_DEFUN([LC_TRYLOCKPAGE],
-+[AC_MSG_CHECKING([if kernel uses trylock_page for page locking])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/pagemap.h>
-+],[
-+ trylock_page(NULL);
-+],[
-+ AC_DEFINE(HAVE_TRYLOCK_PAGE, 1,
-+        [kernel uses trylock_page for page locking])
-+ AC_MSG_RESULT([yes])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+# 2.6.27 and some older kernels have mapping->tree_lock as a spinlock
-+AC_DEFUN([LC_RW_TREE_LOCK],
-+[AC_MSG_CHECKING([mapping->tree_lock is rw_lock])
-+tmp_flags="$EXTRA_KCFLAGS"
-+EXTRA_KCFLAGS="-Werror"
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/fs.h>
-+],[
-+ struct address_space *map = NULL;
-+
-+ write_lock_irq(&map->tree_lock);
-+],[
-+ AC_MSG_RESULT(yes)
-+ AC_DEFINE(HAVE_RW_TREE_LOCK, 1,
-+ [mapping->tree_lock is rw_lock])
-+],[
-+ AC_MSG_RESULT(no)
-+])
-+EXTRA_KCFLAGS="$tmp_flags"
- ])
-
- #
-@@ -1372,8 +1628,31 @@
- LC_FS_RENAME_DOES_D_MOVE
- # 2.6.23
- LC_UNREGISTER_BLKDEV_RETURN_INT
-+ LC_KERNEL_SENDFILE
- LC_KERNEL_SPLICE_READ
- LC_HAVE_EXPORTFS_H
-+ LC_VM_OP_FAULT
-+ LC_REGISTER_SHRINKER
-+
-+ #2.6.25
-+ LC_MAPPING_CAP_WRITEBACK_DIRTY
-+
-+ # 2.6.24
-+ LC_HAVE_MMTYPES_H
-+ LC_BIO_ENDIO_2ARG
-+ LC_FH_TO_DENTRY
-+ LC_PROCFS_DELETED
-+
-+ # 2.6.26
-+ LC_FS_STRUCT_USE_PATH
-+ LC_RCU_LIST_SAFE
-+ LC_PATH_RELEASE
-+
-+ # 2.6.27
-+ LC_INODE_PERMISION_2ARGS
-+ LC_FILE_REMOVE_SUID
-+ LC_TRYLOCKPAGE
-+ LC_RW_TREE_LOCK
- ])
-
- #
-@@ -1606,6 +1885,7 @@
- ],[
- AC_MSG_RESULT([no])
- ])
-+
- ],[
- AC_MSG_RESULT([no])
- ])
-diff -urNad lustre~/lustre/autoconf/lustre-core.m4.orig lustre/lustre/autoconf/lustre-core.m4.orig
---- lustre~/lustre/autoconf/lustre-core.m4.orig 1970-01-01 00:00:00.000000000 +0000
-+++ lustre/lustre/autoconf/lustre-core.m4.orig 2009-03-13 09:45:02.000000000 +0100
-@@ -0,0 +1,2075 @@
-+#* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-+#* vim:expandtab:shiftwidth=8:tabstop=8:
-+#
-+# LC_CONFIG_SRCDIR
-+#
-+# Wrapper for AC_CONFIG_SUBDIR
-+#
-+AC_DEFUN([LC_CONFIG_SRCDIR],
-+[AC_CONFIG_SRCDIR([lustre/obdclass/obdo.c])
-+])
-+
-+#
-+# LC_PATH_DEFAULTS
-+#
-+# lustre specific paths
-+#
-+AC_DEFUN([LC_PATH_DEFAULTS],
-+[# ptlrpc kernel build requires this
-+LUSTRE="$PWD/lustre"
-+AC_SUBST(LUSTRE)
-+
-+# mount.lustre
-+rootsbindir='/sbin'
-+AC_SUBST(rootsbindir)
-+
-+demodir='$(docdir)/demo'
-+AC_SUBST(demodir)
-+
-+pkgexampledir='${pkgdatadir}/examples'
-+AC_SUBST(pkgexampledir)
-+])
-+
-+#
-+# LC_TARGET_SUPPORTED
-+#
-+# is the target os supported?
-+#
-+AC_DEFUN([LC_TARGET_SUPPORTED],
-+[case $target_os in
-+ linux* | darwin*)
-+$1
-+ ;;
-+ *)
-+$2
-+ ;;
-+esac
-+])
-+
-+#
-+# LC_CONFIG_EXT3
-+#
-+# that ext3 is enabled in the kernel
-+#
-+AC_DEFUN([LC_CONFIG_EXT3],
-+[LB_LINUX_CONFIG([EXT3_FS],[],[
-+ LB_LINUX_CONFIG([EXT3_FS_MODULE],[],[$2])
-+])
-+LB_LINUX_CONFIG([EXT3_FS_XATTR],[$1],[$3])
-+])
-+
-+#
-+# LC_FSHOOKS
-+#
-+# If we have (and can build) fshooks.h
-+#
-+AC_DEFUN([LC_FSHOOKS],
-+[LB_CHECK_FILE([$LINUX/include/linux/fshooks.h],[
-+ AC_MSG_CHECKING([if fshooks.h can be compiled])
-+ LB_LINUX_TRY_COMPILE([
-+ #include <linux/fshooks.h>
-+ ],[],[
-+ AC_MSG_RESULT([yes])
-+ ],[
-+ AC_MSG_RESULT([no])
-+ AC_MSG_WARN([You might have better luck with gcc 3.3.x.])
-+ AC_MSG_WARN([You can set CC=gcc33 before running configure.])
-+ AC_MSG_ERROR([Your compiler cannot build fshooks.h.])
-+ ])
-+$1
-+],[
-+$2
-+])
-+])
-+
-+#
-+# LC_STRUCT_KIOBUF
-+#
-+# rh 2.4.18 has iobuf->dovary, but other kernels do not
-+#
-+AC_DEFUN([LC_STRUCT_KIOBUF],
-+[AC_MSG_CHECKING([if struct kiobuf has a dovary field])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/iobuf.h>
-+],[
-+ struct kiobuf iobuf;
-+ iobuf.dovary = 1;
-+],[
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_KIOBUF_DOVARY, 1, [struct kiobuf has a dovary field])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+#
-+# LC_FUNC_COND_RESCHED
-+#
-+# cond_resched() was introduced in 2.4.20
-+#
-+AC_DEFUN([LC_FUNC_COND_RESCHED],
-+[AC_MSG_CHECKING([if kernel offers cond_resched])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/sched.h>
-+],[
-+ cond_resched();
-+],[
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_COND_RESCHED, 1, [cond_resched found])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+#
-+# LC_FUNC_ZAP_PAGE_RANGE
-+#
-+# if zap_page_range() takes a vma arg
-+#
-+AC_DEFUN([LC_FUNC_ZAP_PAGE_RANGE],
-+[AC_MSG_CHECKING([if zap_page_range takes a vma parameter])
-+ZAP_PAGE_RANGE_VMA="`grep -c 'zap_page_range.*struct vm_area_struct' $LINUX/include/linux/mm.h`"
-+if test "$ZAP_PAGE_RANGE_VMA" != 0 ; then
-+ AC_DEFINE(ZAP_PAGE_RANGE_VMA, 1, [zap_page_range with vma parameter])
-+ AC_MSG_RESULT([yes])
-+else
-+ AC_MSG_RESULT([no])
-+fi
-+])
-+
-+#
-+# LC_FUNC_PDE
-+#
-+# if proc_fs.h defines PDE()
-+#
-+AC_DEFUN([LC_FUNC_PDE],
-+[AC_MSG_CHECKING([if kernel defines PDE])
-+HAVE_PDE="`grep -c 'proc_dir_entry..PDE' $LINUX/include/linux/proc_fs.h`"
-+if test "$HAVE_PDE" != 0 ; then
-+ AC_DEFINE(HAVE_PDE, 1, [the kernel defines PDE])
-+ AC_MSG_RESULT([yes])
-+else
-+ AC_MSG_RESULT([no])
-+fi
-+])
-+
-+#
-+# LC_FUNC_FILEMAP_FDATAWRITE
-+#
-+# if filemap_fdatawrite() exists
-+#
-+AC_DEFUN([LC_FUNC_FILEMAP_FDATAWRITE],
-+[AC_MSG_CHECKING([whether filemap_fdatawrite() is defined])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/fs.h>
-+],[
-+ int (*foo)(struct address_space *)= filemap_fdatawrite;
-+],[
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_FILEMAP_FDATAWRITE, 1, [filemap_fdatawrite() found])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+#
-+# LC_FUNC_DIRECT_IO
-+#
-+# if direct_IO takes a struct file argument
-+#
-+AC_DEFUN([LC_FUNC_DIRECT_IO],
-+[AC_MSG_CHECKING([if kernel passes struct file to direct_IO])
-+HAVE_DIO_FILE="`grep -c 'direct_IO.*struct file' $LINUX/include/linux/fs.h`"
-+if test "$HAVE_DIO_FILE" != 0 ; then
-+ AC_DEFINE(HAVE_DIO_FILE, 1, [the kernel passes struct file to direct_IO])
-+ AC_MSG_RESULT(yes)
-+else
-+ AC_MSG_RESULT(no)
-+fi
-+])
-+
-+#
-+# LC_HEADER_MM_INLINE
-+#
-+# RHEL kernels define page_count in mm_inline.h
-+#
-+AC_DEFUN([LC_HEADER_MM_INLINE],
-+[AC_MSG_CHECKING([if kernel has mm_inline.h header])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/mm_inline.h>
-+],[
-+ #ifndef page_count
-+ #error mm_inline.h does not define page_count
-+ #endif
-+],[
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_MM_INLINE, 1, [mm_inline found])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+#
-+# LC_STRUCT_INODE
-+#
-+# if inode->i_alloc_sem exists
-+#
-+AC_DEFUN([LC_STRUCT_INODE],
-+[AC_MSG_CHECKING([if struct inode has i_alloc_sem])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/fs.h>
-+ #include <linux/version.h>
-+],[
-+ struct inode i;
-+ return (char *)&i.i_alloc_sem - (char *)&i;
-+],[
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_I_ALLOC_SEM, 1, [struct inode has i_alloc_sem])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+#
-+# LC_FUNC_REGISTER_CACHE
-+#
-+# if register_cache() is defined by kernel
-+#
-+AC_DEFUN([LC_FUNC_REGISTER_CACHE],
-+[AC_MSG_CHECKING([if kernel defines register_cache()])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/list.h>
-+ #include <linux/cache_def.h>
-+],[
-+ struct cache_definition cache;
-+],[
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_REGISTER_CACHE, 1, [register_cache found])
-+ AC_MSG_CHECKING([if kernel expects return from cache shrink function])
-+ HAVE_CACHE_RETURN_INT="`grep -c 'int.*shrink' $LINUX/include/linux/cache_def.h`"
-+ if test "$HAVE_CACHE_RETURN_INT" != 0 ; then
-+ AC_DEFINE(HAVE_CACHE_RETURN_INT, 1, [kernel expects return from shrink_cache])
-+ AC_MSG_RESULT(yes)
-+ else
-+ AC_MSG_RESULT(no)
-+ fi
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+#
-+# LC_FUNC_GRAB_CACHE_PAGE_NOWAIT_GFP
-+#
-+# check for our patched grab_cache_page_nowait_gfp() function
-+#
-+AC_DEFUN([LC_FUNC_GRAB_CACHE_PAGE_NOWAIT_GFP],
-+[AC_MSG_CHECKING([if kernel defines grab_cache_page_nowait_gfp()])
-+HAVE_GCPN_GFP="`grep -c 'grab_cache_page_nowait_gfp' $LINUX/include/linux/pagemap.h`"
-+if test "$HAVE_GCPN_GFP" != 0 ; then
-+ AC_DEFINE(HAVE_GRAB_CACHE_PAGE_NOWAIT_GFP, 1,
-+ [kernel has grab_cache_page_nowait_gfp()])
-+ AC_MSG_RESULT(yes)
-+else
-+ AC_MSG_RESULT(no)
-+fi
-+])
-+
-+#
-+# LC_FUNC_DEV_SET_RDONLY
-+#
-+# check for the old-style dev_set_rdonly which took an extra "devno" param
-+# and can only set a single device to discard writes at one time
-+#
-+AC_DEFUN([LC_FUNC_DEV_SET_RDONLY],
-+[AC_MSG_CHECKING([if kernel has new dev_set_rdonly])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/fs.h>
-+],[
-+ #ifndef HAVE_CLEAR_RDONLY_ON_PUT
-+ #error needs to be patched by lustre kernel patches from Lustre version 1.4.3 or above.
-+ #endif
-+],[
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_DEV_SET_RDONLY, 1, [kernel has new dev_set_rdonly])
-+],[
-+        AC_MSG_RESULT([no, Linux kernel source needs to be patched by lustre
-+kernel patches from Lustre version 1.4.3 or above.])
-+])
-+])
-+
-+#
-+# LC_CONFIG_BACKINGFS
-+#
-+# setup, check the backing filesystem
-+#
-+AC_DEFUN([LC_CONFIG_BACKINGFS],
-+[
-+BACKINGFS="ldiskfs"
-+
-+if test x$with_ldiskfs = xno ; then
-+ BACKINGFS="ext3"
-+
-+ if test x$linux25$enable_server = xyesyes ; then
-+ AC_MSG_ERROR([ldiskfs is required for 2.6-based servers.])
-+ fi
-+
-+ # --- Check that ext3 and ext3 xattr are enabled in the kernel
-+ LC_CONFIG_EXT3([],[
-+ AC_MSG_ERROR([Lustre requires that ext3 is enabled in the kernel])
-+ ],[
-+ AC_MSG_WARN([Lustre requires that extended attributes for ext3 are enabled in the kernel])
-+ AC_MSG_WARN([This build may fail.])
-+ ])
-+else
-+ # ldiskfs is enabled
-+ LB_DEFINE_LDISKFS_OPTIONS
-+fi #ldiskfs
-+
-+AC_MSG_CHECKING([which backing filesystem to use])
-+AC_MSG_RESULT([$BACKINGFS])
-+AC_SUBST(BACKINGFS)
-+])
-+
-+#
-+# LC_CONFIG_PINGER
-+#
-+# the pinger is temporary, until we have the recovery node in place
-+#
-+AC_DEFUN([LC_CONFIG_PINGER],
-+[AC_MSG_CHECKING([whether to enable pinger support])
-+AC_ARG_ENABLE([pinger],
-+ AC_HELP_STRING([--disable-pinger],
-+ [disable recovery pinger support]),
-+ [],[enable_pinger='yes'])
-+AC_MSG_RESULT([$enable_pinger])
-+if test x$enable_pinger != xno ; then
-+ AC_DEFINE(ENABLE_PINGER, 1, Use the Pinger)
-+fi
-+])
-+
-+#
-+# LC_CONFIG_CHECKSUM
-+#
-+# do checksum of bulk data between client and OST
-+#
-+AC_DEFUN([LC_CONFIG_CHECKSUM],
-+[AC_MSG_CHECKING([whether to enable data checksum support])
-+AC_ARG_ENABLE([checksum],
-+ AC_HELP_STRING([--disable-checksum],
-+ [disable data checksum support]),
-+ [],[enable_checksum='yes'])
-+AC_MSG_RESULT([$enable_checksum])
-+if test x$enable_checksum != xno ; then
-+ AC_DEFINE(ENABLE_CHECKSUM, 1, do data checksums)
-+fi
-+])
-+
-+#
-+# LC_CONFIG_HEALTH_CHECK_WRITE
-+#
-+# Turn on the actual write to the disk
-+#
-+AC_DEFUN([LC_CONFIG_HEALTH_CHECK_WRITE],
-+[AC_MSG_CHECKING([whether to enable a write with the health check])
-+AC_ARG_ENABLE([health-write],
-+ AC_HELP_STRING([--enable-health-write],
-+ [enable disk writes when doing health check]),
-+ [],[enable_health_write='no'])
-+AC_MSG_RESULT([$enable_health_write])
-+if test x$enable_health_write == xyes ; then
-+ AC_DEFINE(USE_HEALTH_CHECK_WRITE, 1, Write when Checking Health)
-+fi
-+])
-+
-+#
-+# LC_CONFIG_LIBLUSTRE_RECOVERY
-+#
-+AC_DEFUN([LC_CONFIG_LIBLUSTRE_RECOVERY],
-+[AC_MSG_CHECKING([whether to enable liblustre recovery support])
-+AC_ARG_ENABLE([liblustre-recovery],
-+ AC_HELP_STRING([--disable-liblustre-recovery],
-+ [disable liblustre recovery support]),
-+ [],[enable_liblustre_recovery='yes'])
-+AC_MSG_RESULT([$enable_liblustre_recovery])
-+if test x$enable_liblustre_recovery != xno ; then
-+ AC_DEFINE(ENABLE_LIBLUSTRE_RECOVERY, 1, Liblustre Can Recover)
-+fi
-+])
-+
-+#
-+# LC_CONFIG_OBD_BUFFER_SIZE
-+#
-+# the maximum buffer size of lctl ioctls
-+#
-+AC_DEFUN([LC_CONFIG_OBD_BUFFER_SIZE],
-+[AC_MSG_CHECKING([maximum OBD ioctl size])
-+AC_ARG_WITH([obd-buffer-size],
-+ AC_HELP_STRING([--with-obd-buffer-size=[size]],
-+ [set lctl ioctl maximum bytes (default=8192)]),
-+ [
-+ OBD_BUFFER_SIZE=$with_obd_buffer_size
-+ ],[
-+ OBD_BUFFER_SIZE=8192
-+ ])
-+AC_MSG_RESULT([$OBD_BUFFER_SIZE bytes])
-+AC_DEFINE_UNQUOTED(OBD_MAX_IOCTL_BUFFER, $OBD_BUFFER_SIZE, [IOCTL Buffer Size])
-+])
-+
-+#
-+# LC_STRUCT_STATFS
-+#
-+# AIX does not have statfs.f_namelen
-+#
-+AC_DEFUN([LC_STRUCT_STATFS],
-+[AC_MSG_CHECKING([if struct statfs has a f_namelen field])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/vfs.h>
-+],[
-+ struct statfs sfs;
-+ sfs.f_namelen = 1;
-+],[
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_STATFS_NAMELEN, 1, [struct statfs has a namelen field])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+#
-+# LC_READLINK_SSIZE_T
-+#
-+AC_DEFUN([LC_READLINK_SSIZE_T],
-+[AC_MSG_CHECKING([if readlink returns ssize_t])
-+AC_TRY_COMPILE([
-+ #include <unistd.h>
-+],[
-+ ssize_t readlink(const char *, char *, size_t);
-+],[
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_POSIX_1003_READLINK, 1, [readlink returns ssize_t])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+AC_DEFUN([LC_FUNC_PAGE_MAPPED],
-+[AC_MSG_CHECKING([if kernel offers page_mapped])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/mm.h>
-+],[
-+ page_mapped(NULL);
-+],[
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_PAGE_MAPPED, 1, [page_mapped found])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+AC_DEFUN([LC_STRUCT_FILE_OPS_UNLOCKED_IOCTL],
-+[AC_MSG_CHECKING([if struct file_operations has an unlocked_ioctl field])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/fs.h>
-+],[
-+ struct file_operations fops;
-+ &fops.unlocked_ioctl;
-+],[
-+ AC_MSG_RESULT([yes])
-+        AC_DEFINE(HAVE_UNLOCKED_IOCTL, 1, [struct file_operations has an unlocked_ioctl field])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+AC_DEFUN([LC_FILEMAP_POPULATE],
-+[AC_MSG_CHECKING([for exported filemap_populate])
-+LB_LINUX_TRY_COMPILE([
-+ #include <asm/page.h>
-+ #include <linux/mm.h>
-+],[
-+ filemap_populate(NULL, 0, 0, __pgprot(0), 0, 0);
-+],[
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_FILEMAP_POPULATE, 1, [Kernel exports filemap_populate])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+AC_DEFUN([LC_D_ADD_UNIQUE],
-+[AC_MSG_CHECKING([for d_add_unique])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/dcache.h>
-+],[
-+ d_add_unique(NULL, NULL);
-+],[
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_D_ADD_UNIQUE, 1, [Kernel has d_add_unique])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+AC_DEFUN([LC_BIT_SPINLOCK_H],
-+[LB_CHECK_FILE([$LINUX/include/linux/bit_spinlock.h],[
-+ AC_MSG_CHECKING([if bit_spinlock.h can be compiled])
-+ LB_LINUX_TRY_COMPILE([
-+ #include <asm/processor.h>
-+ #include <linux/spinlock.h>
-+ #include <linux/bit_spinlock.h>
-+ ],[],[
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_BIT_SPINLOCK_H, 1, [Kernel has bit_spinlock.h])
-+ ],[
-+ AC_MSG_RESULT([no])
-+ ])
-+],
-+[])
-+])
-+
-+#
-+# LC_POSIX_ACL_XATTR
-+#
-+# If we have xattr_acl.h
-+#
-+AC_DEFUN([LC_XATTR_ACL],
-+[LB_CHECK_FILE([$LINUX/include/linux/xattr_acl.h],[
-+ AC_MSG_CHECKING([if xattr_acl.h can be compiled])
-+ LB_LINUX_TRY_COMPILE([
-+ #include <linux/xattr_acl.h>
-+ ],[],[
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_XATTR_ACL, 1, [Kernel has xattr_acl])
-+ ],[
-+ AC_MSG_RESULT([no])
-+ ])
-+],
-+[])
-+])
-+
-+#
-+# LC_LINUX_FIEMAP_H
-+#
-+# If we have fiemap.h
-+# after 2.6.27, fiemap.h lives in include/linux
-+#
-+AC_DEFUN([LC_LINUX_FIEMAP_H],
-+[LB_CHECK_FILE([$LINUX/include/linux/fiemap.h],[
-+ AC_MSG_CHECKING([if fiemap.h can be compiled])
-+ LB_LINUX_TRY_COMPILE([
-+ #include <linux/fiemap.h>
-+ ],[],[
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_LINUX_FIEMAP_H, 1, [Kernel has fiemap.h])
-+ ],[
-+ AC_MSG_RESULT([no])
-+ ])
-+],
-+[])
-+])
-+
-+
-+AC_DEFUN([LC_STRUCT_INTENT_FILE],
-+[AC_MSG_CHECKING([if struct open_intent has a file field])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/fs.h>
-+ #include <linux/namei.h>
-+],[
-+ struct open_intent intent;
-+ &intent.file;
-+],[
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_FILE_IN_STRUCT_INTENT, 1, [struct open_intent has a file field])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+
-+AC_DEFUN([LC_POSIX_ACL_XATTR_H],
-+[LB_CHECK_FILE([$LINUX/include/linux/posix_acl_xattr.h],[
-+ AC_MSG_CHECKING([if linux/posix_acl_xattr.h can be compiled])
-+ LB_LINUX_TRY_COMPILE([
-+ #include <linux/posix_acl_xattr.h>
-+ ],[],[
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_LINUX_POSIX_ACL_XATTR_H, 1, [linux/posix_acl_xattr.h found])
-+
-+ ],[
-+ AC_MSG_RESULT([no])
-+ ])
-+$1
-+],[
-+AC_MSG_RESULT([no])
-+])
-+])
-+
-+#
-+# LC_EXPORT___IGET
-+# starting from 2.6.19 linux kernel exports __iget()
-+#
-+AC_DEFUN([LC_EXPORT___IGET],
-+[LB_CHECK_SYMBOL_EXPORT([__iget],
-+[fs/inode.c],[
-+ AC_DEFINE(HAVE_EXPORT___IGET, 1, [kernel exports __iget])
-+],[
-+])
-+])
-+
-+
-+AC_DEFUN([LC_LUSTRE_VERSION_H],
-+[LB_CHECK_FILE([$LINUX/include/linux/lustre_version.h],[
-+ rm -f "$LUSTRE/include/linux/lustre_version.h"
-+],[
-+ touch "$LUSTRE/include/linux/lustre_version.h"
-+ if test x$enable_server = xyes ; then
-+ AC_MSG_WARN([Unpatched kernel detected.])
-+ AC_MSG_WARN([Lustre servers cannot be built with an unpatched kernel;])
-+ AC_MSG_WARN([disabling server build])
-+ enable_server='no'
-+ fi
-+])
-+])
-+
-+AC_DEFUN([LC_FUNC_SET_FS_PWD],
-+[LB_CHECK_SYMBOL_EXPORT([set_fs_pwd],
-+[fs/namespace.c],[
-+ AC_DEFINE(HAVE_SET_FS_PWD, 1, [set_fs_pwd is exported])
-+],[
-+])
-+])
-+
-+#
-+# check for FS_RENAME_DOES_D_MOVE flag
-+#
-+AC_DEFUN([LC_FS_RENAME_DOES_D_MOVE],
-+[AC_MSG_CHECKING([if kernel has FS_RENAME_DOES_D_MOVE flag])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/fs.h>
-+],[
-+ int v = FS_RENAME_DOES_D_MOVE;
-+],[
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_FS_RENAME_DOES_D_MOVE, 1, [kernel has FS_RENAME_DOES_D_MOVE flag])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+#
-+# LC_FUNC_MS_FLOCK_LOCK
-+#
-+# SLES9 kernel has MS_FLOCK_LOCK sb flag
-+#
-+AC_DEFUN([LC_FUNC_MS_FLOCK_LOCK],
-+[AC_MSG_CHECKING([if kernel has MS_FLOCK_LOCK sb flag])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/fs.h>
-+],[
-+ int flags = MS_FLOCK_LOCK;
-+],[
-+ AC_DEFINE(HAVE_MS_FLOCK_LOCK, 1,
-+ [kernel has MS_FLOCK_LOCK flag])
-+ AC_MSG_RESULT([yes])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+#
-+# LC_FUNC_HAVE_CAN_SLEEP_ARG
-+#
-+# SLES9 kernel has third arg can_sleep
-+# in fs/locks.c: flock_lock_file_wait()
-+#
-+AC_DEFUN([LC_FUNC_HAVE_CAN_SLEEP_ARG],
-+[AC_MSG_CHECKING([if kernel has third arg can_sleep in fs/locks.c: flock_lock_file_wait()])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/fs.h>
-+],[
-+ int cansleep;
-+ struct file *file;
-+ struct file_lock *file_lock;
-+ flock_lock_file_wait(file, file_lock, cansleep);
-+],[
-+ AC_DEFINE(HAVE_CAN_SLEEP_ARG, 1,
-+ [kernel has third arg can_sleep in fs/locks.c: flock_lock_file_wait()])
-+ AC_MSG_RESULT([yes])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+#
-+# LC_FUNC_F_OP_FLOCK
-+#
-+# rhel4.2 kernel has f_op->flock field
-+#
-+AC_DEFUN([LC_FUNC_F_OP_FLOCK],
-+[AC_MSG_CHECKING([if struct file_operations has flock field])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/fs.h>
-+],[
-+ struct file_operations ll_file_operations_flock;
-+ ll_file_operations_flock.flock = NULL;
-+],[
-+ AC_DEFINE(HAVE_F_OP_FLOCK, 1,
-+ [struct file_operations has flock field])
-+ AC_MSG_RESULT([yes])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+#
-+# LC_FUNC_MS_FLOCK_LOCK
-+#
-+# SLES9 kernel has MS_FLOCK_LOCK sb flag
-+#
-+AC_DEFUN([LC_FUNC_MS_FLOCK_LOCK],
-+[AC_MSG_CHECKING([if kernel has MS_FLOCK_LOCK sb flag])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/fs.h>
-+],[
-+ int flags = MS_FLOCK_LOCK;
-+],[
-+ AC_DEFINE(HAVE_MS_FLOCK_LOCK, 1,
-+ [kernel has MS_FLOCK_LOCK flag])
-+ AC_MSG_RESULT([yes])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+#
-+# LC_FUNC_HAVE_CAN_SLEEP_ARG
-+#
-+# SLES9 kernel has third arg can_sleep
-+# in fs/locks.c: flock_lock_file_wait()
-+#
-+AC_DEFUN([LC_FUNC_HAVE_CAN_SLEEP_ARG],
-+[AC_MSG_CHECKING([if kernel has third arg can_sleep in fs/locks.c: flock_lock_file_wait()])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/fs.h>
-+],[
-+ int cansleep;
-+ struct file *file;
-+ struct file_lock *file_lock;
-+ flock_lock_file_wait(file, file_lock, cansleep);
-+],[
-+ AC_DEFINE(HAVE_CAN_SLEEP_ARG, 1,
-+ [kernel has third arg can_sleep in fs/locks.c: flock_lock_file_wait()])
-+ AC_MSG_RESULT([yes])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+#
-+# LC_TASK_PPTR
-+#
-+# task struct has p_pptr instead of parent
-+#
-+AC_DEFUN([LC_TASK_PPTR],
-+[AC_MSG_CHECKING([if task_struct has p_pptr])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/sched.h>
-+],[
-+ struct task_struct *p;
-+
-+ p = p->p_pptr;
-+],[
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_TASK_PPTR, 1, [task p_pptr found])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+#
-+# LC_FUNC_F_OP_FLOCK
-+#
-+# rhel4.2 kernel has f_op->flock field
-+#
-+AC_DEFUN([LC_FUNC_F_OP_FLOCK],
-+[AC_MSG_CHECKING([if struct file_operations has flock field])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/fs.h>
-+],[
-+ struct file_operations ll_file_operations_flock;
-+ ll_file_operations_flock.flock = NULL;
-+],[
-+ AC_DEFINE(HAVE_F_OP_FLOCK, 1,
-+ [struct file_operations has flock field])
-+ AC_MSG_RESULT([yes])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+# LC_INODE_I_MUTEX
-+# after 2.6.15 the inode has i_mutex instead of i_sem
-+AC_DEFUN([LC_INODE_I_MUTEX],
-+[AC_MSG_CHECKING([if inode has i_mutex])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/mutex.h>
-+ #include <linux/fs.h>
-+ #undef i_mutex
-+],[
-+ struct inode i;
-+
-+ mutex_unlock(&i.i_mutex);
-+],[
-+ AC_MSG_RESULT(yes)
-+ AC_DEFINE(HAVE_INODE_I_MUTEX, 1,
-+        [after 2.6.15 the inode has i_mutex instead of i_sem])
-+],[
-+ AC_MSG_RESULT(no)
-+])
-+])
-+
-+# LC_DQUOTOFF_MUTEX
-+# after 2.6.17 quota uses a mutex instead of a semaphore
-+AC_DEFUN([LC_DQUOTOFF_MUTEX],
-+[AC_MSG_CHECKING([if kernel uses dqonoff_mutex])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/mutex.h>
-+ #include <linux/fs.h>
-+ #include <linux/quota.h>
-+],[
-+ struct quota_info dq;
-+
-+ mutex_unlock(&dq.dqonoff_mutex);
-+],[
-+ AC_MSG_RESULT(yes)
-+ AC_DEFINE(HAVE_DQUOTOFF_MUTEX, 1,
-+        [after 2.6.17 quota uses a mutex instead of a semaphore])
-+],[
-+ AC_MSG_RESULT(no)
-+])
-+])
-+
-+#
-+# LC_STATFS_DENTRY_PARAM
-+# starting from 2.6.18 linux kernel uses dentry instead of
-+# super_block for first vfs_statfs argument
-+#
-+AC_DEFUN([LC_STATFS_DENTRY_PARAM],
-+[AC_MSG_CHECKING([first vfs_statfs parameter is dentry])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/fs.h>
-+],[
-+ int vfs_statfs(struct dentry *, struct kstatfs *);
-+],[
-+ AC_DEFINE(HAVE_STATFS_DENTRY_PARAM, 1,
-+ [first parameter of vfs_statfs is dentry])
-+ AC_MSG_RESULT([yes])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+#
-+# LC_VFS_KERN_MOUNT
-+# starting from 2.6.18 the kernel does not export do_kern_mount
-+# and vfs_kern_mount should be used instead.
-+#
-+AC_DEFUN([LC_VFS_KERN_MOUNT],
-+[AC_MSG_CHECKING([if vfs_kern_mount exists in the kernel])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/mount.h>
-+],[
-+ vfs_kern_mount(NULL, 0, NULL, NULL);
-+],[
-+ AC_DEFINE(HAVE_VFS_KERN_MOUNT, 1,
-+        [vfs_kern_mount exists in the kernel])
-+ AC_MSG_RESULT([yes])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+#
-+# LC_INVALIDATEPAGE_RETURN_INT
-+# more 2.6 api changes. return type for the invalidatepage
-+# address_space_operation is 'void' in new kernels but 'int' in old
-+#
-+AC_DEFUN([LC_INVALIDATEPAGE_RETURN_INT],
-+[AC_MSG_CHECKING([if invalidatepage returns int])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/buffer_head.h>
-+],[
-+ int rc = block_invalidatepage(NULL, 0);
-+],[
-+ AC_MSG_RESULT(yes)
-+ AC_DEFINE(HAVE_INVALIDATEPAGE_RETURN_INT, 1,
-+ [Define if return type of invalidatepage should be int])
-+],[
-+ AC_MSG_RESULT(no)
-+])
-+])
-+
-+# LC_UMOUNTBEGIN_HAS_VFSMOUNT
-+# more 2.6 API changes. 2.6.18 umount_begin has different parameters
-+AC_DEFUN([LC_UMOUNTBEGIN_HAS_VFSMOUNT],
-+[AC_MSG_CHECKING([if umount_begin needs vfsmount parameter instead of super_block])
-+tmp_flags="$EXTRA_KCFLAGS"
-+EXTRA_KCFLAGS="-Werror"
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/fs.h>
-+
-+ struct vfsmount;
-+ static void cfg_umount_begin (struct vfsmount *v, int flags)
-+ {
-+ ;
-+ }
-+
-+ static struct super_operations cfg_super_operations = {
-+ .umount_begin = cfg_umount_begin,
-+ };
-+],[
-+ cfg_super_operations.umount_begin(NULL,0);
-+],[
-+ AC_MSG_RESULT(yes)
-+ AC_DEFINE(HAVE_UMOUNTBEGIN_VFSMOUNT, 1,
-+ [Define umount_begin need second argument])
-+],[
-+ AC_MSG_RESULT(no)
-+])
-+EXTRA_KCFLAGS="$tmp_flags"
-+])
-+
-+# 2.6.19 API changes
-+# the inode doesn't have the i_blksize field
-+AC_DEFUN([LC_INODE_BLKSIZE],
-+[AC_MSG_CHECKING([inode has i_blksize field])
-+LB_LINUX_TRY_COMPILE([
-+#include <linux/fs.h>
-+],[
-+ struct inode i;
-+ i.i_blksize = 0;
-+],[
-+ AC_MSG_RESULT(yes)
-+ AC_DEFINE(HAVE_INODE_BLKSIZE, 1,
-+ [struct inode has i_blksize field])
-+],[
-+ AC_MSG_RESULT(no)
-+])
-+])
-+
-+# LC_VFS_READDIR_U64_INO
-+# 2.6.19 uses u64 for the inode number instead of inode_t
-+AC_DEFUN([LC_VFS_READDIR_U64_INO],
-+[AC_MSG_CHECKING([if vfs_readdir needs a 64-bit inode number])
-+tmp_flags="$EXTRA_KCFLAGS"
-+EXTRA_KCFLAGS="-Werror"
-+LB_LINUX_TRY_COMPILE([
-+#include <linux/fs.h>
-+ int fillonedir(void * __buf, const char * name, int namlen, loff_t offset,
-+ u64 ino, unsigned int d_type)
-+ {
-+ return 0;
-+ }
-+],[
-+ filldir_t filter;
-+
-+ filter = fillonedir;
-+ return 1;
-+],[
-+ AC_MSG_RESULT(yes)
-+ AC_DEFINE(HAVE_VFS_READDIR_U64_INO, 1,
-+        [if vfs_readdir needs a 64-bit inode number])
-+],[
-+ AC_MSG_RESULT(no)
-+])
-+EXTRA_KCFLAGS="$tmp_flags"
-+])
-+
-+# LC_FILE_WRITEV
-+# 2.6.19 replaced writev with aio_write
-+AC_DEFUN([LC_FILE_WRITEV],
-+[AC_MSG_CHECKING([writev in fops])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/fs.h>
-+],[
-+ struct file_operations *fops = NULL;
-+ fops->writev = NULL;
-+],[
-+ AC_MSG_RESULT(yes)
-+ AC_DEFINE(HAVE_FILE_WRITEV, 1,
-+ [use fops->writev])
-+],[
-+ AC_MSG_RESULT(no)
-+])
-+])
-+
-+# LC_GENERIC_FILE_READ
-+# 2.6.19 replaced readv with aio_read
-+AC_DEFUN([LC_FILE_READV],
-+[AC_MSG_CHECKING([readv in fops])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/fs.h>
-+],[
-+ struct file_operations *fops = NULL;
-+ fops->readv = NULL;
-+],[
-+ AC_MSG_RESULT(yes)
-+ AC_DEFINE(HAVE_FILE_READV, 1,
-+ [use fops->readv])
-+],[
-+ AC_MSG_RESULT(no)
-+])
-+])
-+
-+# LC_NR_PAGECACHE
-+# 2.6.18 doesn't export nr_pagecache
-+AC_DEFUN([LC_NR_PAGECACHE],
-+[AC_MSG_CHECKING([if kernel exports nr_pagecache])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/pagemap.h>
-+],[
-+ return atomic_read(&nr_pagecache);
-+],[
-+ AC_MSG_RESULT(yes)
-+ AC_DEFINE(HAVE_NR_PAGECACHE, 1,
-+        [kernel exports nr_pagecache])
-+],[
-+ AC_MSG_RESULT(no)
-+])
-+])
-+
-+# LC_CANCEL_DIRTY_PAGE
-+# 2.6.20 introduces cancel_dirty_page instead of
-+# clear_page_dirty.
-+AC_DEFUN([LC_CANCEL_DIRTY_PAGE],
-+[AC_MSG_CHECKING([kernel has cancel_dirty_page])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/mm.h>
-+ #include <linux/page-flags.h>
-+],[
-+ cancel_dirty_page(NULL, 0);
-+],[
-+ AC_MSG_RESULT(yes)
-+ AC_DEFINE(HAVE_CANCEL_DIRTY_PAGE, 1,
-+ [kernel has cancel_dirty_page instead of clear_page_dirty])
-+],[
-+ AC_MSG_RESULT(no)
-+])
-+])
-+
-+#
-+# LC_PAGE_CONSTANT
-+#
-+# In order to support the raid5 zerocopy patch, we have to patch the kernel to
-+# make it support constant pages, meaning pages that won't be modified during
-+# the IO.
-+#
-+AC_DEFUN([LC_PAGE_CONSTANT],
-+[AC_MSG_CHECKING([if kernel has PageConstant defined])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/mm.h>
-+ #include <linux/page-flags.h>
-+],[
-+ #ifndef PG_constant
-+ #error "Have no raid5 zcopy patch"
-+ #endif
-+],[
-+ AC_MSG_RESULT(yes)
-+        AC_DEFINE(HAVE_PAGE_CONSTANT, 1, [kernel has PageConstant support])
-+],[
-+        AC_MSG_RESULT(no)
-+])
-+])
-+
-+# the RHEL5 FS-cache patch renames the PG_checked flag
-+# to PG_fs_misc
-+AC_DEFUN([LC_PG_FS_MISC],
-+[AC_MSG_CHECKING([kernel has PG_fs_misc])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/mm.h>
-+ #include <linux/page-flags.h>
-+],[
-+ #ifndef PG_fs_misc
-+ #error PG_fs_misc not defined in kernel
-+ #endif
-+],[
-+ AC_MSG_RESULT(yes)
-+ AC_DEFINE(HAVE_PG_FS_MISC, 1,
-+        [kernel has PG_fs_misc])
-+],[
-+ AC_MSG_RESULT(no)
-+])
-+])
-+
-+# RHEL5 PageChecked and SetPageChecked defined
-+AC_DEFUN([LC_PAGE_CHECKED],
-+[AC_MSG_CHECKING([kernel has PageChecked and SetPageChecked])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/autoconf.h>
-+#ifdef HAVE_LINUX_MMTYPES_H
-+ #include <linux/mm_types.h>
-+#endif
-+ #include <linux/page-flags.h>
-+],[
-+ struct page *p;
-+
-+        /* before 2.6.26 this was a define */
-+ #ifndef PageChecked
-+        /* 2.6.26 uses a function instead of a define */
-+ SetPageChecked(p);
-+ PageChecked(p);
-+ #endif
-+],[
-+ AC_MSG_RESULT(yes)
-+ AC_DEFINE(HAVE_PAGE_CHECKED, 1,
-+ [does kernel have PageChecked and SetPageChecked])
-+],[
-+ AC_MSG_RESULT(no)
-+])
-+])
-+
-+AC_DEFUN([LC_EXPORT_TRUNCATE_COMPLETE],
-+[LB_CHECK_SYMBOL_EXPORT([truncate_complete_page],
-+[mm/truncate.c],[
-+AC_DEFINE(HAVE_TRUNCATE_COMPLETE_PAGE, 1,
-+ [kernel export truncate_complete_page])
-+],[
-+])
-+])
-+
-+AC_DEFUN([LC_EXPORT_D_REHASH_COND],
-+[LB_CHECK_SYMBOL_EXPORT([d_rehash_cond],
-+[fs/dcache.c],[
-+AC_DEFINE(HAVE_D_REHASH_COND, 1,
-+ [d_rehash_cond is exported by the kernel])
-+],[
-+])
-+])
-+
-+AC_DEFUN([LC_EXPORT___D_REHASH],
-+[LB_CHECK_SYMBOL_EXPORT([__d_rehash],
-+[fs/dcache.c],[
-+AC_DEFINE(HAVE___D_REHASH, 1,
-+ [__d_rehash is exported by the kernel])
-+],[
-+])
-+])
-+
-+AC_DEFUN([LC_EXPORT_D_MOVE_LOCKED],
-+[LB_CHECK_SYMBOL_EXPORT([d_move_locked],
-+[fs/dcache.c],[
-+AC_DEFINE(HAVE_D_MOVE_LOCKED, 1,
-+ [d_move_locked is exported by the kernel])
-+],[
-+])
-+])
-+
-+AC_DEFUN([LC_EXPORT___D_MOVE],
-+[LB_CHECK_SYMBOL_EXPORT([__d_move],
-+[fs/dcache.c],[
-+AC_DEFINE(HAVE___D_MOVE, 1,
-+ [__d_move is exported by the kernel])
-+],[
-+])
-+])
-+
-+# The actual symbol exported varies among architectures, so we need
-+# to check many symbols (but only in the current architecture.) No
-+# matter what symbol is exported, the kernel #defines node_to_cpumask
-+# to the appropriate function and that's what we use.
-+AC_DEFUN([LC_EXPORT_NODE_TO_CPUMASK],
-+ [LB_CHECK_SYMBOL_EXPORT([node_to_cpumask],
-+ [arch/$LINUX_ARCH/mm/numa.c],
-+ [AC_DEFINE(HAVE_NODE_TO_CPUMASK, 1,
-+ [node_to_cpumask is exported by
-+ the kernel])]) # x86_64
-+ LB_CHECK_SYMBOL_EXPORT([node_to_cpu_mask],
-+ [arch/$LINUX_ARCH/kernel/smpboot.c],
-+ [AC_DEFINE(HAVE_NODE_TO_CPUMASK, 1,
-+ [node_to_cpumask is exported by
-+ the kernel])]) # ia64
-+ LB_CHECK_SYMBOL_EXPORT([node_2_cpu_mask],
-+ [arch/$LINUX_ARCH/kernel/smpboot.c],
-+ [AC_DEFINE(HAVE_NODE_TO_CPUMASK, 1,
-+ [node_to_cpumask is exported by
-+ the kernel])]) # i386
-+ ])
-+
-+#
-+# LC_VFS_INTENT_PATCHES
-+#
-+# check if the kernel has the VFS intent patches
-+AC_DEFUN([LC_VFS_INTENT_PATCHES],
-+[AC_MSG_CHECKING([if the kernel has the VFS intent patches])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/fs.h>
-+ #include <linux/namei.h>
-+],[
-+ struct nameidata nd;
-+ struct lookup_intent *it;
-+
-+ it = &nd.intent;
-+ intent_init(it, IT_OPEN);
-+ it->d.lustre.it_disposition = 0;
-+ it->d.lustre.it_data = NULL;
-+],[
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_VFS_INTENT_PATCHES, 1, [VFS intent patches are applied])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+# 2.6.22 lost second parameter for invalidate_bdev
-+AC_DEFUN([LC_INVALIDATE_BDEV_2ARG],
-+[AC_MSG_CHECKING([if invalidate_bdev has second argument])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/buffer_head.h>
-+],[
-+ invalidate_bdev(NULL,0);
-+],[
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_INVALIDATE_BDEV_2ARG, 1,
-+ [invalidate_bdev has second argument])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+# 2.6.18
-+
-+
-+# 2.6.23 has return type 'void' for unregister_blkdev
-+AC_DEFUN([LC_UNREGISTER_BLKDEV_RETURN_INT],
-+[AC_MSG_CHECKING([if unregister_blkdev returns int])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/fs.h>
-+],[
-+ int i = unregister_blkdev(0,NULL);
-+],[
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_UNREGISTER_BLKDEV_RETURN_INT, 1,
-+        [unregister_blkdev returns int])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+# 2.6.23 change .sendfile to .splice_read
-+# RHEL4 (-92 kernel) has both the .sendfile and .splice_read APIs
-+AC_DEFUN([LC_KERNEL_SENDFILE],
-+[AC_MSG_CHECKING([if kernel has .sendfile])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/fs.h>
-+],[
-+ struct file_operations file;
-+
-+ file.sendfile = NULL;
-+], [
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_KERNEL_SENDFILE, 1,
-+ [kernel has .sendfile])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+# 2.6.23 change .sendfile to .splice_read
-+AC_DEFUN([LC_KERNEL_SPLICE_READ],
-+[AC_MSG_CHECKING([if kernel has .splice_read])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/fs.h>
-+],[
-+ struct file_operations file;
-+
-+ file.splice_read = NULL;
-+], [
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_KERNEL_SPLICE_READ, 1,
-+        [kernel has .splice_read])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+# 2.6.23 extract nfs export related data into exportfs.h
-+AC_DEFUN([LC_HAVE_EXPORTFS_H],
-+[LB_CHECK_FILE([$LINUX/include/linux/exportfs.h], [
-+ AC_DEFINE(HAVE_LINUX_EXPORTFS_H, 1,
-+        [kernel has include/linux/exportfs.h])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+# 2.6.23 have new page fault handling API
-+AC_DEFUN([LC_VM_OP_FAULT],
-+[AC_MSG_CHECKING([if kernel has .fault in vm_operation_struct])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/mm.h>
-+],[
-+ struct vm_operations_struct op;
-+
-+ op.fault = NULL;
-+], [
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_VM_OP_FAULT, 1,
-+ [if kernel has .fault in vm_operation_struct])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+#2.6.23 has new shrinker API
-+AC_DEFUN([LC_REGISTER_SHRINKER],
-+[AC_MSG_CHECKING([if kernel has register_shrinker])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/mm.h>
-+],[
-+ register_shrinker(NULL);
-+], [
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_REGISTER_SHRINKER, 1,
-+ [if kernel has register_shrinker])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+# 2.6.24 has bio_endio with 2 args
-+AC_DEFUN([LC_BIO_ENDIO_2ARG],
-+[AC_MSG_CHECKING([if kernel has bio_endio with 2 args])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/bio.h>
-+],[
-+ bio_endio(NULL, 0);
-+], [
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_BIO_ENDIO_2ARG, 1,
-+ [if kernel has bio_endio with 2 args])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+# 2.6.24 has new members in the export_operations struct.
-+AC_DEFUN([LC_FH_TO_DENTRY],
-+[AC_MSG_CHECKING([if kernel has .fh_to_dentry member in export_operations struct])
-+LB_LINUX_TRY_COMPILE([
-+#ifdef HAVE_LINUX_EXPORTFS_H
-+ #include <linux/exportfs.h>
-+#else
-+ #include <linux/fs.h>
-+#endif
-+],[
-+ struct export_operations exp;
-+
-+ exp.fh_to_dentry = NULL;
-+], [
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_FH_TO_DENTRY, 1,
-+ [kernel has .fh_to_dentry member in export_operations struct])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+# 2.6.24 need linux/mm_types.h included
-+AC_DEFUN([LC_HAVE_MMTYPES_H],
-+[LB_CHECK_FILE([$LINUX/include/linux/mm_types.h], [
-+ AC_DEFINE(HAVE_LINUX_MMTYPES_H, 1,
-+ [kernel has include/mm_types.h])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+# 2.6.24 removed the long-lived 'deleted' member from the procfs entry struct
-+AC_DEFUN([LC_PROCFS_DELETED],
-+[AC_MSG_CHECKING([if kernel has deleted member in procfs entry struct])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/proc_fs.h>
-+],[
-+ struct proc_dir_entry pde;
-+
-+ pde.deleted = NULL;
-+], [
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_PROCFS_DELETED, 1,
-+ [kernel has deleted member in procfs entry struct])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+# 2.6.25 changed the define to an inline function
-+AC_DEFUN([LC_MAPPING_CAP_WRITEBACK_DIRTY],
-+[AC_MSG_CHECKING([if kernel has mapping_cap_writeback_dirty])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/backing-dev.h>
-+],[
-+ #ifndef mapping_cap_writeback_dirty
-+ mapping_cap_writeback_dirty(NULL);
-+ #endif
-+],[
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_MAPPING_CAP_WRITEBACK_DIRTY, 1,
-+        [kernel has mapping_cap_writeback_dirty])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+
-+
-+# 2.6.26 does not export set_fs_pwd and changes the pwd parameter in fs_struct
-+AC_DEFUN([LC_FS_STRUCT_USE_PATH],
-+[AC_MSG_CHECKING([fs_struct use path structure])
-+LB_LINUX_TRY_COMPILE([
-+ #include <asm/atomic.h>
-+ #include <linux/spinlock.h>
-+ #include <linux/fs_struct.h>
-+],[
-+ struct path path;
-+ struct fs_struct fs;
-+
-+ fs.pwd = path;
-+], [
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_FS_STRUCT_USE_PATH, 1,
-+ [fs_struct use path structure])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+# 2.6.26 remove path_release and use path_put instead
-+AC_DEFUN([LC_PATH_RELEASE],
-+[AC_MSG_CHECKING([if path_release exist])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/dcache.h>
-+ #include <linux/namei.h>
-+],[
-+ path_release(NULL);
-+],[
-+ AC_DEFINE(HAVE_PATH_RELEASE, 1, [path_release exist])
-+ AC_MSG_RESULT([yes])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+#2.6.27
-+AC_DEFUN([LC_INODE_PERMISION_2ARGS],
-+[AC_MSG_CHECKING([if inode_operations->permission has two args])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/fs.h>
-+],[
-+ struct inode *inode;
-+
-+ inode->i_op->permission(NULL,0);
-+],[
-+ AC_DEFINE(HAVE_INODE_PERMISION_2ARGS, 1,
-+        [inode_operations->permission has two args])
-+ AC_MSG_RESULT([yes])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+# 2.6.27 has file_remove_suid instead of remove_suid
-+AC_DEFUN([LC_FILE_REMOVE_SUID],
-+[AC_MSG_CHECKING([if kernel has file_remove_suid])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/fs.h>
-+],[
-+ file_remove_suid(NULL);
-+],[
-+ AC_DEFINE(HAVE_FILE_REMOVE_SUID, 1,
-+        [kernel has file_remove_suid])
-+ AC_MSG_RESULT([yes])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+# 2.6.27 has a new page locking API
-+AC_DEFUN([LC_TRYLOCKPAGE],
-+[AC_MSG_CHECKING([if kernel uses trylock_page for page locking])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/pagemap.h>
-+],[
-+ trylock_page(NULL);
-+],[
-+ AC_DEFINE(HAVE_TRYLOCK_PAGE, 1,
-+        [kernel uses trylock_page for page locking])
-+ AC_MSG_RESULT([yes])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+#
-+# LC_PROG_LINUX
-+#
-+# Lustre linux kernel checks
-+#
-+AC_DEFUN([LC_PROG_LINUX],
-+ [LC_LUSTRE_VERSION_H
-+ if test x$enable_server = xyes ; then
-+ LC_CONFIG_BACKINGFS
-+ fi
-+ LC_CONFIG_PINGER
-+ LC_CONFIG_CHECKSUM
-+ LC_CONFIG_LIBLUSTRE_RECOVERY
-+ LC_CONFIG_HEALTH_CHECK_WRITE
-+ LC_CONFIG_LRU_RESIZE
-+ LC_CONFIG_ADAPTIVE_TIMEOUTS
-+ LC_QUOTA_MODULE
-+
-+ LC_TASK_PPTR
-+ # RHEL4 patches
-+ LC_EXPORT_TRUNCATE_COMPLETE
-+ LC_EXPORT_D_REHASH_COND
-+ LC_EXPORT___D_REHASH
-+ LC_EXPORT_D_MOVE_LOCKED
-+ LC_EXPORT___D_MOVE
-+ LC_EXPORT_NODE_TO_CPUMASK
-+
-+ LC_STRUCT_KIOBUF
-+ LC_FUNC_COND_RESCHED
-+ LC_FUNC_ZAP_PAGE_RANGE
-+ LC_FUNC_PDE
-+ LC_FUNC_DIRECT_IO
-+ LC_HEADER_MM_INLINE
-+ LC_STRUCT_INODE
-+ LC_FUNC_REGISTER_CACHE
-+ LC_FUNC_GRAB_CACHE_PAGE_NOWAIT_GFP
-+ LC_FUNC_DEV_SET_RDONLY
-+ LC_FUNC_FILEMAP_FDATAWRITE
-+ LC_STRUCT_STATFS
-+ LC_FUNC_PAGE_MAPPED
-+ LC_STRUCT_FILE_OPS_UNLOCKED_IOCTL
-+ LC_FILEMAP_POPULATE
-+ LC_D_ADD_UNIQUE
-+ LC_BIT_SPINLOCK_H
-+ LC_XATTR_ACL
-+ LC_STRUCT_INTENT_FILE
-+ LC_POSIX_ACL_XATTR_H
-+ LC_EXPORT___IGET
-+ LC_FUNC_SET_FS_PWD
-+ LC_FUNC_MS_FLOCK_LOCK
-+ LC_FUNC_HAVE_CAN_SLEEP_ARG
-+ LC_FUNC_F_OP_FLOCK
-+ LC_QUOTA_READ
-+ LC_COOKIE_FOLLOW_LINK
-+ LC_FUNC_RCU
-+ LC_QUOTA64
-+
-+ # does the kernel have VFS intent patches?
-+ LC_VFS_INTENT_PATCHES
-+
-+ # 2.6.15
-+ LC_INODE_I_MUTEX
-+
-+ # 2.6.16
-+ LC_SECURITY_PLUG # for SLES10 SP2
-+
-+ # 2.6.17
-+ LC_DQUOTOFF_MUTEX
-+
-+ # 2.6.18
-+ LC_NR_PAGECACHE
-+ LC_STATFS_DENTRY_PARAM
-+ LC_VFS_KERN_MOUNT
-+ LC_INVALIDATEPAGE_RETURN_INT
-+ LC_UMOUNTBEGIN_HAS_VFSMOUNT
-+
-+ #2.6.18 + RHEL5 (fc6)
-+ LC_PG_FS_MISC
-+ LC_PAGE_CHECKED
-+
-+ # 2.6.19
-+ LC_INODE_BLKSIZE
-+ LC_VFS_READDIR_U64_INO
-+ LC_FILE_WRITEV
-+ LC_FILE_READV
-+
-+ # 2.6.20
-+ LC_CANCEL_DIRTY_PAGE
-+
-+ # raid5-zerocopy patch
-+ LC_PAGE_CONSTANT
-+
-+ # 2.6.22
-+ LC_INVALIDATE_BDEV_2ARG
-+ LC_FS_RENAME_DOES_D_MOVE
-+ # 2.6.23
-+ LC_UNREGISTER_BLKDEV_RETURN_INT
-+ LC_KERNEL_SENDFILE
-+ LC_KERNEL_SPLICE_READ
-+ LC_HAVE_EXPORTFS_H
-+ LC_VM_OP_FAULT
-+ LC_REGISTER_SHRINKER
-+
-+ #2.6.25
-+ LC_MAPPING_CAP_WRITEBACK_DIRTY
-+
-+ # 2.6.24
-+ LC_HAVE_MMTYPES_H
-+ LC_BIO_ENDIO_2ARG
-+ LC_FH_TO_DENTRY
-+ LC_PROCFS_DELETED
-+
-+ # 2.6.26
-+ LC_FS_STRUCT_USE_PATH
-+ LC_RCU_LIST_SAFE
-+ LC_PATH_RELEASE
-+
-+ # 2.6.27
-+ LC_INODE_PERMISION_2ARGS
-+ LC_FILE_REMOVE_SUID
-+ LC_TRYLOCKPAGE
-+])
-+
-+#
-+# LC_CONFIG_CLIENT_SERVER
-+#
-+# Build client/server sides of Lustre
-+#
-+AC_DEFUN([LC_CONFIG_CLIENT_SERVER],
-+[AC_MSG_CHECKING([whether to build Lustre server support])
-+AC_ARG_ENABLE([server],
-+ AC_HELP_STRING([--disable-server],
-+ [disable Lustre server support]),
-+ [],[enable_server='yes'])
-+AC_MSG_RESULT([$enable_server])
-+
-+AC_MSG_CHECKING([whether to build Lustre client support])
-+AC_ARG_ENABLE([client],
-+ AC_HELP_STRING([--disable-client],
-+ [disable Lustre client support]),
-+ [],[enable_client='yes'])
-+AC_MSG_RESULT([$enable_client])])
-+
-+#
-+# LC_CONFIG_LIBLUSTRE
-+#
-+# whether to build liblustre
-+#
-+AC_DEFUN([LC_CONFIG_LIBLUSTRE],
-+[AC_MSG_CHECKING([whether to build Lustre library])
-+AC_ARG_ENABLE([liblustre],
-+ AC_HELP_STRING([--disable-liblustre],
-+ [disable building of Lustre library]),
-+ [],[enable_liblustre=$with_sysio])
-+AC_MSG_RESULT([$enable_liblustre])
-+# only build sysio if liblustre is built
-+with_sysio="$enable_liblustre"
-+
-+AC_MSG_CHECKING([whether to build liblustre tests])
-+AC_ARG_ENABLE([liblustre-tests],
-+ AC_HELP_STRING([--enable-liblustre-tests],
-+ [enable liblustre tests, if --disable-tests is used]),
-+ [],[enable_liblustre_tests=$enable_tests])
-+if test x$enable_liblustre != xyes ; then
-+ enable_liblustre_tests='no'
-+fi
-+AC_MSG_RESULT([$enable_liblustre_tests])
-+
-+AC_MSG_CHECKING([whether to enable liblustre acl])
-+AC_ARG_ENABLE([liblustre-acl],
-+ AC_HELP_STRING([--disable-liblustre-acl],
-+ [disable ACL support for liblustre]),
-+ [],[enable_liblustre_acl=yes])
-+AC_MSG_RESULT([$enable_liblustre_acl])
-+if test x$enable_liblustre_acl = xyes ; then
-+ AC_DEFINE(LIBLUSTRE_POSIX_ACL, 1, Liblustre Support ACL-enabled MDS)
-+fi
-+
-+#
-+# --enable-mpitest
-+#
-+AC_ARG_ENABLE(mpitests,
-+        AC_HELP_STRING([--enable-mpitests=yes|no|mpich directory],
-+ [include mpi tests]),
-+ [
-+ enable_mpitests=yes
-+ case $enableval in
-+ yes)
-+ MPI_ROOT=/opt/mpich
-+ LDFLAGS="$LDFLAGS -L$MPI_ROOT/ch-p4/lib -L$MPI_ROOT/ch-p4/lib64"
-+ CFLAGS="$CFLAGS -I$MPI_ROOT/include"
-+ ;;
-+ no)
-+ enable_mpitests=no
-+ ;;
-+ [[\\/$]]* | ?:[[\\/]]* )
-+ MPI_ROOT=$enableval
-+ LDFLAGS="$LDFLAGS -L$with_mpi/lib"
-+ CFLAGS="$CFLAGS -I$MPI_ROOT/include"
-+ ;;
-+ *)
-+ AC_MSG_ERROR([expected absolute directory name for --enable-mpitests or yes or no])
-+ ;;
-+ esac
-+ ],
-+ [
-+ MPI_ROOT=/opt/mpich
-+ LDFLAGS="$LDFLAGS -L$MPI_ROOT/ch-p4/lib -L$MPI_ROOT/ch-p4/lib64"
-+ CFLAGS="$CFLAGS -I$MPI_ROOT/include"
-+ enable_mpitests=yes
-+ ]
-+)
-+AC_SUBST(MPI_ROOT)
-+
-+if test x$enable_mpitests != xno; then
-+        AC_MSG_CHECKING([whether mpitests can be built])
-+ AC_CHECK_FILE([$MPI_ROOT/include/mpi.h],
-+ [AC_CHECK_LIB([mpich],[MPI_Start],[enable_mpitests=yes],[enable_mpitests=no])],
-+ [enable_mpitests=no])
-+fi
-+AC_MSG_RESULT([$enable_mpitests])
-+
-+
-+AC_MSG_NOTICE([Enabling Lustre configure options for libsysio])
-+ac_configure_args="$ac_configure_args --with-lustre-hack --with-sockets"
-+
-+LC_CONFIG_PINGER
-+LC_CONFIG_LIBLUSTRE_RECOVERY
-+])
-+
-+AC_DEFUN([LC_CONFIG_LRU_RESIZE],
-+[AC_MSG_CHECKING([whether to enable lru self-adjusting])
-+AC_ARG_ENABLE([lru_resize],
-+ AC_HELP_STRING([--enable-lru-resize],
-+ [enable lru resize support]),
-+ [],[enable_lru_resize='yes'])
-+AC_MSG_RESULT([$enable_lru_resize])
-+if test x$enable_lru_resize != xno; then
-+ AC_DEFINE(HAVE_LRU_RESIZE_SUPPORT, 1, [Enable lru resize support])
-+fi
-+])
-+
-+AC_DEFUN([LC_CONFIG_ADAPTIVE_TIMEOUTS],
-+[AC_MSG_CHECKING([whether to enable ptlrpc adaptive timeouts support])
-+AC_ARG_ENABLE([adaptive_timeouts],
-+ AC_HELP_STRING([--enable-adaptive-timeouts],
-+ [enable ptlrpc adaptive timeouts support]),
-+ [],[enable_adaptive_timeouts='no'])
-+AC_MSG_RESULT([$enable_adaptive_timeouts])
-+if test x$enable_adaptive_timeouts == xyes; then
-+ AC_DEFINE(HAVE_AT_SUPPORT, 1, [Enable adaptive timeouts support])
-+fi
-+])
-+
-+#
-+# LC_CONFIG_QUOTA
-+#
-+# whether to enable quota support global control
-+#
-+AC_DEFUN([LC_CONFIG_QUOTA],
-+[AC_ARG_ENABLE([quota],
-+ AC_HELP_STRING([--enable-quota],
-+ [enable quota support]),
-+ [],[enable_quota='yes'])
-+])
-+
-+# whether to enable quota support(kernel modules)
-+AC_DEFUN([LC_QUOTA_MODULE],
-+[if test x$enable_quota != xno; then
-+ LB_LINUX_CONFIG([QUOTA],[
-+ enable_quota_module='yes'
-+ AC_DEFINE(HAVE_QUOTA_SUPPORT, 1, [Enable quota support])
-+ ],[
-+ enable_quota_module='no'
-+        AC_MSG_WARN([quota is not enabled because the kernel lacks quota support])
-+ ])
-+fi
-+])
-+
-+AC_DEFUN([LC_QUOTA],
-+[#check global
-+LC_CONFIG_QUOTA
-+#check for utils
-+AC_CHECK_HEADER(sys/quota.h,
-+ [AC_DEFINE(HAVE_SYS_QUOTA_H, 1, [Define to 1 if you have <sys/quota.h>.])],
-+        [AC_MSG_ERROR([could not find <sys/quota.h> on your system])])
-+])
-+
-+AC_DEFUN([LC_QUOTA_READ],
-+[AC_MSG_CHECKING([if kernel supports quota_read])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/fs.h>
-+],[
-+ struct super_operations sp;
-+ void *i = (void *)sp.quota_read;
-+],[
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(KERNEL_SUPPORTS_QUOTA_READ, 1, [quota_read found])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+#
-+# LC_COOKIE_FOLLOW_LINK
-+#
-+# kernel 2.6.13+ ->follow_link returns a cookie
-+#
-+
-+AC_DEFUN([LC_COOKIE_FOLLOW_LINK],
-+[AC_MSG_CHECKING([if inode_operations->follow_link returns a cookie])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/fs.h>
-+ #include <linux/namei.h>
-+],[
-+ struct dentry dentry;
-+ struct nameidata nd;
-+
-+ dentry.d_inode->i_op->put_link(&dentry, &nd, NULL);
-+],[
-+ AC_DEFINE(HAVE_COOKIE_FOLLOW_LINK, 1, [inode_operations->follow_link returns a cookie])
-+ AC_MSG_RESULT([yes])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+#
-+# LC_FUNC_RCU
-+#
-+# kernels prior to 2.6.0(?) have no RCU support; in kernel 2.6.5 (SUSE),
-+# call_rcu takes three parameters.
-+#
-+AC_DEFUN([LC_FUNC_RCU],
-+[AC_MSG_CHECKING([if kernel has RCU support])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/rcupdate.h>
-+],[],[
-+ AC_DEFINE(HAVE_RCU, 1, [have RCU defined])
-+ AC_MSG_RESULT([yes])
-+
-+ AC_MSG_CHECKING([if call_rcu takes three parameters])
-+ LB_LINUX_TRY_COMPILE([
-+ #include <linux/rcupdate.h>
-+ ],[
-+ struct rcu_head rh;
-+ call_rcu(&rh, (void (*)(struct rcu_head *))1, NULL);
-+ ],[
-+ AC_DEFINE(HAVE_CALL_RCU_PARAM, 1, [call_rcu takes three parameters])
-+ AC_MSG_RESULT([yes])
-+ ],[
-+ AC_MSG_RESULT([no])
-+ ])
-+
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+#
-+# LC_QUOTA64
-+# linux kernel may have 64-bit limits support
-+#
-+AC_DEFUN([LC_QUOTA64],
-+[AC_MSG_CHECKING([if kernel has 64-bit quota limits support])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/kernel.h>
-+ #include <linux/fs.h>
-+ #include <linux/quotaio_v2.h>
-+ int versions[] = V2_INITQVERSIONS_R1;
-+ struct v2_disk_dqblk_r1 dqblk_r1;
-+],[],[
-+ AC_DEFINE(HAVE_QUOTA64, 1, [have quota64])
-+ AC_MSG_RESULT([yes])
-+
-+],[
-+ AC_MSG_WARN([4 TB (or larger) block quota limits can only be used with OSTs not larger than 4 TB.])
-+ AC_MSG_WARN([Continuing with limited quota support.])
-+ AC_MSG_WARN([quotacheck is needed for filesystems with recent quota versions.])
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+# LC_SECURITY_PLUG # for SLES10 SP2
-+# check security plug in sles10 sp2 kernel
-+AC_DEFUN([LC_SECURITY_PLUG],
-+[AC_MSG_CHECKING([if kernel has security plug support])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/fs.h>
-+],[
-+ struct dentry *dentry;
-+ struct vfsmount *mnt;
-+ struct iattr *iattr;
-+
-+ notify_change(dentry, mnt, iattr);
-+],[
-+ AC_MSG_RESULT(yes)
-+ AC_DEFINE(HAVE_SECURITY_PLUG, 1,
-+        [SLES10 SP2 uses an extra parameter in vfs calls])
-+],[
-+ AC_MSG_RESULT(no)
-+])
-+])
-+
-+#
-+# LC_CONFIGURE
-+#
-+# other configure checks
-+#
-+AC_DEFUN([LC_CONFIGURE],
-+[LC_CONFIG_OBD_BUFFER_SIZE
-+
-+# include/liblustre.h
-+AC_CHECK_HEADERS([asm/page.h sys/user.h sys/vfs.h stdint.h blkid/blkid.h])
-+
-+# liblustre/llite_lib.h
-+AC_CHECK_HEADERS([xtio.h file.h])
-+
-+# liblustre/dir.c
-+AC_CHECK_HEADERS([linux/types.h sys/types.h linux/unistd.h unistd.h])
-+
-+# liblustre/lutil.c
-+AC_CHECK_HEADERS([netinet/in.h arpa/inet.h catamount/data.h])
-+AC_CHECK_FUNCS([inet_ntoa])
-+
-+# libsysio/src/readlink.c
-+LC_READLINK_SSIZE_T
-+
-+# lvfs/prng.c - depends on linux/types.h from liblustre/dir.c
-+AC_CHECK_HEADERS([linux/random.h], [], [],
-+ [#ifdef HAVE_LINUX_TYPES_H
-+ # include <linux/types.h>
-+ #endif
-+ ])
-+
-+# utils/llverfs.c
-+AC_CHECK_HEADERS([ext2fs/ext2fs.h])
-+
-+# check for -lz support
-+ZLIB=""
-+AC_CHECK_LIB([z],
-+ [adler32],
-+ [AC_CHECK_HEADERS([zlib.h],
-+ [ZLIB="-lz"
-+ AC_DEFINE([HAVE_ADLER], 1,
-+        [support adler32 checksum type])],
-+ [AC_MSG_WARN([No zlib-devel package found,
-+ unable to use adler32 checksum])])],
-+ [AC_MSG_WARN([No zlib package found, unable to use adler32 checksum])]
-+)
-+AC_SUBST(ZLIB)
-+
-+# Super safe df
-+AC_ARG_ENABLE([mindf],
-+ AC_HELP_STRING([--enable-mindf],
-+ [Make statfs report the minimum available space on any single OST instead of the sum of free space on all OSTs]),
-+ [],[])
-+if test "$enable_mindf" = "yes" ; then
-+ AC_DEFINE([MIN_DF], 1, [Report minimum OST free space])
-+fi
-+
-+AC_ARG_ENABLE([fail_alloc],
-+ AC_HELP_STRING([--disable-fail-alloc],
-+        [disable random allocation failures]),
-+ [],[enable_fail_alloc=yes])
-+AC_MSG_CHECKING([whether to enable random memory allocation failures])
-+AC_MSG_RESULT([$enable_fail_alloc])
-+if test x$enable_fail_alloc != xno ; then
-+        AC_DEFINE([RANDOM_FAIL_ALLOC], 1, [enable random allocation failures])
-+fi
-+
-+])
-+
-+#
-+# LC_CONDITIONALS
-+#
-+# AM_CONDITIONALS for lustre
-+#
-+AC_DEFUN([LC_CONDITIONALS],
-+[AM_CONDITIONAL(LIBLUSTRE, test x$enable_liblustre = xyes)
-+AM_CONDITIONAL(USE_QUILT, test x$QUILT != xno)
-+AM_CONDITIONAL(LIBLUSTRE_TESTS, test x$enable_liblustre_tests = xyes)
-+AM_CONDITIONAL(MPITESTS, test x$enable_mpitests = xyes, Build MPI Tests)
-+AM_CONDITIONAL(CLIENT, test x$enable_client = xyes)
-+AM_CONDITIONAL(SERVER, test x$enable_server = xyes)
-+AM_CONDITIONAL(QUOTA, test x$enable_quota_module = xyes)
-+AM_CONDITIONAL(BLKID, test x$ac_cv_header_blkid_blkid_h = xyes)
-+AM_CONDITIONAL(EXT2FS_DEVEL, test x$ac_cv_header_ext2fs_ext2fs_h = xyes)
-+AM_CONDITIONAL(LIBPTHREAD, test x$enable_libpthread = xyes)
-+])
-+
-+#
-+# LC_CONFIG_FILES
-+#
-+# files that should be generated with AC_OUTPUT
-+#
-+AC_DEFUN([LC_CONFIG_FILES],
-+[AC_CONFIG_FILES([
-+lustre/Makefile
-+lustre/autoMakefile
-+lustre/autoconf/Makefile
-+lustre/contrib/Makefile
-+lustre/doc/Makefile
-+lustre/include/Makefile
-+lustre/include/lustre_ver.h
-+lustre/include/linux/Makefile
-+lustre/include/lustre/Makefile
-+lustre/kernel_patches/targets/2.6-suse.target
-+lustre/kernel_patches/targets/2.6-vanilla.target
-+lustre/kernel_patches/targets/2.6-rhel4.target
-+lustre/kernel_patches/targets/2.6-rhel5.target
-+lustre/kernel_patches/targets/2.6-fc5.target
-+lustre/kernel_patches/targets/2.6-patchless.target
-+lustre/kernel_patches/targets/2.6-sles10.target
-+lustre/kernel_patches/targets/hp_pnnl-2.4.target
-+lustre/kernel_patches/targets/rh-2.4.target
-+lustre/kernel_patches/targets/rhel-2.4.target
-+lustre/kernel_patches/targets/suse-2.4.21-2.target
-+lustre/kernel_patches/targets/sles-2.4.target
-+lustre/ldlm/Makefile
-+lustre/liblustre/Makefile
-+lustre/liblustre/tests/Makefile
-+lustre/llite/Makefile
-+lustre/llite/autoMakefile
-+lustre/lov/Makefile
-+lustre/lov/autoMakefile
-+lustre/lvfs/Makefile
-+lustre/lvfs/autoMakefile
-+lustre/mdc/Makefile
-+lustre/mdc/autoMakefile
-+lustre/mds/Makefile
-+lustre/mds/autoMakefile
-+lustre/obdclass/Makefile
-+lustre/obdclass/autoMakefile
-+lustre/obdclass/linux/Makefile
-+lustre/obdecho/Makefile
-+lustre/obdecho/autoMakefile
-+lustre/obdfilter/Makefile
-+lustre/obdfilter/autoMakefile
-+lustre/osc/Makefile
-+lustre/osc/autoMakefile
-+lustre/ost/Makefile
-+lustre/ost/autoMakefile
-+lustre/mgc/Makefile
-+lustre/mgc/autoMakefile
-+lustre/mgs/Makefile
-+lustre/mgs/autoMakefile
-+lustre/ptlrpc/Makefile
-+lustre/ptlrpc/autoMakefile
-+lustre/quota/Makefile
-+lustre/quota/autoMakefile
-+lustre/scripts/Makefile
-+lustre/scripts/version_tag.pl
-+lustre/tests/Makefile
-+lustre/utils/Makefile
-+])
-+case $lb_target_os in
-+ darwin)
-+ AC_CONFIG_FILES([ lustre/obdclass/darwin/Makefile ])
-+ ;;
-+esac
-+
-+])
-diff -urNad lustre~/lustre/include/linux/lustre_compat25.h lustre/lustre/include/linux/lustre_compat25.h
---- lustre~/lustre/include/linux/lustre_compat25.h 2009-03-12 10:33:45.000000000 +0100
-+++ lustre/lustre/include/linux/lustre_compat25.h 2009-03-13 09:45:02.000000000 +0100
-@@ -57,6 +57,28 @@
- #endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,14) */
-
- #ifndef HAVE_SET_FS_PWD
-+
-+#ifdef HAVE_FS_STRUCT_USE_PATH
-+static inline void ll_set_fs_pwd(struct fs_struct *fs, struct vfsmount *mnt,
-+ struct dentry *dentry)
-+{
-+ struct path path;
-+ struct path old_pwd;
-+
-+ path.mnt = mnt;
-+ path.dentry = dentry;
-+ write_lock(&fs->lock);
-+ old_pwd = fs->pwd;
-+ path_get(&path);
-+ fs->pwd = path;
-+ write_unlock(&fs->lock);
-+
-+ if (old_pwd.dentry)
-+ path_put(&old_pwd);
-+}
-+
-+#else
-+
- static inline void ll_set_fs_pwd(struct fs_struct *fs, struct vfsmount *mnt,
- struct dentry *dentry)
- {
-@@ -75,6 +97,7 @@
- mntput(old_pwdmnt);
- }
- }
-+#endif
- #else
- #define ll_set_fs_pwd set_fs_pwd
- #endif /* HAVE_SET_FS_PWD */
-@@ -151,7 +174,12 @@
- #endif
-
- /* XXX our code should be using the 2.6 calls, not the other way around */
-+#ifndef HAVE_TRYLOCK_PAGE
- #define TryLockPage(page) TestSetPageLocked(page)
-+#else
-+#define TryLockPage(page) (!trylock_page(page))
-+#endif
-+
- #define Page_Uptodate(page) PageUptodate(page)
- #define ll_redirty_page(page) set_page_dirty(page)
-
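As an aside (illustrative sketch only, not part of the patch): the mapping above has to negate trylock_page(), because trylock_page() returns non-zero when the lock was acquired, while the old TestSetPageLocked() convention kept by TryLockPage() is that non-zero means the page was already locked. A hypothetical helper spelling this out:

static inline int try_lock_page_sketch(struct page *page)
{
#ifdef HAVE_TRYLOCK_PAGE
        return !trylock_page(page);      /* trylock_page(): non-zero == lock taken */
#else
        return TestSetPageLocked(page);  /* non-zero == page was already locked */
#endif
}

In both branches a return value of 0 means the caller now holds the page lock.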
-@@ -364,8 +392,17 @@
- #define LL_RENAME_DOES_D_MOVE FS_ODD_RENAME
- #endif
-
-+#ifdef HAVE_FILE_REMOVE_SUID
-+#define ll_remove_suid(file, mnt) file_remove_suid(file)
-+#else
-+ #ifdef HAVE_SECURITY_PLUG
-+ #define ll_remove_suid(file,mnt) remove_suid(file->f_dentry,mnt)
-+ #else
-+ #define ll_remove_suid(file,mnt) remove_suid(file->f_dentry)
-+ #endif
-+#endif
-+
- #ifdef HAVE_SECURITY_PLUG
--#define ll_remove_suid(inode,mnt) remove_suid(inode,mnt)
- #define ll_vfs_rmdir(dir,entry,mnt) vfs_rmdir(dir,entry,mnt)
- #define ll_vfs_mkdir(inode,dir,mnt,mode) vfs_mkdir(inode,dir,mnt,mode)
- #define ll_vfs_link(old,mnt,dir,new,mnt1) vfs_link(old,mnt,dir,new,mnt1)
-@@ -377,7 +414,6 @@
- #define ll_vfs_rename(old,old_dir,mnt,new,new_dir,mnt1) \
- vfs_rename(old,old_dir,mnt,new,new_dir,mnt1)
- #else
--#define ll_remove_suid(inode,mnt) remove_suid(inode)
- #define ll_vfs_rmdir(dir,entry,mnt) vfs_rmdir(dir,entry)
- #define ll_vfs_mkdir(inode,dir,mnt,mode) vfs_mkdir(inode,dir,mode)
- #define ll_vfs_link(old,mnt,dir,new,mnt1) vfs_link(old,dir,new)
-@@ -388,6 +424,57 @@
- vfs_rename(old,old_dir,new,new_dir)
- #endif
-
-+#ifdef HAVE_REGISTER_SHRINKER
-+typedef int (*shrinker_t)(int nr_to_scan, gfp_t gfp_mask);
-+
-+static inline
-+struct shrinker *set_shrinker(int seek, shrinker_t func)
-+{
-+ struct shrinker *s;
-+
-+ s = kmalloc(sizeof(*s), GFP_KERNEL);
-+ if (s == NULL)
-+ return (NULL);
-+
-+ s->shrink = func;
-+ s->seeks = seek;
-+
-+ register_shrinker(s);
-+
-+ return s;
-+}
-+
-+static inline
-+void remove_shrinker(struct shrinker *shrinker)
-+{
-+ if (shrinker == NULL)
-+ return;
-+
-+ unregister_shrinker(shrinker);
-+ kfree(shrinker);
-+}
-+#endif
-+
-+#ifdef HAVE_BIO_ENDIO_2ARG
-+#define cfs_bio_io_error(a,b) bio_io_error((a))
-+#define cfs_bio_endio(a,b,c) bio_endio((a),(c))
-+#else
-+#define cfs_bio_io_error(a,b) bio_io_error((a),(b))
-+#define cfs_bio_endio(a,b,c) bio_endio((a),(b),(c))
-+#endif
-+
-+#ifdef HAVE_FS_STRUCT_USE_PATH
-+#define cfs_fs_pwd(fs) ((fs)->pwd.dentry)
-+#define cfs_fs_mnt(fs) ((fs)->pwd.mnt)
-+#else
-+#define cfs_fs_pwd(fs) ((fs)->pwd)
-+#define cfs_fs_mnt(fs) ((fs)->pwdmnt)
-+#endif
-+
-+#ifndef list_for_each_safe_rcu
-+#define list_for_each_safe_rcu(a,b,c) list_for_each_rcu(a, c)
-+#endif
-+
- #ifndef abs
- static inline int abs(int x)
- {
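A hedged usage sketch for the set_shrinker()/remove_shrinker() emulation added above: on kernels that only provide register_shrinker(), the wrapper allocates a struct shrinker, fills in ->shrink and ->seeks, and registers it, and remove_shrinker() unregisters and frees it. The callback and variable names below are invented for illustration:

static int example_cache_shrink(int nr_to_scan, gfp_t gfp_mask)
{
        /* free up to nr_to_scan cached objects, return how many remain */
        return 0;
}

static struct shrinker *example_shrinker;

static int example_init(void)
{
        example_shrinker = set_shrinker(DEFAULT_SEEKS, example_cache_shrink);
        return example_shrinker != NULL ? 0 : -ENOMEM;
}

static void example_exit(void)
{
        remove_shrinker(example_shrinker);
}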
-diff -urNad lustre~/lustre/include/linux/lustre_compat25.h.orig lustre/lustre/include/linux/lustre_compat25.h.orig
---- lustre~/lustre/include/linux/lustre_compat25.h.orig 1970-01-01 00:00:00.000000000 +0000
-+++ lustre/lustre/include/linux/lustre_compat25.h.orig 2009-03-13 09:45:03.000000000 +0100
-@@ -0,0 +1,411 @@
-+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-+ * vim:expandtab:shiftwidth=8:tabstop=8:
-+ *
-+ * GPL HEADER START
-+ *
-+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
-+ *
-+ * This program is free software; you can redistribute it and/or modify
-+ * it under the terms of the GNU General Public License version 2 only,
-+ * as published by the Free Software Foundation.
-+ *
-+ * This program is distributed in the hope that it will be useful, but
-+ * WITHOUT ANY WARRANTY; without even the implied warranty of
-+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-+ * General Public License version 2 for more details (a copy is included
-+ * in the LICENSE file that accompanied this code).
-+ *
-+ * You should have received a copy of the GNU General Public License
-+ * version 2 along with this program; If not, see
-+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
-+ *
-+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
-+ * CA 95054 USA or visit www.sun.com if you need additional information or
-+ * have any questions.
-+ *
-+ * GPL HEADER END
-+ */
-+/*
-+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved
-+ * Use is subject to license terms.
-+ */
-+/*
-+ * This file is part of Lustre, http://www.lustre.org/
-+ * Lustre is a trademark of Sun Microsystems, Inc.
-+ */
-+
-+#ifndef _LINUX_COMPAT25_H
-+#define _LINUX_COMPAT25_H
-+
-+#ifdef __KERNEL__
-+
-+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,5)
-+#error sorry, lustre requires at least 2.6.5
-+#endif
-+
-+#include <libcfs/linux/portals_compat25.h>
-+
-+#include <linux/lustre_patchless_compat.h>
-+
-+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,14)
-+struct ll_iattr_struct {
-+ struct iattr iattr;
-+ unsigned int ia_attr_flags;
-+};
-+#else
-+#define ll_iattr_struct iattr
-+#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,14) */
-+
-+#ifndef HAVE_SET_FS_PWD
-+static inline void ll_set_fs_pwd(struct fs_struct *fs, struct vfsmount *mnt,
-+ struct dentry *dentry)
-+{
-+ struct dentry *old_pwd;
-+ struct vfsmount *old_pwdmnt;
-+
-+ write_lock(&fs->lock);
-+ old_pwd = fs->pwd;
-+ old_pwdmnt = fs->pwdmnt;
-+ fs->pwdmnt = mntget(mnt);
-+ fs->pwd = dget(dentry);
-+ write_unlock(&fs->lock);
-+
-+ if (old_pwd) {
-+ dput(old_pwd);
-+ mntput(old_pwdmnt);
-+ }
-+}
-+#else
-+#define ll_set_fs_pwd set_fs_pwd
-+#endif /* HAVE_SET_FS_PWD */
-+
-+#ifdef HAVE_INODE_I_MUTEX
-+#define UNLOCK_INODE_MUTEX(inode) do {mutex_unlock(&(inode)->i_mutex); } while(0)
-+#define LOCK_INODE_MUTEX(inode) do {mutex_lock(&(inode)->i_mutex); } while(0)
-+#define TRYLOCK_INODE_MUTEX(inode) mutex_trylock(&(inode)->i_mutex)
-+#else
-+#define UNLOCK_INODE_MUTEX(inode) do {up(&(inode)->i_sem); } while(0)
-+#define LOCK_INODE_MUTEX(inode) do {down(&(inode)->i_sem); } while(0)
-+#define TRYLOCK_INODE_MUTEX(inode) (!down_trylock(&(inode)->i_sem))
-+#endif /* HAVE_INODE_I_MUTEX */
-+
-+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,15)
-+#define d_child d_u.d_child
-+#define d_rcu d_u.d_rcu
-+#endif
-+
-+#ifdef HAVE_DQUOTOFF_MUTEX
-+#define UNLOCK_DQONOFF_MUTEX(dqopt) do {mutex_unlock(&(dqopt)->dqonoff_mutex); } while(0)
-+#define LOCK_DQONOFF_MUTEX(dqopt) do {mutex_lock(&(dqopt)->dqonoff_mutex); } while(0)
-+#else
-+#define UNLOCK_DQONOFF_MUTEX(dqopt) do {up(&(dqopt)->dqonoff_sem); } while(0)
-+#define LOCK_DQONOFF_MUTEX(dqopt) do {down(&(dqopt)->dqonoff_sem); } while(0)
-+#endif /* HAVE_DQUOTOFF_MUTEX */
-+
-+
-+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,4)
-+#define NGROUPS_SMALL NGROUPS
-+#define NGROUPS_PER_BLOCK ((int)(EXEC_PAGESIZE / sizeof(gid_t)))
-+
-+struct group_info {
-+ int ngroups;
-+ atomic_t usage;
-+ gid_t small_block[NGROUPS_SMALL];
-+ int nblocks;
-+ gid_t *blocks[0];
-+};
-+#define current_ngroups current->ngroups
-+#define current_groups current->groups
-+
-+struct group_info *groups_alloc(int gidsetsize);
-+void groups_free(struct group_info *ginfo);
-+#else /* >= 2.6.4 */
-+
-+#define current_ngroups current->group_info->ngroups
-+#define current_groups current->group_info->small_block
-+
-+#endif /* LINUX_VERSION_CODE < KERNEL_VERSION(2,6,4) */
-+
-+#ifndef page_private
-+#define page_private(page) ((page)->private)
-+#define set_page_private(page, v) ((page)->private = (v))
-+#endif
-+
-+#ifndef HAVE_GFP_T
-+#define gfp_t int
-+#endif
-+
-+#define lock_dentry(___dentry) spin_lock(&(___dentry)->d_lock)
-+#define unlock_dentry(___dentry) spin_unlock(&(___dentry)->d_lock)
-+
-+#define ll_kernel_locked() kernel_locked()
-+
-+/*
-+ * OBD need working random driver, thus all our
-+ * initialization routines must be called after device
-+ * driver initialization
-+ */
-+#ifndef MODULE
-+#undef module_init
-+#define module_init(a) late_initcall(a)
-+#endif
-+
-+/* XXX our code should be using the 2.6 calls, not the other way around */
-+#define TryLockPage(page) TestSetPageLocked(page)
-+#define Page_Uptodate(page) PageUptodate(page)
-+#define ll_redirty_page(page) set_page_dirty(page)
-+
-+#define KDEVT_INIT(val) (val)
-+
-+#define LTIME_S(time) (time.tv_sec)
-+#define ll_path_lookup path_lookup
-+#define ll_permission(inode,mask,nd) permission(inode,mask,nd)
-+
-+#define ll_pgcache_lock(mapping) spin_lock(&mapping->page_lock)
-+#define ll_pgcache_unlock(mapping) spin_unlock(&mapping->page_lock)
-+#define ll_call_writepage(inode, page) \
-+ (inode)->i_mapping->a_ops->writepage(page, NULL)
-+#define ll_invalidate_inode_pages(inode) \
-+ invalidate_inode_pages((inode)->i_mapping)
-+#define ll_truncate_complete_page(page) \
-+ truncate_complete_page(page->mapping, page)
-+
-+#define ll_vfs_create(a,b,c,d) vfs_create(a,b,c,d)
-+#define ll_dev_t dev_t
-+#define kdev_t dev_t
-+#define to_kdev_t(dev) (dev)
-+#define kdev_t_to_nr(dev) (dev)
-+#define val_to_kdev(dev) (dev)
-+#define ILOOKUP(sb, ino, test, data) ilookup5(sb, ino, test, data);
-+
-+#include <linux/writeback.h>
-+
-+static inline int cleanup_group_info(void)
-+{
-+ struct group_info *ginfo;
-+
-+ ginfo = groups_alloc(0);
-+ if (!ginfo)
-+ return -ENOMEM;
-+
-+ set_current_groups(ginfo);
-+ put_group_info(ginfo);
-+
-+ return 0;
-+}
-+
-+#define __set_page_ll_data(page, llap) \
-+ do { \
-+ page_cache_get(page); \
-+ SetPagePrivate(page); \
-+ set_page_private(page, (unsigned long)llap); \
-+ } while (0)
-+#define __clear_page_ll_data(page) \
-+ do { \
-+ ClearPagePrivate(page); \
-+ set_page_private(page, 0); \
-+ page_cache_release(page); \
-+ } while(0)
-+
-+#define kiobuf bio
-+
-+#include <linux/proc_fs.h>
-+
-+#if !defined(HAVE_D_REHASH_COND) && defined(HAVE___D_REHASH)
-+#define d_rehash_cond(dentry, lock) __d_rehash(dentry, lock)
-+extern void __d_rehash(struct dentry *dentry, int lock);
-+#endif
-+
-+#if !defined(HAVE_D_MOVE_LOCKED) && defined(HAVE___D_MOVE)
-+#define d_move_locked(dentry, target) __d_move(dentry, target)
-+extern void __d_move(struct dentry *dentry, struct dentry *target);
-+#endif
-+
-+#ifdef HAVE_CAN_SLEEP_ARG
-+#define ll_flock_lock_file_wait(file, lock, can_sleep) \
-+ flock_lock_file_wait(file, lock, can_sleep)
-+#else
-+#define ll_flock_lock_file_wait(file, lock, can_sleep) \
-+ flock_lock_file_wait(file, lock)
-+#endif
-+
-+#define CheckWriteback(page, cmd) \
-+ ((!PageWriteback(page) && (cmd & OBD_BRW_READ)) || \
-+ (PageWriteback(page) && (cmd & OBD_BRW_WRITE)))
-+
-+
-+#ifdef HAVE_PAGE_LIST
-+static inline int mapping_has_pages(struct address_space *mapping)
-+{
-+ int rc = 1;
-+
-+ ll_pgcache_lock(mapping);
-+ if (list_empty(&mapping->dirty_pages) &&
-+ list_empty(&mapping->clean_pages) &&
-+ list_empty(&mapping->locked_pages)) {
-+ rc = 0;
-+ }
-+ ll_pgcache_unlock(mapping);
-+
-+ return rc;
-+}
-+#else
-+static inline int mapping_has_pages(struct address_space *mapping)
-+{
-+ return mapping->nrpages > 0;
-+}
-+#endif
-+
-+#ifdef HAVE_KIOBUF_KIO_BLOCKS
-+#define KIOBUF_GET_BLOCKS(k) ((k)->kio_blocks)
-+#else
-+#define KIOBUF_GET_BLOCKS(k) ((k)->blocks)
-+#endif
-+
-+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,7))
-+#define ll_set_dflags(dentry, flags) do { dentry->d_vfs_flags |= flags; } while(0)
-+#define ll_vfs_symlink(dir, dentry, mnt, path, mode) \
-+ vfs_symlink(dir, dentry, path)
-+#else
-+#define ll_set_dflags(dentry, flags) do { \
-+ spin_lock(&dentry->d_lock); \
-+ dentry->d_flags |= flags; \
-+ spin_unlock(&dentry->d_lock); \
-+ } while(0)
-+#ifdef HAVE_SECURITY_PLUG
-+#define ll_vfs_symlink(dir, dentry, mnt, path, mode) \
-+ vfs_symlink(dir, dentry, mnt, path, mode)
-+#else
-+#define ll_vfs_symlink(dir, dentry, mnt, path, mode) \
-+ vfs_symlink(dir, dentry, path, mode)
-+#endif
-+#endif
-+
-+#ifndef container_of
-+#define container_of(ptr, type, member) ({ \
-+ const typeof( ((type *)0)->member ) *__mptr = (ptr); \
-+ (type *)( (char *)__mptr - offsetof(type,member) );})
-+#endif
-+
-+#ifdef HAVE_I_ALLOC_SEM
-+#define UP_WRITE_I_ALLOC_SEM(i) do { up_write(&(i)->i_alloc_sem); } while (0)
-+#define DOWN_WRITE_I_ALLOC_SEM(i) do { down_write(&(i)->i_alloc_sem); } while(0)
-+#define LASSERT_I_ALLOC_SEM_WRITE_LOCKED(i) LASSERT(down_read_trylock(&(i)->i_alloc_sem) == 0)
-+
-+#define UP_READ_I_ALLOC_SEM(i) do { up_read(&(i)->i_alloc_sem); } while (0)
-+#define DOWN_READ_I_ALLOC_SEM(i) do { down_read(&(i)->i_alloc_sem); } while (0)
-+#define LASSERT_I_ALLOC_SEM_READ_LOCKED(i) LASSERT(down_write_trylock(&(i)->i_alloc_sem) == 0)
-+#else
-+#define UP_READ_I_ALLOC_SEM(i) do { } while (0)
-+#define DOWN_READ_I_ALLOC_SEM(i) do { } while (0)
-+#define LASSERT_I_ALLOC_SEM_READ_LOCKED(i) do { } while (0)
-+
-+#define UP_WRITE_I_ALLOC_SEM(i) do { } while (0)
-+#define DOWN_WRITE_I_ALLOC_SEM(i) do { } while (0)
-+#define LASSERT_I_ALLOC_SEM_WRITE_LOCKED(i) do { } while (0)
-+#endif
-+
-+#ifndef HAVE_GRAB_CACHE_PAGE_NOWAIT_GFP
-+#define grab_cache_page_nowait_gfp(x, y, z) grab_cache_page_nowait((x), (y))
-+#endif
-+
-+#ifndef HAVE_FILEMAP_FDATAWRITE
-+#define filemap_fdatawrite(mapping) filemap_fdatasync(mapping)
-+#endif
-+
-+#ifdef HAVE_VFS_KERN_MOUNT
-+static inline
-+struct vfsmount *
-+ll_kern_mount(const char *fstype, int flags, const char *name, void *data)
-+{
-+ struct file_system_type *type = get_fs_type(fstype);
-+ struct vfsmount *mnt;
-+ if (!type)
-+ return ERR_PTR(-ENODEV);
-+ mnt = vfs_kern_mount(type, flags, name, data);
-+ module_put(type->owner);
-+ return mnt;
-+}
-+#else
-+#define ll_kern_mount(fstype, flags, name, data) do_kern_mount((fstype), (flags), (name), (data))
-+#endif
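For illustration only (the filesystem type and device name are placeholders), the ll_kern_mount() wrapper above would be used like this; it looks up the filesystem type, mounts it, and drops the module reference taken by get_fs_type():

        struct vfsmount *mnt;

        mnt = ll_kern_mount("ldiskfs", 0, "/dev/sda1", NULL);
        if (IS_ERR(mnt))
                return PTR_ERR(mnt);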
-+
-+#ifdef HAVE_STATFS_DENTRY_PARAM
-+#define ll_do_statfs(sb, sfs) (sb)->s_op->statfs((sb)->s_root, (sfs))
-+#else
-+#define ll_do_statfs(sb, sfs) (sb)->s_op->statfs((sb), (sfs))
-+#endif
-+
-+/* task_struct */
-+#ifndef HAVE_TASK_PPTR
-+#define p_pptr parent
-+#endif
-+
-+#ifdef HAVE_UNREGISTER_BLKDEV_RETURN_INT
-+#define ll_unregister_blkdev(a,b) unregister_blkdev((a),(b))
-+#else
-+static inline
-+int ll_unregister_blkdev(unsigned int dev, const char *name)
-+{
-+ unregister_blkdev(dev, name);
-+ return 0;
-+}
-+#endif
-+
-+#ifdef HAVE_INVALIDATE_BDEV_2ARG
-+#define ll_invalidate_bdev(a,b) invalidate_bdev((a),(b))
-+#else
-+#define ll_invalidate_bdev(a,b) invalidate_bdev((a))
-+#endif
-+
-+#ifdef HAVE_FS_RENAME_DOES_D_MOVE
-+#define LL_RENAME_DOES_D_MOVE FS_RENAME_DOES_D_MOVE
-+#else
-+#define LL_RENAME_DOES_D_MOVE FS_ODD_RENAME
-+#endif
-+
-+#ifdef HAVE_SECURITY_PLUG
-+#define ll_remove_suid(inode,mnt) remove_suid(inode,mnt)
-+#define ll_vfs_rmdir(dir,entry,mnt) vfs_rmdir(dir,entry,mnt)
-+#define ll_vfs_mkdir(inode,dir,mnt,mode) vfs_mkdir(inode,dir,mnt,mode)
-+#define ll_vfs_link(old,mnt,dir,new,mnt1) vfs_link(old,mnt,dir,new,mnt1)
-+#define ll_vfs_unlink(inode,entry,mnt) vfs_unlink(inode,entry,mnt)
-+#define ll_vfs_mknod(dir,entry,mnt,mode,dev) \
-+ vfs_mknod(dir,entry,mnt,mode,dev)
-+#define ll_security_inode_unlink(dir,entry,mnt) \
-+ security_inode_unlink(dir,entry,mnt)
-+#define ll_vfs_rename(old,old_dir,mnt,new,new_dir,mnt1) \
-+ vfs_rename(old,old_dir,mnt,new,new_dir,mnt1)
-+#else
-+#define ll_remove_suid(inode,mnt) remove_suid(inode)
-+#define ll_vfs_rmdir(dir,entry,mnt) vfs_rmdir(dir,entry)
-+#define ll_vfs_mkdir(inode,dir,mnt,mode) vfs_mkdir(inode,dir,mode)
-+#define ll_vfs_link(old,mnt,dir,new,mnt1) vfs_link(old,dir,new)
-+#define ll_vfs_unlink(inode,entry,mnt) vfs_unlink(inode,entry)
-+#define ll_vfs_mknod(dir,entry,mnt,mode,dev) vfs_mknod(dir,entry,mode,dev)
-+#define ll_security_inode_unlink(dir,entry,mnt) security_inode_unlink(dir,entry)
-+#define ll_vfs_rename(old,old_dir,mnt,new,new_dir,mnt1) \
-+ vfs_rename(old,old_dir,new,new_dir)
-+#endif
-+
-+#ifndef abs
-+static inline int abs(int x)
-+{
-+ return (x < 0) ? -x : x;
-+}
-+#endif
-+
-+#ifndef labs
-+static inline long labs(long x)
-+{
-+ return (x < 0) ? -x : x;
-+}
-+#endif
-+
-+/* Using kernel fls(). Userspace will use one defined in user-bitops.h. */
-+#ifndef __fls
-+#define __fls fls
-+#endif
-+
-+#endif /* __KERNEL__ */
-+#endif /* _COMPAT25_H */
-diff -urNad lustre~/lustre/include/linux/lustre_lib.h lustre/lustre/include/linux/lustre_lib.h
---- lustre~/lustre/include/linux/lustre_lib.h 2008-08-07 11:52:06.000000000 +0200
-+++ lustre/lustre/include/linux/lustre_lib.h 2009-03-13 09:45:03.000000000 +0100
-@@ -49,7 +49,6 @@
- # include <string.h>
- # include <sys/types.h>
- #else
--# include <asm/semaphore.h>
- # include <linux/rwsem.h>
- # include <linux/sched.h>
- # include <linux/signal.h>
-diff -urNad lustre~/lustre/include/linux/lustre_patchless_compat.h lustre/lustre/include/linux/lustre_patchless_compat.h
---- lustre~/lustre/include/linux/lustre_patchless_compat.h 2008-08-07 11:52:10.000000000 +0200
-+++ lustre/lustre/include/linux/lustre_patchless_compat.h 2009-03-13 09:45:03.000000000 +0100
-@@ -52,7 +52,7 @@
-
- BUG_ON(!PageLocked(page));
-
--#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,15))
-+#ifdef HAVE_RW_TREE_LOCK
- write_lock_irq(&mapping->tree_lock);
- #else
- spin_lock_irq(&mapping->tree_lock);
-@@ -65,7 +65,7 @@
- #else
- __dec_zone_page_state(page, NR_FILE_PAGES);
- #endif
--#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,15))
-+#ifdef HAVE_RW_TREE_LOCK
- write_unlock_irq(&mapping->tree_lock);
- #else
- spin_unlock_irq(&mapping->tree_lock);
-diff -urNad lustre~/lustre/include/lprocfs_status.h lustre/lustre/include/lprocfs_status.h
---- lustre~/lustre/include/lprocfs_status.h 2009-03-12 10:32:27.000000000 +0100
-+++ lustre/lustre/include/lprocfs_status.h 2009-03-13 09:45:03.000000000 +0100
-@@ -521,6 +521,8 @@
- #define LPROCFS_EXIT() do { \
- up_read(&_lprocfs_lock); \
- } while(0)
-+
-+#ifdef HAVE_PROCFS_DELETED
- #define LPROCFS_ENTRY_AND_CHECK(dp) do { \
- typecheck(struct proc_dir_entry *, dp); \
- LPROCFS_ENTRY(); \
-@@ -529,6 +531,14 @@
- return -ENODEV; \
- } \
- } while(0)
-+#define LPROCFS_CHECK_DELETED(dp) ((dp)->deleted)
-+#else
-+
-+#define LPROCFS_ENTRY_AND_CHECK(dp) \
-+ LPROCFS_ENTRY();
-+#define LPROCFS_CHECK_DELETED(dp) (0)
-+#endif
-+
- #define LPROCFS_WRITE_ENTRY() do { \
- down_write(&_lprocfs_lock); \
- } while(0)
-@@ -536,6 +546,7 @@
- up_write(&_lprocfs_lock); \
- } while(0)
-
-+
- /* You must use these macros when you want to refer to
- * the import in a client obd_device for a lprocfs entry */
- #define LPROCFS_CLIMP_CHECK(obd) do { \
-diff -urNad lustre~/lustre/include/lprocfs_status.h.orig lustre/lustre/include/lprocfs_status.h.orig
---- lustre~/lustre/include/lprocfs_status.h.orig 1970-01-01 00:00:00.000000000 +0000
-+++ lustre/lustre/include/lprocfs_status.h.orig 2009-03-13 09:45:03.000000000 +0100
-@@ -0,0 +1,817 @@
-+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-+ * vim:expandtab:shiftwidth=8:tabstop=8:
-+ *
-+ * GPL HEADER START
-+ *
-+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
-+ *
-+ * This program is free software; you can redistribute it and/or modify
-+ * it under the terms of the GNU General Public License version 2 only,
-+ * as published by the Free Software Foundation.
-+ *
-+ * This program is distributed in the hope that it will be useful, but
-+ * WITHOUT ANY WARRANTY; without even the implied warranty of
-+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-+ * General Public License version 2 for more details (a copy is included
-+ * in the LICENSE file that accompanied this code).
-+ *
-+ * You should have received a copy of the GNU General Public License
-+ * version 2 along with this program; If not, see
-+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
-+ *
-+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
-+ * CA 95054 USA or visit www.sun.com if you need additional information or
-+ * have any questions.
-+ *
-+ * GPL HEADER END
-+ */
-+/*
-+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved
-+ * Use is subject to license terms.
-+ */
-+/*
-+ * This file is part of Lustre, http://www.lustre.org/
-+ * Lustre is a trademark of Sun Microsystems, Inc.
-+ *
-+ * lustre/include/lprocfs_status.h
-+ *
-+ * Top level header file for LProc SNMP
-+ *
-+ * Author: Hariharan Thantry thantry at users.sourceforge.net
-+ */
-+#ifndef _LPROCFS_SNMP_H
-+#define _LPROCFS_SNMP_H
-+
-+#include <lustre/lustre_idl.h>
-+#if defined(__linux__)
-+#include <linux/lprocfs_status.h>
-+#elif defined(__APPLE__)
-+#include <darwin/lprocfs_status.h>
-+#elif defined(__WINNT__)
-+#include <winnt/lprocfs_status.h>
-+#else
-+#error Unsupported operating system.
-+#endif
-+
-+#undef LPROCFS
-+#if (defined(__KERNEL__) && defined(CONFIG_PROC_FS))
-+# define LPROCFS
-+#endif
-+
-+struct lprocfs_vars {
-+ const char *name;
-+ cfs_read_proc_t *read_fptr;
-+ cfs_write_proc_t *write_fptr;
-+ void *data;
-+ struct file_operations *fops;
-+ /**
-+ * /proc file mode.
-+ */
-+ mode_t proc_mode;
-+};
-+
-+struct lprocfs_static_vars {
-+ struct lprocfs_vars *module_vars;
-+ struct lprocfs_vars *obd_vars;
-+};
-+
-+/* if we find more consumers this could be generalized */
-+#define OBD_HIST_MAX 32
-+struct obd_histogram {
-+ spinlock_t oh_lock;
-+ unsigned long oh_buckets[OBD_HIST_MAX];
-+};
-+
-+enum {
-+ BRW_R_PAGES = 0,
-+ BRW_W_PAGES,
-+ BRW_R_RPC_HIST,
-+ BRW_W_RPC_HIST,
-+ BRW_R_IO_TIME,
-+ BRW_W_IO_TIME,
-+ BRW_R_DISCONT_PAGES,
-+ BRW_W_DISCONT_PAGES,
-+ BRW_R_DISCONT_BLOCKS,
-+ BRW_W_DISCONT_BLOCKS,
-+ BRW_R_DISK_IOSIZE,
-+ BRW_W_DISK_IOSIZE,
-+ BRW_R_DIO_FRAGS,
-+ BRW_W_DIO_FRAGS,
-+ BRW_LAST,
-+};
-+
-+struct brw_stats {
-+ struct obd_histogram hist[BRW_LAST];
-+};
-+
-+
-+/* An lprocfs counter can be configured using the enum bit masks below.
-+ *
-+ * LPROCFS_CNTR_EXTERNALLOCK indicates that an external lock already
-+ * protects this counter from concurrent updates. If not specified,
-+ * lprocfs uses an internal per-counter lock variable. External locks are
-+ * not used to protect counter increments, but are used to protect
-+ * counter readout and resets.
-+ *
-+ * LPROCFS_CNTR_AVGMINMAX indicates a multi-valued counter
-+ * (i.e. the counter can be incremented by more than "1"). When specified,
-+ * the counter maintains min, max and sum in addition to a simple
-+ * invocation count. This allows averages to be computed.
-+ * If not specified, the counter is an increment-by-1 counter.
-+ * min, max, sum, etc. are not maintained.
-+ *
-+ * LPROCFS_CNTR_STDDEV indicates that the counter should track sum of
-+ * squares (for multi-valued counter samples only). This allows
-+ * external computation of standard deviation, but involves a 64-bit
-+ * multiply per counter increment.
-+ */
-+
-+enum {
-+ LPROCFS_CNTR_EXTERNALLOCK = 0x0001,
-+ LPROCFS_CNTR_AVGMINMAX = 0x0002,
-+ LPROCFS_CNTR_STDDEV = 0x0004,
-+
-+ /* counter data type */
-+ LPROCFS_TYPE_REGS = 0x0100,
-+ LPROCFS_TYPE_BYTES = 0x0200,
-+ LPROCFS_TYPE_PAGES = 0x0400,
-+ LPROCFS_TYPE_CYCLE = 0x0800,
-+};
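A brief, hedged example of how the flags described above are combined when registering a counter (the index symbol is invented; lprocfs_counter_init() is declared later in this header):

        lprocfs_counter_init(stats, EXAMPLE_READ_BYTES_IDX,
                             LPROCFS_CNTR_AVGMINMAX | LPROCFS_TYPE_BYTES,
                             "read_bytes", "bytes");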
-+
-+struct lprocfs_atomic {
-+ atomic_t la_entry;
-+ atomic_t la_exit;
-+};
-+
-+#define LC_MIN_INIT ((~(__u64)0) >> 1)
-+
-+struct lprocfs_counter {
-+ struct lprocfs_atomic lc_cntl; /* may need to move to per set */
-+ unsigned int lc_config;
-+ __s64 lc_count;
-+ __s64 lc_sum;
-+ __s64 lc_min;
-+ __s64 lc_max;
-+ __s64 lc_sumsquare;
-+ const char *lc_name; /* must be static */
-+ const char *lc_units; /* must be static */
-+};
-+
-+struct lprocfs_percpu {
-+ struct lprocfs_counter lp_cntr[0];
-+};
-+
-+#define LPROCFS_GET_NUM_CPU 0x0001
-+#define LPROCFS_GET_SMP_ID 0x0002
-+
-+enum lprocfs_stats_flags {
-+ LPROCFS_STATS_FLAG_PERCPU = 0x0000, /* per cpu counter */
-+ LPROCFS_STATS_FLAG_NOPERCPU = 0x0001, /* stats have no percpu
-+ * area and need locking */
-+};
-+
-+enum lprocfs_fields_flags {
-+ LPROCFS_FIELDS_FLAGS_CONFIG = 0x0001,
-+ LPROCFS_FIELDS_FLAGS_SUM = 0x0002,
-+ LPROCFS_FIELDS_FLAGS_MIN = 0x0003,
-+ LPROCFS_FIELDS_FLAGS_MAX = 0x0004,
-+ LPROCFS_FIELDS_FLAGS_AVG = 0x0005,
-+ LPROCFS_FIELDS_FLAGS_SUMSQUARE = 0x0006,
-+ LPROCFS_FIELDS_FLAGS_COUNT = 0x0007,
-+};
-+
-+struct lprocfs_stats {
-+ unsigned int ls_num; /* # of counters */
-+ int ls_flags; /* See LPROCFS_STATS_FLAG_* */
-+ spinlock_t ls_lock; /* Lock used only when there are
-+ * no percpu stats areas */
-+ struct lprocfs_percpu *ls_percpu[0];
-+};
-+
-+static inline int opcode_offset(__u32 opc) {
-+ if (opc < OST_LAST_OPC) {
-+ /* OST opcode */
-+ return (opc - OST_FIRST_OPC);
-+ } else if (opc < MDS_LAST_OPC) {
-+ /* MDS opcode */
-+ return (opc - MDS_FIRST_OPC +
-+ (OST_LAST_OPC - OST_FIRST_OPC));
-+ } else if (opc < LDLM_LAST_OPC) {
-+ /* LDLM Opcode */
-+ return (opc - LDLM_FIRST_OPC +
-+ (MDS_LAST_OPC - MDS_FIRST_OPC) +
-+ (OST_LAST_OPC - OST_FIRST_OPC));
-+ } else if (opc < MGS_LAST_OPC) {
-+ /* MGS Opcode */
-+ return (opc - MGS_FIRST_OPC +
-+ (LDLM_LAST_OPC - LDLM_FIRST_OPC) +
-+ (MDS_LAST_OPC - MDS_FIRST_OPC) +
-+ (OST_LAST_OPC - OST_FIRST_OPC));
-+ } else if (opc < OBD_LAST_OPC) {
-+ /* OBD Ping */
-+ return (opc - OBD_FIRST_OPC +
-+ (MGS_LAST_OPC - MGS_FIRST_OPC) +
-+ (LDLM_LAST_OPC - LDLM_FIRST_OPC) +
-+ (MDS_LAST_OPC - MDS_FIRST_OPC) +
-+ (OST_LAST_OPC - OST_FIRST_OPC));
-+ } else if (opc < LLOG_LAST_OPC) {
-+ /* LLOG Opcode */
-+ return (opc - LLOG_FIRST_OPC +
-+ (OBD_LAST_OPC - OBD_FIRST_OPC) +
-+ (MGS_LAST_OPC - MGS_FIRST_OPC) +
-+ (LDLM_LAST_OPC - LDLM_FIRST_OPC) +
-+ (MDS_LAST_OPC - MDS_FIRST_OPC) +
-+ (OST_LAST_OPC - OST_FIRST_OPC));
-+ } else if (opc < QUOTA_LAST_OPC) {
-+ /* LQUOTA Opcode */
-+ return (opc - QUOTA_FIRST_OPC +
-+ (LLOG_LAST_OPC - LLOG_FIRST_OPC) +
-+ (OBD_LAST_OPC - OBD_FIRST_OPC) +
-+ (MGS_LAST_OPC - MGS_FIRST_OPC) +
-+ (LDLM_LAST_OPC - LDLM_FIRST_OPC) +
-+ (MDS_LAST_OPC - MDS_FIRST_OPC) +
-+ (OST_LAST_OPC - OST_FIRST_OPC));
-+ } else {
-+ /* Unknown Opcode */
-+ return -1;
-+ }
-+}
-+
-+#define LUSTRE_MAX_OPCODES ((OST_LAST_OPC - OST_FIRST_OPC) + \
-+ (MDS_LAST_OPC - MDS_FIRST_OPC) + \
-+ (LDLM_LAST_OPC - LDLM_FIRST_OPC) + \
-+ (MGS_LAST_OPC - MGS_FIRST_OPC) + \
-+ (OBD_LAST_OPC - OBD_FIRST_OPC) + \
-+ (LLOG_LAST_OPC - LLOG_FIRST_OPC) + \
-+ (QUOTA_LAST_OPC - QUOTA_FIRST_OPC))
-+
-+#define EXTRA_MAX_OPCODES ((PTLRPC_LAST_CNTR - PTLRPC_FIRST_CNTR) + \
-+ (EXTRA_LAST_OPC - EXTRA_FIRST_OPC))
-+
-+enum {
-+ PTLRPC_REQWAIT_CNTR = 0,
-+ PTLRPC_REQQDEPTH_CNTR,
-+ PTLRPC_REQACTIVE_CNTR,
-+ PTLRPC_TIMEOUT,
-+ PTLRPC_REQBUF_AVAIL_CNTR,
-+ PTLRPC_LAST_CNTR
-+};
-+
-+#define PTLRPC_FIRST_CNTR PTLRPC_REQWAIT_CNTR
-+
-+enum {
-+ LDLM_GLIMPSE_ENQUEUE = 0,
-+ LDLM_PLAIN_ENQUEUE,
-+ LDLM_EXTENT_ENQUEUE,
-+ LDLM_FLOCK_ENQUEUE,
-+ LDLM_IBITS_ENQUEUE,
-+ MDS_REINT_SETATTR,
-+ MDS_REINT_CREATE,
-+ MDS_REINT_LINK,
-+ MDS_REINT_UNLINK,
-+ MDS_REINT_RENAME,
-+ MDS_REINT_OPEN,
-+ BRW_READ_BYTES,
-+ BRW_WRITE_BYTES,
-+ EXTRA_LAST_OPC
-+};
-+
-+#define EXTRA_FIRST_OPC LDLM_GLIMPSE_ENQUEUE
-+/* class_obd.c */
-+extern cfs_proc_dir_entry_t *proc_lustre_root;
-+
-+struct obd_device;
-+struct file;
-+struct obd_histogram;
-+
-+/* Days / hours / mins / seconds format */
-+struct dhms {
-+ int d,h,m,s;
-+};
-+static inline void s2dhms(struct dhms *ts, time_t secs)
-+{
-+ ts->d = secs / 86400;
-+ secs = secs % 86400;
-+ ts->h = secs / 3600;
-+ secs = secs % 3600;
-+ ts->m = secs / 60;
-+ ts->s = secs % 60;
-+}
-+#define DHMS_FMT "%dd%dh%02dm%02ds"
-+#define DHMS_VARS(x) (x)->d, (x)->h, (x)->m, (x)->s
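For example (sketch only), 93784 seconds is 1 day, 2 hours, 3 minutes and 4 seconds, so it prints as 1d2h03m04s:

        struct dhms ts;

        s2dhms(&ts, 93784);
        CDEBUG(D_INFO, "age: "DHMS_FMT"\n", DHMS_VARS(&ts));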
-+
-+
-+#ifdef LPROCFS
-+
-+static inline int lprocfs_stats_lock(struct lprocfs_stats *stats, int type)
-+{
-+ int rc = 0;
-+
-+ if (stats->ls_flags & LPROCFS_STATS_FLAG_NOPERCPU) {
-+ if (type & LPROCFS_GET_NUM_CPU)
-+ rc = 1;
-+ if (type & LPROCFS_GET_SMP_ID)
-+ rc = 0;
-+ spin_lock(&stats->ls_lock);
-+ } else {
-+ if (type & LPROCFS_GET_NUM_CPU)
-+ rc = num_possible_cpus();
-+ if (type & LPROCFS_GET_SMP_ID)
-+ rc = smp_processor_id();
-+ }
-+ return rc;
-+}
-+
-+static inline void lprocfs_stats_unlock(struct lprocfs_stats *stats)
-+{
-+ if (stats->ls_flags & LPROCFS_STATS_FLAG_NOPERCPU)
-+ spin_unlock(&stats->ls_lock);
-+}
-+
-+/* Two optimized LPROCFS counter increment functions are provided:
-+ * lprocfs_counter_incr(cntr) - optimized for by-one counters
-+ * lprocfs_counter_add(cntr, value) - use for multi-valued counters
-+ * Counter data layout allows config flag, counter lock and the
-+ * count itself to reside within a single cache line.
-+ */
-+
-+extern void lprocfs_counter_add(struct lprocfs_stats *stats, int idx,
-+ long amount);
-+extern void lprocfs_counter_sub(struct lprocfs_stats *stats, int idx,
-+ long amount);
-+
-+#define lprocfs_counter_incr(stats, idx) \
-+ lprocfs_counter_add(stats, idx, 1)
-+#define lprocfs_counter_decr(stats, idx) \
-+ lprocfs_counter_sub(stats, idx, 1)
-+
-+extern __s64 lprocfs_read_helper(struct lprocfs_counter *lc,
-+ enum lprocfs_fields_flags field);
-+
-+static inline __u64 lprocfs_stats_collector(struct lprocfs_stats *stats,
-+ int idx,
-+ enum lprocfs_fields_flags field)
-+{
-+ __u64 ret = 0;
-+ int i;
-+
-+ LASSERT(stats != NULL);
-+ for (i = 0; i < num_possible_cpus(); i++)
-+ ret += lprocfs_read_helper(&(stats->ls_percpu[i]->lp_cntr[idx]),
-+ field);
-+ return ret;
-+}
-+
-+extern struct lprocfs_stats *lprocfs_alloc_stats(unsigned int num,
-+ enum lprocfs_stats_flags flags);
-+extern void lprocfs_clear_stats(struct lprocfs_stats *stats);
-+extern void lprocfs_free_stats(struct lprocfs_stats **stats);
-+extern void lprocfs_init_ops_stats(int num_private_stats,
-+ struct lprocfs_stats *stats);
-+extern void lprocfs_init_ldlm_stats(struct lprocfs_stats *ldlm_stats);
-+extern int lprocfs_alloc_obd_stats(struct obd_device *obddev,
-+ unsigned int num_private_stats);
-+extern void lprocfs_counter_init(struct lprocfs_stats *stats, int index,
-+ unsigned conf, const char *name,
-+ const char *units);
-+extern void lprocfs_free_obd_stats(struct obd_device *obddev);
-+struct obd_export;
-+extern int lprocfs_add_clear_entry(struct obd_device * obd,
-+ cfs_proc_dir_entry_t *entry);
-+extern int lprocfs_exp_setup(struct obd_export *exp,
-+ lnet_nid_t *peer_nid, int *newnid);
-+extern int lprocfs_exp_cleanup(struct obd_export *exp);
-+extern int lprocfs_add_simple(struct proc_dir_entry *root,
-+ char *name, read_proc_t *read_proc,
-+ write_proc_t *write_proc, void *data);
-+extern int lprocfs_register_stats(cfs_proc_dir_entry_t *root, const char *name,
-+ struct lprocfs_stats *stats);
-+
-+/* lprocfs_status.c */
-+extern int lprocfs_add_vars(cfs_proc_dir_entry_t *root,
-+ struct lprocfs_vars *var,
-+ void *data);
-+
-+extern cfs_proc_dir_entry_t *lprocfs_register(const char *name,
-+ cfs_proc_dir_entry_t *parent,
-+ struct lprocfs_vars *list,
-+ void *data);
-+
-+extern void lprocfs_remove(cfs_proc_dir_entry_t **root);
-+
-+extern cfs_proc_dir_entry_t *lprocfs_srch(cfs_proc_dir_entry_t *root,
-+ const char *name);
-+
-+extern int lprocfs_obd_setup(struct obd_device *obd, struct lprocfs_vars *list);
-+extern int lprocfs_obd_cleanup(struct obd_device *obd);
-+extern int lprocfs_add_simple(struct proc_dir_entry *root, char *name,
-+ read_proc_t *read_proc, write_proc_t *write_proc,
-+ void *data);
-+struct nid_stat;
-+extern void lprocfs_free_per_client_stats(struct obd_device *obd);
-+extern int lprocfs_nid_stats_clear_write(struct file *file, const char *buffer,
-+ unsigned long count, void *data);
-+extern int lprocfs_nid_stats_clear_read(char *page, char **start, off_t off,
-+ int count, int *eof, void *data);
-+
-+
-+extern struct file_operations lprocfs_evict_client_fops;
-+
-+extern int lprocfs_seq_create(cfs_proc_dir_entry_t *parent, char *name,
-+ mode_t mode, struct file_operations *seq_fops,
-+ void *data);
-+extern int lprocfs_obd_seq_create(struct obd_device *dev, char *name,
-+ mode_t mode, struct file_operations *seq_fops,
-+ void *data);
-+
-+/* Generic callbacks */
-+
-+extern int lprocfs_rd_u64(char *page, char **start, off_t off,
-+ int count, int *eof, void *data);
-+extern int lprocfs_rd_atomic(char *page, char **start, off_t off,
-+ int count, int *eof, void *data);
-+extern int lprocfs_wr_atomic(struct file *file, const char *buffer,
-+ unsigned long count, void *data);
-+extern int lprocfs_rd_uint(char *page, char **start, off_t off,
-+ int count, int *eof, void *data);
-+extern int lprocfs_wr_uint(struct file *file, const char *buffer,
-+ unsigned long count, void *data);
-+extern int lprocfs_rd_uuid(char *page, char **start, off_t off,
-+ int count, int *eof, void *data);
-+extern int lprocfs_rd_name(char *page, char **start, off_t off,
-+ int count, int *eof, void *data);
-+extern int lprocfs_rd_fstype(char *page, char **start, off_t off,
-+ int count, int *eof, void *data);
-+extern int lprocfs_rd_server_uuid(char *page, char **start, off_t off,
-+ int count, int *eof, void *data);
-+extern int lprocfs_rd_conn_uuid(char *page, char **start, off_t off,
-+ int count, int *eof, void *data);
-+extern int lprocfs_rd_import(char *page, char **start, off_t off, int count,
-+ int *eof, void *data);
-+extern int lprocfs_rd_connect_flags(char *page, char **start, off_t off,
-+ int count, int *eof, void *data);
-+extern int lprocfs_rd_num_exports(char *page, char **start, off_t off,
-+ int count, int *eof, void *data);
-+extern int lprocfs_rd_numrefs(char *page, char **start, off_t off,
-+ int count, int *eof, void *data);
-+struct adaptive_timeout;
-+extern int lprocfs_at_hist_helper(char *page, int count, int rc,
-+ struct adaptive_timeout *at);
-+extern int lprocfs_rd_timeouts(char *page, char **start, off_t off,
-+ int count, int *eof, void *data);
-+extern int lprocfs_wr_timeouts(struct file *file, const char *buffer,
-+ unsigned long count, void *data);
-+extern int lprocfs_wr_evict_client(struct file *file, const char *buffer,
-+ unsigned long count, void *data);
-+extern int lprocfs_wr_ping(struct file *file, const char *buffer,
-+ unsigned long count, void *data);
-+
-+/* Statfs helpers */
-+extern int lprocfs_rd_blksize(char *page, char **start, off_t off,
-+ int count, int *eof, void *data);
-+extern int lprocfs_rd_kbytestotal(char *page, char **start, off_t off,
-+ int count, int *eof, void *data);
-+extern int lprocfs_rd_kbytesfree(char *page, char **start, off_t off,
-+ int count, int *eof, void *data);
-+extern int lprocfs_rd_kbytesavail(char *page, char **start, off_t off,
-+ int count, int *eof, void *data);
-+extern int lprocfs_rd_filestotal(char *page, char **start, off_t off,
-+ int count, int *eof, void *data);
-+extern int lprocfs_rd_filesfree(char *page, char **start, off_t off,
-+ int count, int *eof, void *data);
-+extern int lprocfs_rd_filegroups(char *page, char **start, off_t off,
-+ int count, int *eof, void *data);
-+
-+extern int lprocfs_write_helper(const char *buffer, unsigned long count,
-+ int *val);
-+extern int lprocfs_write_frac_helper(const char *buffer, unsigned long count,
-+ int *val, int mult);
-+extern int lprocfs_read_frac_helper(char *buffer, unsigned long count,
-+ long val, int mult);
-+extern int lprocfs_write_u64_helper(const char *buffer, unsigned long count,
-+ __u64 *val);
-+extern int lprocfs_write_frac_u64_helper(const char *buffer, unsigned long count,
-+ __u64 *val, int mult);
-+void lprocfs_oh_tally(struct obd_histogram *oh, unsigned int value);
-+void lprocfs_oh_tally_log2(struct obd_histogram *oh, unsigned int value);
-+void lprocfs_oh_clear(struct obd_histogram *oh);
-+unsigned long lprocfs_oh_sum(struct obd_histogram *oh);
-+
-+/* lprocfs_status.c: counter read/write functions */
-+extern int lprocfs_counter_read(char *page, char **start, off_t off,
-+ int count, int *eof, void *data);
-+extern int lprocfs_counter_write(struct file *file, const char *buffer,
-+ unsigned long count, void *data);
-+
-+/* lprocfs_status.c: recovery status */
-+int lprocfs_obd_rd_recovery_status(char *page, char **start, off_t off,
-+ int count, int *eof, void *data);
-+
-+/* lprocfs_status.c: hash statistics */
-+int lprocfs_obd_rd_hash(char *page, char **start, off_t off,
-+ int count, int *eof, void *data);
-+
-+extern int lprocfs_seq_release(struct inode *, struct file *);
-+
-+/* in lprocfs_stat.c, to protect the private data for proc entries */
-+extern struct rw_semaphore _lprocfs_lock;
-+#define LPROCFS_ENTRY() do { \
-+ down_read(&_lprocfs_lock); \
-+} while(0)
-+#define LPROCFS_EXIT() do { \
-+ up_read(&_lprocfs_lock); \
-+} while(0)
-+#define LPROCFS_ENTRY_AND_CHECK(dp) do { \
-+ typecheck(struct proc_dir_entry *, dp); \
-+ LPROCFS_ENTRY(); \
-+ if ((dp)->deleted) { \
-+ LPROCFS_EXIT(); \
-+ return -ENODEV; \
-+ } \
-+} while(0)
-+#define LPROCFS_WRITE_ENTRY() do { \
-+ down_write(&_lprocfs_lock); \
-+} while(0)
-+#define LPROCFS_WRITE_EXIT() do { \
-+ up_write(&_lprocfs_lock); \
-+} while(0)
-+
-+/* You must use these macros when you want to refer to
-+ * the import in a client obd_device for a lprocfs entry */
-+#define LPROCFS_CLIMP_CHECK(obd) do { \
-+ typecheck(struct obd_device *, obd); \
-+ down_read(&(obd)->u.cli.cl_sem); \
-+ if ((obd)->u.cli.cl_import == NULL) { \
-+ up_read(&(obd)->u.cli.cl_sem); \
-+ return -ENODEV; \
-+ } \
-+} while(0)
-+#define LPROCFS_CLIMP_EXIT(obd) \
-+ up_read(&(obd)->u.cli.cl_sem);
-+
-+
-+/* write the name##_seq_show function, call LPROC_SEQ_FOPS_RO for read-only
-+ proc entries; otherwise, also define a name##_seq_write function for a
-+ read-write proc entry, and call LPROC_SEQ_FOPS instead. Finally, call
-+ lprocfs_obd_seq_create(obd, filename, 0444, &name##_fops, data); */
-+#define __LPROC_SEQ_FOPS(name, custom_seq_write) \
-+static int name##_seq_open(struct inode *inode, struct file *file) { \
-+ struct proc_dir_entry *dp = PDE(inode); \
-+ int rc; \
-+ LPROCFS_ENTRY_AND_CHECK(dp); \
-+ rc = single_open(file, name##_seq_show, dp->data); \
-+ if (rc) { \
-+ LPROCFS_EXIT(); \
-+ return rc; \
-+ } \
-+ return 0; \
-+} \
-+struct file_operations name##_fops = { \
-+ .owner = THIS_MODULE, \
-+ .open = name##_seq_open, \
-+ .read = seq_read, \
-+ .write = custom_seq_write, \
-+ .llseek = seq_lseek, \
-+ .release = lprocfs_seq_release, \
-+}
-+
-+#define LPROC_SEQ_FOPS_RO(name) __LPROC_SEQ_FOPS(name, NULL)
-+#define LPROC_SEQ_FOPS(name) __LPROC_SEQ_FOPS(name, name##_seq_write)
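A minimal sketch of the pattern the comment above describes; "foo", the obd handle and the data pointer are placeholders:

static int foo_seq_show(struct seq_file *m, void *unused)
{
        seq_printf(m, "%d\n", 42);
        return 0;
}
LPROC_SEQ_FOPS_RO(foo);

        /* later, during obd setup: */
        rc = lprocfs_obd_seq_create(obd, "foo", 0444, &foo_fops, obd);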
-+
-+/* lproc_ptlrpc.c */
-+struct ptlrpc_request;
-+extern void target_print_req(void *seq_file, struct ptlrpc_request *req);
-+
-+#ifdef CRAY_XT3
-+/* lprocfs_status.c: read recovery max time bz13079 */
-+int lprocfs_obd_rd_recovery_maxtime(char *page, char **start, off_t off,
-+ int count, int *eof, void *data);
-+
-+/* lprocfs_status.c: write recovery max time bz13079 */
-+int lprocfs_obd_wr_recovery_maxtime(struct file *file, const char *buffer,
-+ unsigned long count, void *data);
-+#endif
-+
-+/* all quota proc functions */
-+extern int lprocfs_quota_rd_bunit(char *page, char **start, off_t off, int count,
-+ int *eof, void *data);
-+extern int lprocfs_quota_wr_bunit(struct file *file, const char *buffer,
-+ unsigned long count, void *data);
-+extern int lprocfs_quota_rd_btune(char *page, char **start, off_t off, int count,
-+ int *eof, void *data);
-+extern int lprocfs_quota_wr_btune(struct file *file, const char *buffer,
-+ unsigned long count, void *data);
-+extern int lprocfs_quota_rd_iunit(char *page, char **start, off_t off, int count,
-+ int *eof, void *data);
-+extern int lprocfs_quota_wr_iunit(struct file *file, const char *buffer,
-+ unsigned long count, void *data);
-+extern int lprocfs_quota_rd_itune(char *page, char **start, off_t off, int count,
-+ int *eof, void *data);
-+extern int lprocfs_quota_wr_itune(struct file *file, const char *buffer,
-+ unsigned long count, void *data);
-+extern int lprocfs_quota_rd_type(char *page, char **start, off_t off, int count,
-+ int *eof, void *data);
-+extern int lprocfs_quota_wr_type(struct file *file, const char *buffer,
-+ unsigned long count, void *data);
-+extern int lprocfs_quota_rd_switch_seconds(char *page, char **start, off_t off,
-+ int count, int *eof, void *data);
-+extern int lprocfs_quota_wr_switch_seconds(struct file *file, const char *buffer,
-+ unsigned long count, void *data);
-+extern int lprocfs_quota_rd_sync_blk(char *page, char **start, off_t off,
-+ int count, int *eof, void *data);
-+extern int lprocfs_quota_wr_sync_blk(struct file *file, const char *buffer,
-+ unsigned long count, void *data);
-+extern int lprocfs_quota_rd_switch_qs(char *page, char **start, off_t off,
-+ int count, int *eof, void *data);
-+extern int lprocfs_quota_wr_switch_qs(struct file *file, const char *buffer,
-+ unsigned long count, void *data);
-+extern int lprocfs_quota_rd_boundary_factor(char *page, char **start, off_t off,
-+ int count, int *eof, void *data);
-+extern int lprocfs_quota_wr_boundary_factor(struct file *file, const char *buffer,
-+ unsigned long count, void *data);
-+extern int lprocfs_quota_rd_least_bunit(char *page, char **start, off_t off,
-+ int count, int *eof, void *data);
-+extern int lprocfs_quota_wr_least_bunit(struct file *file, const char *buffer,
-+ unsigned long count, void *data);
-+extern int lprocfs_quota_rd_least_iunit(char *page, char **start, off_t off,
-+ int count, int *eof, void *data);
-+extern int lprocfs_quota_wr_least_iunit(struct file *file, const char *buffer,
-+ unsigned long count, void *data);
-+extern int lprocfs_quota_rd_qs_factor(char *page, char **start, off_t off,
-+ int count, int *eof, void *data);
-+extern int lprocfs_quota_wr_qs_factor(struct file *file, const char *buffer,
-+ unsigned long count, void *data);
-+
-+#else
-+/* LPROCFS is not defined */
-+static inline void lprocfs_counter_add(struct lprocfs_stats *stats,
-+ int index, long amount) { return; }
-+static inline void lprocfs_counter_incr(struct lprocfs_stats *stats,
-+ int index) { return; }
-+static inline void lprocfs_counter_sub(struct lprocfs_stats *stats,
-+ int index, long amount) { return; }
-+static inline void lprocfs_counter_init(struct lprocfs_stats *stats,
-+ int index, unsigned conf,
-+ const char *name, const char *units)
-+{ return; }
-+
-+static inline __u64 lc_read_helper(struct lprocfs_counter *lc,
-+ enum lprocfs_fields_flags field)
-+{ return 0; }
-+
-+static inline struct lprocfs_stats* lprocfs_alloc_stats(unsigned int num,
-+ enum lprocfs_stats_flags flags)
-+{ return NULL; }
-+static inline void lprocfs_clear_stats(struct lprocfs_stats *stats)
-+{ return; }
-+static inline void lprocfs_free_stats(struct lprocfs_stats **stats)
-+{ return; }
-+static inline int lprocfs_register_stats(cfs_proc_dir_entry_t *root,
-+ const char *name,
-+ struct lprocfs_stats *stats)
-+{ return 0; }
-+static inline void lprocfs_init_ops_stats(int num_private_stats,
-+ struct lprocfs_stats *stats)
-+{ return; }
-+static inline void lprocfs_init_ldlm_stats(struct lprocfs_stats *ldlm_stats)
-+{ return; }
-+static inline int lprocfs_alloc_obd_stats(struct obd_device *obddev,
-+ unsigned int num_private_stats)
-+{ return 0; }
-+static inline void lprocfs_free_obd_stats(struct obd_device *obddev)
-+{ return; }
-+
-+struct obd_export;
-+static inline int lprocfs_add_clear_entry(struct obd_export *exp)
-+{ return 0; }
-+static inline int lprocfs_exp_setup(struct obd_export *exp,
-+ lnet_nid_t *peer_nid, int *newnid)
-+{ return 0; }
-+static inline int lprocfs_exp_cleanup(struct obd_export *exp)
-+{ return 0; }
-+static inline int lprocfs_add_simple(struct proc_dir_entry *root,
-+ char *name,
-+ read_proc_t *read_proc,
-+ write_proc_t *write_proc,
-+ void *data)
-+{return 0; }
-+struct nid_stat;
-+static inline void lprocfs_free_per_client_stats(struct obd_device *obd)
-+{}
-+static inline
-+int lprocfs_nid_stats_clear_write(struct file *file, const char *buffer,
-+ unsigned long count, void *data)
-+{return count;}
-+static inline
-+int lprocfs_nid_stats_clear_read(char *page, char **start, off_t off,
-+ int count, int *eof, void *data)
-+{return count;}
-+
-+
-+static inline cfs_proc_dir_entry_t *
-+lprocfs_register(const char *name, cfs_proc_dir_entry_t *parent,
-+ struct lprocfs_vars *list, void *data) { return NULL; }
-+static inline int lprocfs_add_vars(cfs_proc_dir_entry_t *root,
-+ struct lprocfs_vars *var,
-+ void *data) { return 0; }
-+static inline void lprocfs_remove(cfs_proc_dir_entry_t **root) {};
-+static inline cfs_proc_dir_entry_t *lprocfs_srch(cfs_proc_dir_entry_t *head,
-+ const char *name) {return 0;}
-+static inline int lprocfs_obd_setup(struct obd_device *dev,
-+ struct lprocfs_vars *list) { return 0; }
-+static inline int lprocfs_obd_cleanup(struct obd_device *dev) { return 0; }
-+static inline int lprocfs_rd_u64(char *page, char **start, off_t off,
-+ int count, int *eof, void *data) { return 0; }
-+static inline int lprocfs_rd_uuid(char *page, char **start, off_t off,
-+ int count, int *eof, void *data) { return 0; }
-+static inline int lprocfs_rd_name(char *page, char **start, off_t off,
-+ int count, int *eof, void *data) { return 0; }
-+static inline int lprocfs_rd_server_uuid(char *page, char **start, off_t off,
-+ int count, int *eof, void *data)
-+{ return 0; }
-+static inline int lprocfs_rd_conn_uuid(char *page, char **start, off_t off,
-+ int count, int *eof, void *data)
-+{ return 0; }
-+static inline int lprocfs_rd_import(char *page, char **start, off_t off, int count,
-+ int *eof, void *data) { return 0; }
-+static inline int lprocfs_rd_connect_flags(char *page, char **start, off_t off,
-+ int count, int *eof, void *data)
-+{ return 0; }
-+static inline int lprocfs_rd_num_exports(char *page, char **start, off_t off,
-+ int count, int *eof, void *data)
-+{ return 0; }
-+static inline int lprocfs_rd_numrefs(char *page, char **start, off_t off,
-+ int count, int *eof, void *data)
-+{ return 0; }
-+struct adaptive_timeout;
-+static inline int lprocfs_at_hist_helper(char *page, int count, int rc,
-+ struct adaptive_timeout *at)
-+{ return 0; }
-+static inline int lprocfs_rd_timeouts(char *page, char **start, off_t off,
-+ int count, int *eof, void *data)
-+{ return 0; }
-+static inline int lprocfs_wr_timeouts(struct file *file, const char *buffer,
-+ unsigned long count, void *data)
-+{ return 0; }
-+static inline int lprocfs_wr_evict_client(struct file *file, const char *buffer,
-+ unsigned long count, void *data)
-+{ return 0; }
-+static inline int lprocfs_wr_ping(struct file *file, const char *buffer,
-+ unsigned long count, void *data)
-+{ return 0; }
-+
-+
-+/* Statfs helpers */
-+static inline
-+int lprocfs_rd_blksize(char *page, char **start, off_t off,
-+ int count, int *eof, void *data) { return 0; }
-+static inline
-+int lprocfs_rd_kbytestotal(char *page, char **start, off_t off,
-+ int count, int *eof, void *data) { return 0; }
-+static inline
-+int lprocfs_rd_kbytesfree(char *page, char **start, off_t off,
-+ int count, int *eof, void *data) { return 0; }
-+static inline
-+int lprocfs_rd_kbytesavail(char *page, char **start, off_t off,
-+ int count, int *eof, void *data) { return 0; }
-+static inline
-+int lprocfs_rd_filestotal(char *page, char **start, off_t off,
-+ int count, int *eof, void *data) { return 0; }
-+static inline
-+int lprocfs_rd_filesfree(char *page, char **start, off_t off,
-+ int count, int *eof, void *data) { return 0; }
-+static inline
-+int lprocfs_rd_filegroups(char *page, char **start, off_t off,
-+ int count, int *eof, void *data) { return 0; }
-+static inline
-+void lprocfs_oh_tally(struct obd_histogram *oh, unsigned int value) {}
-+static inline
-+void lprocfs_oh_tally_log2(struct obd_histogram *oh, unsigned int value) {}
-+static inline
-+void lprocfs_oh_clear(struct obd_histogram *oh) {}
-+static inline
-+unsigned long lprocfs_oh_sum(struct obd_histogram *oh) { return 0; }
-+static inline
-+int lprocfs_counter_read(char *page, char **start, off_t off,
-+ int count, int *eof, void *data) { return 0; }
-+static inline
-+int lprocfs_counter_write(struct file *file, const char *buffer,
-+ unsigned long count, void *data) { return 0; }
-+
-+static inline
-+__u64 lprocfs_stats_collector(struct lprocfs_stats *stats, int idx,
-+ enum lprocfs_fields_flags field)
-+{ return (__u64)0; }
-+
-+#define LPROCFS_ENTRY()
-+#define LPROCFS_EXIT()
-+#define LPROCFS_ENTRY_AND_CHECK(dp)
-+#define LPROC_SEQ_FOPS_RO(name)
-+#define LPROC_SEQ_FOPS(name)
-+
-+/* lproc_ptlrpc.c */
-+#define target_print_req NULL
-+
-+#endif /* LPROCFS */
-+
-+#endif /* LPROCFS_SNMP_H */
-diff -urNad lustre~/lustre/llite/file.c lustre/lustre/llite/file.c
---- lustre~/lustre/llite/file.c 2009-03-13 09:45:02.000000000 +0100
-+++ lustre/lustre/llite/file.c 2009-03-13 09:45:03.000000000 +0100
-@@ -1801,11 +1801,12 @@
- #endif
- }
-
-+#ifdef HAVE_KERNEL_SENDFILE
- /*
- * Send file content (through pagecache) somewhere with helper
- */
--static ssize_t ll_file_sendfile(struct file *in_file, loff_t *ppos,size_t count,
-- read_actor_t actor, void *target)
-+static ssize_t ll_file_sendfile(struct file *in_file, loff_t *ppos,
-+ size_t count, read_actor_t actor, void *target)
- {
- struct inode *inode = in_file->f_dentry->d_inode;
- struct ll_inode_info *lli = ll_i2info(inode);
-@@ -1814,10 +1815,10 @@
- struct ll_lock_tree_node *node;
- struct ost_lvb lvb;
- struct ll_ra_read bead;
-- int rc;
-- ssize_t retval;
-+ ssize_t rc;
- __u64 kms;
- ENTRY;
-+
- CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),size="LPSZ",offset=%Ld\n",
- inode->i_ino, inode->i_generation, inode, count, *ppos);
-
-@@ -1831,8 +1832,10 @@
- in_file->f_ra.ra_pages = 0;
-
- /* File with no objects, nothing to lock */
-- if (!lsm)
-- RETURN(generic_file_sendfile(in_file, ppos, count, actor, target));
-+ if (!lsm) {
-+ rc = generic_file_sendfile(in_file, ppos, count, actor, target);
-+ RETURN(rc);
-+ }
-
- node = ll_node_from_inode(inode, *ppos, *ppos + count - 1, LCK_PR);
- if (IS_ERR(node))
-@@ -1872,8 +1875,8 @@
- /* A glimpse is necessary to determine whether we return a
- * short read (B) or some zeroes at the end of the buffer (C) */
- ll_inode_size_unlock(inode, 1);
-- retval = ll_glimpse_size(inode, LDLM_FL_BLOCK_GRANTED);
-- if (retval)
-+ rc = ll_glimpse_size(inode, LDLM_FL_BLOCK_GRANTED);
-+ if (rc)
- goto out;
- } else {
- /* region is within kms and, hence, within real file size (A) */
-@@ -1889,13 +1892,115 @@
- ll_ra_read_in(in_file, &bead);
- /* BUG: 5972 */
- file_accessed(in_file);
-- retval = generic_file_sendfile(in_file, ppos, count, actor, target);
-+ rc = generic_file_sendfile(in_file, ppos, count, actor, target);
- ll_ra_read_ex(in_file, &bead);
-
- out:
- ll_tree_unlock(&tree);
-- RETURN(retval);
-+ RETURN(rc);
-+}
-+#endif
-+
-+/* change based on
-+ * http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commit;h=f0930fffa99e7fe0a0c4b6c7d9a244dc88288c27
-+ */
-+#ifdef HAVE_KERNEL_SPLICE_READ
-+static ssize_t ll_file_splice_read(struct file *in_file, loff_t *ppos,
-+ struct pipe_inode_info *pipe, size_t count,
-+ unsigned int flags)
-+{
-+ struct inode *inode = in_file->f_dentry->d_inode;
-+ struct ll_inode_info *lli = ll_i2info(inode);
-+ struct lov_stripe_md *lsm = lli->lli_smd;
-+ struct ll_lock_tree tree;
-+ struct ll_lock_tree_node *node;
-+ struct ost_lvb lvb;
-+ struct ll_ra_read bead;
-+ ssize_t rc;
-+ __u64 kms;
-+ ENTRY;
-+
-+ CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),size="LPSZ",offset=%Ld\n",
-+ inode->i_ino, inode->i_generation, inode, count, *ppos);
-+
-+ /* "If nbyte is 0, read() will return 0 and have no other results."
-+ * -- Single Unix Spec */
-+ if (count == 0)
-+ RETURN(0);
-+
-+ ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_READ_BYTES, count);
-+ /* turn off the kernel's read-ahead */
-+ in_file->f_ra.ra_pages = 0;
-+
-+ /* File with no objects, nothing to lock */
-+ if (!lsm) {
-+ rc = generic_file_splice_read(in_file, ppos, pipe, count, flags);
-+ RETURN(rc);
-+ }
-+
-+ node = ll_node_from_inode(inode, *ppos, *ppos + count - 1, LCK_PR);
-+ if (IS_ERR(node))
-+ RETURN(PTR_ERR(node));
-+
-+ tree.lt_fd = LUSTRE_FPRIVATE(in_file);
-+ rc = ll_tree_lock(&tree, node, NULL, count,
-+ in_file->f_flags & O_NONBLOCK?LDLM_FL_BLOCK_NOWAIT:0);
-+ if (rc != 0)
-+ RETURN(rc);
-+
-+ ll_clear_file_contended(inode);
-+ ll_inode_size_lock(inode, 1);
-+ /*
-+ * Consistency guarantees: following possibilities exist for the
-+ * relation between region being read and real file size at this
-+ * moment:
-+ *
-+ * (A): the region is completely inside of the file;
-+ *
-+ * (B-x): x bytes of region are inside of the file, the rest is
-+ * outside;
-+ *
-+ * (C): the region is completely outside of the file.
-+ *
-+ * This classification is stable under DLM lock acquired by
-+ * ll_tree_lock() above, because to change class, another client would have
-+ * to take a DLM lock conflicting with our lock. Also, any updates to
-+ * ->i_size by other threads on this client are serialized by
-+ * ll_inode_size_lock(). This guarantees that short reads are handled
-+ * correctly in the face of concurrent writes and truncates.
-+ */
-+ inode_init_lvb(inode, &lvb);
-+ obd_merge_lvb(ll_i2sbi(inode)->ll_osc_exp, lsm, &lvb, 1);
-+ kms = lvb.lvb_size;
-+ if (*ppos + count - 1 > kms) {
-+ /* A glimpse is necessary to determine whether we return a
-+ * short read (B) or some zeroes at the end of the buffer (C) */
-+ ll_inode_size_unlock(inode, 1);
-+ rc = ll_glimpse_size(inode, LDLM_FL_BLOCK_GRANTED);
-+ if (rc)
-+ goto out;
-+ } else {
-+ /* region is within kms and, hence, within real file size (A) */
-+ i_size_write(inode, kms);
-+ ll_inode_size_unlock(inode, 1);
-+ }
-+
-+ CDEBUG(D_INFO, "Send ino %lu, "LPSZ" bytes, offset %lld, i_size %llu\n",
-+ inode->i_ino, count, *ppos, i_size_read(inode));
-+
-+ bead.lrr_start = *ppos >> CFS_PAGE_SHIFT;
-+ bead.lrr_count = (count + CFS_PAGE_SIZE - 1) >> CFS_PAGE_SHIFT;
-+ ll_ra_read_in(in_file, &bead);
-+ /* BUG: 5972 */
-+ file_accessed(in_file);
-+ rc = generic_file_splice_read(in_file, ppos, pipe, count, flags);
-+ ll_ra_read_ex(in_file, &bead);
-+
-+ out:
-+ ll_tree_unlock(&tree);
-+ RETURN(rc);
- }
-+#endif
-
- static int ll_lov_recreate_obj(struct inode *inode, struct file *file,
- unsigned long arg)
-@@ -3084,7 +3189,11 @@
- }
-
- #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,10))
-+#ifndef HAVE_INODE_PERMISION_2ARGS
- int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd)
-+#else
-+int ll_inode_permission(struct inode *inode, int mask)
-+#endif
- {
- CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), mask %o\n",
- inode->i_ino, inode->i_generation, inode, mask);
-@@ -3093,7 +3202,7 @@
- return generic_permission(inode, mask, lustre_check_acl);
- }
- #else
--#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
-+#ifndef HAVE_INODE_PERMISION_2ARGS
- int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd)
- #else
- int ll_inode_permission(struct inode *inode, int mask)
-@@ -3163,7 +3272,12 @@
- .release = ll_file_release,
- .mmap = ll_file_mmap,
- .llseek = ll_file_seek,
-+#ifdef HAVE_KERNEL_SPLICE_READ
-+ .splice_read = ll_file_splice_read,
-+#endif
-+#ifdef HAVE_KERNEL_SENDFILE
- .sendfile = ll_file_sendfile,
-+#endif
- .fsync = ll_fsync,
- };
-
-@@ -3185,7 +3299,12 @@
- .release = ll_file_release,
- .mmap = ll_file_mmap,
- .llseek = ll_file_seek,
-+#ifdef HAVE_KERNEL_SPLICE_READ
-+ .splice_read = ll_file_splice_read,
-+#endif
-+#ifdef HAVE_KERNEL_SENDFILE
- .sendfile = ll_file_sendfile,
-+#endif
- .fsync = ll_fsync,
- #ifdef HAVE_F_OP_FLOCK
- .flock = ll_file_flock,
-@@ -3212,7 +3331,12 @@
- .release = ll_file_release,
- .mmap = ll_file_mmap,
- .llseek = ll_file_seek,
-+#ifdef HAVE_KERNEL_SPLICE_READ
-+ .splice_read = ll_file_splice_read,
-+#endif
-+#ifdef HAVE_KERNEL_SENDFILE
- .sendfile = ll_file_sendfile,
-+#endif
- .fsync = ll_fsync,
- #ifdef HAVE_F_OP_FLOCK
- .flock = ll_file_noflock,
-diff -urNad lustre~/lustre/llite/file.c.orig lustre/lustre/llite/file.c.orig
---- lustre~/lustre/llite/file.c.orig 1970-01-01 00:00:00.000000000 +0000
-+++ lustre/lustre/llite/file.c.orig 2009-03-13 09:45:03.000000000 +0100
-@@ -0,0 +1,3335 @@
-+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-+ * vim:expandtab:shiftwidth=8:tabstop=8:
-+ *
-+ * GPL HEADER START
-+ *
-+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
-+ *
-+ * This program is free software; you can redistribute it and/or modify
-+ * it under the terms of the GNU General Public License version 2 only,
-+ * as published by the Free Software Foundation.
-+ *
-+ * This program is distributed in the hope that it will be useful, but
-+ * WITHOUT ANY WARRANTY; without even the implied warranty of
-+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-+ * General Public License version 2 for more details (a copy is included
-+ * in the LICENSE file that accompanied this code).
-+ *
-+ * You should have received a copy of the GNU General Public License
-+ * version 2 along with this program; If not, see
-+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
-+ *
-+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
-+ * CA 95054 USA or visit www.sun.com if you need additional information or
-+ * have any questions.
-+ *
-+ * GPL HEADER END
-+ */
-+/*
-+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved
-+ * Use is subject to license terms.
-+ */
-+/*
-+ * This file is part of Lustre, http://www.lustre.org/
-+ * Lustre is a trademark of Sun Microsystems, Inc.
-+ *
-+ * lustre/llite/file.c
-+ *
-+ * Author: Peter Braam <braam at clusterfs.com>
-+ * Author: Phil Schwan <phil at clusterfs.com>
-+ * Author: Andreas Dilger <adilger at clusterfs.com>
-+ */
-+
-+#define DEBUG_SUBSYSTEM S_LLITE
-+#include <lustre_dlm.h>
-+#include <lustre_lite.h>
-+#include <linux/pagemap.h>
-+#include <linux/file.h>
-+#include <linux/posix_acl.h>
-+#include "llite_internal.h"
-+#include <lustre/ll_fiemap.h>
-+
-+/* also used by llite/special.c:ll_special_open() */
-+struct ll_file_data *ll_file_data_get(void)
-+{
-+ struct ll_file_data *fd;
-+
-+ OBD_SLAB_ALLOC_PTR(fd, ll_file_data_slab);
-+ return fd;
-+}
-+
-+static void ll_file_data_put(struct ll_file_data *fd)
-+{
-+ if (fd != NULL)
-+ OBD_SLAB_FREE_PTR(fd, ll_file_data_slab);
-+}
-+
-+static int ll_close_inode_openhandle(struct inode *inode,
-+ struct obd_client_handle *och)
-+{
-+ struct ptlrpc_request *req = NULL;
-+ struct obd_device *obd;
-+ struct obdo *oa;
-+ int rc;
-+ ENTRY;
-+
-+ obd = class_exp2obd(ll_i2mdcexp(inode));
-+ if (obd == NULL) {
-+ CERROR("Invalid MDC connection handle "LPX64"\n",
-+ ll_i2mdcexp(inode)->exp_handle.h_cookie);
-+ GOTO(out, rc = 0);
-+ }
-+
-+ /*
-+ * Here we check if this is a forced umount. If so, this is called on
-+ * cancelling the "open lock", and we do not call mdc_close() in this
-+ * case, as it will not succeed because the import is already deactivated.
-+ */
-+ if (obd->obd_force)
-+ GOTO(out, rc = 0);
-+
-+ OBDO_ALLOC(oa);
-+ if (!oa)
-+ RETURN(-ENOMEM); // XXX We leak openhandle and request here.
-+
-+ oa->o_id = inode->i_ino;
-+ oa->o_valid = OBD_MD_FLID;
-+ obdo_from_inode(oa, inode, OBD_MD_FLTYPE | OBD_MD_FLMODE |
-+ OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
-+ OBD_MD_FLATIME | OBD_MD_FLMTIME |
-+ OBD_MD_FLCTIME);
-+ if (ll_is_inode_dirty(inode)) {
-+ oa->o_flags = MDS_BFLAG_UNCOMMITTED_WRITES;
-+ oa->o_valid |= OBD_MD_FLFLAGS;
-+ }
-+
-+ rc = mdc_close(ll_i2mdcexp(inode), oa, och, &req);
-+ if (rc == EAGAIN) {
-+ /* We are the last writer, so the MDS has instructed us to get
-+ * the file size and any write cookies, then close again. */
-+ ll_queue_done_writing(inode);
-+ rc = 0;
-+ } else if (rc) {
-+ CERROR("inode %lu mdc close failed: rc = %d\n",
-+ inode->i_ino, rc);
-+ }
-+
-+ OBDO_FREE(oa);
-+
-+ if (rc == 0) {
-+ rc = ll_objects_destroy(req, inode);
-+ if (rc)
-+ CERROR("inode %lu ll_objects destroy: rc = %d\n",
-+ inode->i_ino, rc);
-+ }
-+
-+ ptlrpc_req_finished(req); /* This is close request */
-+ EXIT;
-+out:
-+ mdc_clear_open_replay_data(och);
-+
-+ return rc;
-+}
-+
-+int ll_mdc_real_close(struct inode *inode, int flags)
-+{
-+ struct ll_inode_info *lli = ll_i2info(inode);
-+ int rc = 0;
-+ struct obd_client_handle **och_p;
-+ struct obd_client_handle *och;
-+ __u64 *och_usecount;
-+
-+ ENTRY;
-+
-+ if (flags & FMODE_WRITE) {
-+ och_p = &lli->lli_mds_write_och;
-+ och_usecount = &lli->lli_open_fd_write_count;
-+ } else if (flags & FMODE_EXEC) {
-+ och_p = &lli->lli_mds_exec_och;
-+ och_usecount = &lli->lli_open_fd_exec_count;
-+ } else {
-+ LASSERT(flags & FMODE_READ);
-+ och_p = &lli->lli_mds_read_och;
-+ och_usecount = &lli->lli_open_fd_read_count;
-+ }
-+
-+ down(&lli->lli_och_sem);
-+ if (*och_usecount) { /* There are still users of this handle, so
-+ skip freeing it. */
-+ up(&lli->lli_och_sem);
-+ RETURN(0);
-+ }
-+ och = *och_p;
-+ *och_p = NULL;
-+ up(&lli->lli_och_sem);
-+
-+ if (och) { /* There might be a race and somebody may have freed
-+ this och already */
-+ rc = ll_close_inode_openhandle(inode, och);
-+ och->och_fh.cookie = DEAD_HANDLE_MAGIC;
-+ OBD_FREE(och, sizeof *och);
-+ }
-+
-+ RETURN(rc);
-+}
-+
-+int ll_mdc_close(struct obd_export *mdc_exp, struct inode *inode,
-+ struct file *file)
-+{
-+ struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
-+ struct ll_inode_info *lli = ll_i2info(inode);
-+ int rc = 0;
-+ ENTRY;
-+
-+ /* clear group lock, if present */
-+ if (unlikely(fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
-+ struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
-+ fd->fd_flags &= ~(LL_FILE_GROUP_LOCKED|LL_FILE_IGNORE_LOCK);
-+ rc = ll_extent_unlock(fd, inode, lsm, LCK_GROUP,
-+ &fd->fd_cwlockh);
-+ }
-+
-+ /* Let's see if we have good enough OPEN lock on the file and if
-+ we can skip talking to MDS */
-+ if (file->f_dentry->d_inode) { /* Can this ever be false? */
-+ int lockmode;
-+ int flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_TEST_LOCK;
-+ struct lustre_handle lockh;
-+ struct inode *inode = file->f_dentry->d_inode;
-+ struct ldlm_res_id file_res_id = {.name={inode->i_ino,
-+ inode->i_generation}};
-+ ldlm_policy_data_t policy = {.l_inodebits={MDS_INODELOCK_OPEN}};
-+
-+ down(&lli->lli_och_sem);
-+ if (fd->fd_omode & FMODE_WRITE) {
-+ lockmode = LCK_CW;
-+ LASSERT(lli->lli_open_fd_write_count);
-+ lli->lli_open_fd_write_count--;
-+ } else if (fd->fd_omode & FMODE_EXEC) {
-+ lockmode = LCK_PR;
-+ LASSERT(lli->lli_open_fd_exec_count);
-+ lli->lli_open_fd_exec_count--;
-+ } else {
-+ lockmode = LCK_CR;
-+ LASSERT(lli->lli_open_fd_read_count);
-+ lli->lli_open_fd_read_count--;
-+ }
-+ up(&lli->lli_och_sem);
-+
-+ if (!ldlm_lock_match(mdc_exp->exp_obd->obd_namespace, flags,
-+ &file_res_id, LDLM_IBITS, &policy,lockmode,
-+ &lockh)) {
-+ rc = ll_mdc_real_close(file->f_dentry->d_inode,
-+ fd->fd_omode);
-+ }
-+ } else {
-+ CERROR("Releasing a file %p with negative dentry %p. Name %s",
-+ file, file->f_dentry, file->f_dentry->d_name.name);
-+ }
-+
-+ LUSTRE_FPRIVATE(file) = NULL;
-+ ll_file_data_put(fd);
-+
-+ RETURN(rc);
-+}
-+
-+int lov_test_and_clear_async_rc(struct lov_stripe_md *lsm);
-+
-+/* While this returns an error code, the caller (fput()) ignores it, so we
-+ * need to make every effort to clean up all of our state here. Also,
-+ * applications rarely check close errors, and even if an error is returned
-+ * they will not retry the close call.
-+ */
-+int ll_file_release(struct inode *inode, struct file *file)
-+{
-+ struct ll_file_data *fd;
-+ struct ll_sb_info *sbi = ll_i2sbi(inode);
-+ struct ll_inode_info *lli = ll_i2info(inode);
-+ struct lov_stripe_md *lsm = lli->lli_smd;
-+ int rc;
-+ ENTRY;
-+
-+ CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
-+ inode->i_generation, inode);
-+
-+
-+ if (inode->i_sb->s_root != file->f_dentry)
-+ ll_stats_ops_tally(sbi, LPROC_LL_RELEASE, 1);
-+ fd = LUSTRE_FPRIVATE(file);
-+ LASSERT(fd != NULL);
-+
-+ /* The last ref on @file, maybe not the owner pid of statahead.
-+ * Different processes can open the same dir, "ll_opendir_key" means:
-+ * it is me that should stop the statahead thread. */
-+ if (lli->lli_opendir_key == fd && lli->lli_opendir_pid != 0)
-+ ll_stop_statahead(inode, lli->lli_opendir_key);
-+
-+ if (inode->i_sb->s_root == file->f_dentry) {
-+ LUSTRE_FPRIVATE(file) = NULL;
-+ ll_file_data_put(fd);
-+ RETURN(0);
-+ }
-+
-+ if (lsm)
-+ lov_test_and_clear_async_rc(lsm);
-+ lli->lli_async_rc = 0;
-+
-+ /* Ensure that dirty pages are flushed out with the right creds */
-+ if (file->f_mode & FMODE_WRITE)
-+ filemap_fdatawrite(file->f_mapping);
-+
-+ rc = ll_mdc_close(sbi->ll_mdc_exp, inode, file);
-+ RETURN(rc);
-+}
-+
-+static int ll_intent_file_open(struct file *file, void *lmm,
-+ int lmmsize, struct lookup_intent *itp)
-+{
-+ struct ll_sb_info *sbi = ll_i2sbi(file->f_dentry->d_inode);
-+ struct mdc_op_data data;
-+ struct dentry *parent = file->f_dentry->d_parent;
-+ const char *name = file->f_dentry->d_name.name;
-+ const int len = file->f_dentry->d_name.len;
-+ struct inode *inode = file->f_dentry->d_inode;
-+ struct ptlrpc_request *req;
-+ int rc;
-+ ENTRY;
-+
-+ if (!parent)
-+ RETURN(-ENOENT);
-+
-+ ll_prepare_mdc_op_data(&data, parent->d_inode, inode,
-+ name, len, O_RDWR, NULL);
-+
-+ /* Usually we come here only for NFSD, and we want open lock.
-+ But we can also get here with pre 2.6.15 patchless kernels, and in
-+ that case that lock is also ok */
-+ /* We can also get here if there was a cached open handle in revalidate_it
-+ * but it disappeared while we were getting from there to ll_file_open.
-+ * But this means this file was closed and immediately opened, which
-+ * makes it a good candidate for using the OPEN lock */
-+ /* If lmmsize & lmm are not 0, we are just setting stripe info
-+ * parameters. No need for the open lock */
-+ if (!lmm && !lmmsize)
-+ itp->it_flags |= MDS_OPEN_LOCK;
-+
-+ rc = mdc_intent_lock(sbi->ll_mdc_exp, &data, lmm, lmmsize, itp,
-+ 0 /*unused */, &req, ll_mdc_blocking_ast, 0);
-+ if (rc == -ESTALE) {
-+ /* reason for keeping our own exit path - don't flood the log
-+ * with messages about -ESTALE errors.
-+ */
-+ if (!it_disposition(itp, DISP_OPEN_OPEN) ||
-+ it_open_error(DISP_OPEN_OPEN, itp))
-+ GOTO(out, rc);
-+ ll_release_openhandle(file->f_dentry, itp);
-+ GOTO(out, rc);
-+ }
-+
-+ if (rc != 0 || it_open_error(DISP_OPEN_OPEN, itp)) {
-+ rc = rc ? rc : it_open_error(DISP_OPEN_OPEN, itp);
-+ CDEBUG(D_VFSTRACE, "lock enqueue: err: %d\n", rc);
-+ GOTO(out, rc);
-+ }
-+
-+ if (itp->d.lustre.it_lock_mode)
-+ mdc_set_lock_data(&itp->d.lustre.it_lock_handle,
-+ inode);
-+
-+ rc = ll_prep_inode(sbi->ll_osc_exp, &file->f_dentry->d_inode,
-+ req, DLM_REPLY_REC_OFF, NULL);
-+out:
-+ ptlrpc_req_finished(itp->d.lustre.it_data);
-+ it_clear_disposition(itp, DISP_ENQ_COMPLETE);
-+ ll_intent_drop_lock(itp);
-+
-+ RETURN(rc);
-+}
-+
-+
-+static void ll_och_fill(struct ll_inode_info *lli, struct lookup_intent *it,
-+ struct obd_client_handle *och)
-+{
-+ struct ptlrpc_request *req = it->d.lustre.it_data;
-+ struct mds_body *body;
-+
-+ LASSERT(och);
-+
-+ body = lustre_msg_buf(req->rq_repmsg, DLM_REPLY_REC_OFF, sizeof(*body));
-+ LASSERT(body != NULL); /* reply already checked out */
-+ /* and swabbed in mdc_enqueue */
-+ LASSERT(lustre_rep_swabbed(req, DLM_REPLY_REC_OFF));
-+
-+ memcpy(&och->och_fh, &body->handle, sizeof(body->handle));
-+ och->och_magic = OBD_CLIENT_HANDLE_MAGIC;
-+ lli->lli_io_epoch = body->io_epoch;
-+
-+ mdc_set_open_replay_data(och, it->d.lustre.it_data);
-+}
-+
-+int ll_local_open(struct file *file, struct lookup_intent *it,
-+ struct ll_file_data *fd, struct obd_client_handle *och)
-+{
-+ ENTRY;
-+
-+ LASSERT(!LUSTRE_FPRIVATE(file));
-+
-+ LASSERT(fd != NULL);
-+
-+ if (och)
-+ ll_och_fill(ll_i2info(file->f_dentry->d_inode), it, och);
-+ LUSTRE_FPRIVATE(file) = fd;
-+ ll_readahead_init(file->f_dentry->d_inode, &fd->fd_ras);
-+ fd->fd_omode = it->it_flags;
-+
-+ RETURN(0);
-+}
-+
-+/* Open a file, and (for the very first open) create objects on the OSTs at
-+ * this time. If opened with O_LOV_DELAY_CREATE, then we don't do the object
-+ * creation or open until ll_lov_setstripe() ioctl is called. We grab
-+ * lli_open_sem to ensure no other process will create objects, send the
-+ * stripe MD to the MDS, or try to destroy the objects if that fails.
-+ *
-+ * If we already have the stripe MD locally then we don't request it in
-+ * mdc_open(), by passing a lmm_size = 0.
-+ *
-+ * It is up to the application to ensure no other processes open this file
-+ * in the O_LOV_DELAY_CREATE case, or the default striping pattern will be
-+ * used. We might be able to avoid races of that sort by getting lli_open_sem
-+ * before returning in the O_LOV_DELAY_CREATE case and dropping it here
-+ * or in ll_file_release(), but I'm not sure that is desirable/necessary.
-+ */
-+int ll_file_open(struct inode *inode, struct file *file)
-+{
-+ struct ll_inode_info *lli = ll_i2info(inode);
-+ struct lookup_intent *it, oit = { .it_op = IT_OPEN,
-+ .it_flags = file->f_flags };
-+ struct lov_stripe_md *lsm;
-+ struct ptlrpc_request *req = NULL;
-+ struct obd_client_handle **och_p;
-+ __u64 *och_usecount;
-+ struct ll_file_data *fd;
-+ int rc = 0, opendir_set = 0;
-+ ENTRY;
-+
-+ CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), flags %o\n", inode->i_ino,
-+ inode->i_generation, inode, file->f_flags);
-+
-+#ifdef HAVE_VFS_INTENT_PATCHES
-+ it = file->f_it;
-+#else
-+ it = file->private_data; /* XXX: compat macro */
-+ file->private_data = NULL; /* prevent ll_local_open assertion */
-+#endif
-+
-+ fd = ll_file_data_get();
-+ if (fd == NULL)
-+ RETURN(-ENOMEM);
-+
-+ if (S_ISDIR(inode->i_mode)) {
-+again:
-+ spin_lock(&lli->lli_lock);
-+ if (lli->lli_opendir_key == NULL && lli->lli_opendir_pid == 0) {
-+ LASSERT(lli->lli_sai == NULL);
-+ lli->lli_opendir_key = fd;
-+ lli->lli_opendir_pid = cfs_curproc_pid();
-+ opendir_set = 1;
-+ } else if (unlikely(lli->lli_opendir_pid == cfs_curproc_pid() &&
-+ lli->lli_opendir_key != NULL)) {
-+ /* Two cases for this:
-+ * (1) The same process opens such a directory many times.
-+ * (2) The old process opened the directory and exited
-+ * before its child processes did. Then a new process
-+ * with the same pid opens such a directory before the
-+ * old process's child processes exit.
-+ * Reset statahead for such cases. */
-+ spin_unlock(&lli->lli_lock);
-+ CDEBUG(D_INFO, "Conflict statahead for %.*s %lu/%u"
-+ " reset it.\n", file->f_dentry->d_name.len,
-+ file->f_dentry->d_name.name,
-+ inode->i_ino, inode->i_generation);
-+ ll_stop_statahead(inode, lli->lli_opendir_key);
-+ goto again;
-+ }
-+ spin_unlock(&lli->lli_lock);
-+ }
-+
-+ if (inode->i_sb->s_root == file->f_dentry) {
-+ LUSTRE_FPRIVATE(file) = fd;
-+ RETURN(0);
-+ }
-+
-+ if (!it || !it->d.lustre.it_disposition) {
-+ /* Convert f_flags into access mode. We cannot use file->f_mode,
-+ * because everything but O_ACCMODE mask was stripped from it */
-+ if ((oit.it_flags + 1) & O_ACCMODE)
-+ oit.it_flags++;
-+ if (file->f_flags & O_TRUNC)
-+ oit.it_flags |= FMODE_WRITE;
-+
-+ /* The kernel only calls f_op->open in dentry_open. filp_open calls
-+ * dentry_open after a call to open_namei that checks permissions.
-+ * Only nfsd_open calls dentry_open directly without checking
-+ * permissions, and because of that the code below is safe. */
-+ if (oit.it_flags & FMODE_WRITE)
-+ oit.it_flags |= MDS_OPEN_OWNEROVERRIDE;
-+
-+ /* We do not want O_EXCL here, presumably we opened the file
-+ * already? XXX - NFS implications? */
-+ oit.it_flags &= ~O_EXCL;
-+
-+ it = &oit;
-+ }
-+
-+restart:
-+ /* Let's see if we have file open on MDS already. */
-+ if (it->it_flags & FMODE_WRITE) {
-+ och_p = &lli->lli_mds_write_och;
-+ och_usecount = &lli->lli_open_fd_write_count;
-+ } else if (it->it_flags & FMODE_EXEC) {
-+ och_p = &lli->lli_mds_exec_och;
-+ och_usecount = &lli->lli_open_fd_exec_count;
-+ } else {
-+ och_p = &lli->lli_mds_read_och;
-+ och_usecount = &lli->lli_open_fd_read_count;
-+ }
-+
-+ LASSERTF(it->it_flags != 0, "it %p dist %d \n", it,
-+ it->d.lustre.it_disposition);
-+
-+ down(&lli->lli_och_sem);
-+ if (*och_p) { /* Open handle is present */
-+ if (it_disposition(it, DISP_OPEN_OPEN)) {
-+ /* Well, there's an extra open request that we do not need;
-+ let's close it somehow. This will decref the request. */
-+ rc = it_open_error(DISP_OPEN_OPEN, it);
-+ if (rc) {
-+ up(&lli->lli_och_sem);
-+ ll_file_data_put(fd);
-+ GOTO(out_openerr, rc);
-+ }
-+ ll_release_openhandle(file->f_dentry, it);
-+ lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats,
-+ LPROC_LL_OPEN);
-+ }
-+ (*och_usecount)++;
-+
-+ rc = ll_local_open(file, it, fd, NULL);
-+
-+ LASSERTF(rc == 0, "rc = %d\n", rc);
-+ } else {
-+ LASSERT(*och_usecount == 0);
-+ if (!it->d.lustre.it_disposition) {
-+ /* We cannot just request a lock handle now; the new ELC code
-+ means that one of the other OPEN locks for this file
-+ could be cancelled, and since the blocking ast handler
-+ would attempt to grab och_sem as well, that would
-+ result in a deadlock */
-+ up(&lli->lli_och_sem);
-+ rc = ll_intent_file_open(file, NULL, 0, it);
-+ if (rc) {
-+ ll_file_data_put(fd);
-+ GOTO(out_openerr, rc);
-+ }
-+
-+ mdc_set_lock_data(&it->d.lustre.it_lock_handle,
-+ file->f_dentry->d_inode);
-+ goto restart;
-+ }
-+
-+ OBD_ALLOC(*och_p, sizeof (struct obd_client_handle));
-+ if (!*och_p) {
-+ ll_file_data_put(fd);
-+ GOTO(out_och_free, rc = -ENOMEM);
-+ }
-+ (*och_usecount)++;
-+ req = it->d.lustre.it_data;
-+
-+ /* mdc_intent_lock() didn't get a request ref if there was an
-+ * open error, so don't do cleanup on the request here
-+ * (bug 3430) */
-+ /* XXX (green): Shouldn't we bail out on any error here, not
-+ * just an open error? */
-+ rc = it_open_error(DISP_OPEN_OPEN, it);
-+ if (rc) {
-+ ll_file_data_put(fd);
-+ GOTO(out_och_free, rc);
-+ }
-+
-+ ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_OPEN, 1);
-+ rc = ll_local_open(file, it, fd, *och_p);
-+ LASSERTF(rc == 0, "rc = %d\n", rc);
-+ }
-+ up(&lli->lli_och_sem);
-+
-+ /* Must do this outside lli_och_sem lock to prevent deadlock where
-+ different kind of OPEN lock for this same inode gets cancelled
-+ by ldlm_cancel_lru */
-+ if (!S_ISREG(inode->i_mode))
-+ GOTO(out, rc);
-+
-+ lsm = lli->lli_smd;
-+ if (lsm == NULL) {
-+ if (file->f_flags & O_LOV_DELAY_CREATE ||
-+ !(file->f_mode & FMODE_WRITE)) {
-+ CDEBUG(D_INODE, "object creation was delayed\n");
-+ GOTO(out, rc);
-+ }
-+ }
-+ file->f_flags &= ~O_LOV_DELAY_CREATE;
-+ GOTO(out, rc);
-+ out:
-+ ptlrpc_req_finished(req);
-+ if (req)
-+ it_clear_disposition(it, DISP_ENQ_OPEN_REF);
-+ if (rc == 0) {
-+ ll_open_complete(inode);
-+ } else {
-+out_och_free:
-+ if (*och_p) {
-+ OBD_FREE(*och_p, sizeof (struct obd_client_handle));
-+ *och_p = NULL; /* OBD_FREE writes some magic there */
-+ (*och_usecount)--;
-+ }
-+ up(&lli->lli_och_sem);
-+out_openerr:
-+ if (opendir_set != 0)
-+ ll_stop_statahead(inode, lli->lli_opendir_key);
-+ }
-+
-+ return rc;
-+}
-+
-+/* Fills the obdo with the attributes for the inode defined by lsm */
-+int ll_lsm_getattr(struct obd_export *exp, struct lov_stripe_md *lsm,
-+ struct obdo *oa)
-+{
-+ struct ptlrpc_request_set *set;
-+ struct obd_info oinfo = { { { 0 } } };
-+ int rc;
-+ ENTRY;
-+
-+ LASSERT(lsm != NULL);
-+
-+ memset(oa, 0, sizeof *oa);
-+ oinfo.oi_md = lsm;
-+ oinfo.oi_oa = oa;
-+ oa->o_id = lsm->lsm_object_id;
-+ oa->o_mode = S_IFREG;
-+ oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLSIZE |
-+ OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ | OBD_MD_FLMTIME |
-+ OBD_MD_FLCTIME;
-+
-+ set = ptlrpc_prep_set();
-+ if (set == NULL) {
-+ rc = -ENOMEM;
-+ } else {
-+ rc = obd_getattr_async(exp, &oinfo, set);
-+ if (rc == 0)
-+ rc = ptlrpc_set_wait(set);
-+ ptlrpc_set_destroy(set);
-+ }
-+ if (rc)
-+ RETURN(rc);
-+
-+ oa->o_valid &= (OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ | OBD_MD_FLMTIME |
-+ OBD_MD_FLCTIME | OBD_MD_FLSIZE);
-+ RETURN(0);
-+}
-+
-+static int ll_lock_to_stripe_offset(struct inode *inode, struct ldlm_lock *lock)
-+{
-+ struct ll_inode_info *lli = ll_i2info(inode);
-+ struct lov_stripe_md *lsm = lli->lli_smd;
-+ struct obd_export *exp = ll_i2obdexp(inode);
-+ struct {
-+ char name[16];
-+ struct ldlm_lock *lock;
-+ } key = { .name = KEY_LOCK_TO_STRIPE, .lock = lock };
-+ __u32 stripe, vallen = sizeof(stripe);
-+ int rc;
-+ ENTRY;
-+
-+ if (lsm->lsm_stripe_count == 1)
-+ GOTO(check, stripe = 0);
-+
-+ /* get our offset in the lov */
-+ rc = obd_get_info(exp, sizeof(key), &key, &vallen, &stripe, lsm);
-+ if (rc != 0) {
-+ CERROR("obd_get_info: rc = %d\n", rc);
-+ RETURN(rc);
-+ }
-+ LASSERT(stripe < lsm->lsm_stripe_count);
-+
-+check:
-+ if (lsm->lsm_oinfo[stripe]->loi_id != lock->l_resource->lr_name.name[0]||
-+ lsm->lsm_oinfo[stripe]->loi_gr != lock->l_resource->lr_name.name[1]){
-+ LDLM_ERROR(lock, "resource doesn't match object "LPU64"/"LPU64,
-+ lsm->lsm_oinfo[stripe]->loi_id,
-+ lsm->lsm_oinfo[stripe]->loi_gr);
-+ RETURN(-ELDLM_NO_LOCK_DATA);
-+ }
-+
-+ RETURN(stripe);
-+}
-+
-+/* Get extra page reference to ensure it is not going away */
-+void ll_pin_extent_cb(void *data)
-+{
-+ struct page *page = data;
-+
-+ page_cache_get(page);
-+
-+ return;
-+}
-+/* Flush the page from the page cache for an extent as it is canceled.
-+ * Page to remove is delivered as @data.
-+ *
-+ * No one can dirty the extent until we've finished our work and they cannot
-+ * enqueue another lock. The DLM protects us from ll_file_read/write here,
-+ * but other kernel actors could have pages locked.
-+ *
-+ * If @discard is set, there is no need to write the page if it is dirty.
-+ *
-+ * Called with the DLM lock held. */
-+int ll_page_removal_cb(void *data, int discard)
-+{
-+ int rc;
-+ struct page *page = data;
-+ struct address_space *mapping;
-+
-+ ENTRY;
-+
-+ /* We have page reference already from ll_pin_page */
-+ lock_page(page);
-+
-+ /* Already truncated by somebody */
-+ if (!page->mapping)
-+ GOTO(out, rc = 0);
-+
-+ mapping = page->mapping;
-+
-+ ll_teardown_mmaps(mapping,
-+ (__u64)page->index << PAGE_CACHE_SHIFT,
-+ ((__u64)page->index<<PAGE_CACHE_SHIFT)|
-+ ~PAGE_CACHE_MASK);
-+ LL_CDEBUG_PAGE(D_PAGE, page, "removing page\n");
-+ if (!discard && PageWriteback(page))
-+ wait_on_page_writeback(page);
-+
-+ if (!discard && clear_page_dirty_for_io(page)) {
-+ rc = ll_call_writepage(page->mapping->host, page);
-+ /* either waiting for io to complete or reacquiring
-+ * the lock that the failed writepage released */
-+ lock_page(page);
-+ wait_on_page_writeback(page);
-+ if (rc < 0) {
-+ CERROR("writepage inode %lu(%p) of page %p "
-+ "failed: %d\n", mapping->host->i_ino,
-+ mapping->host, page, rc);
-+ if (rc == -ENOSPC)
-+ set_bit(AS_ENOSPC, &mapping->flags);
-+ else
-+ set_bit(AS_EIO, &mapping->flags);
-+ }
-+ }
-+ if (page->mapping != NULL) {
-+ struct ll_async_page *llap = llap_cast_private(page);
-+ // checking again to account for writeback's lock_page()
-+ LL_CDEBUG_PAGE(D_PAGE, page, "truncating\n");
-+ if (llap)
-+ ll_ra_accounting(llap, page->mapping);
-+ ll_truncate_complete_page(page);
-+ }
-+ EXIT;
-+out:
-+ LASSERT(!PageWriteback(page));
-+ unlock_page(page);
-+ page_cache_release(page);
-+
-+ return 0;
-+}
-+
-+int ll_extent_lock_cancel_cb(struct ldlm_lock *lock, struct ldlm_lock_desc *new,
-+ void *data, int flag)
-+{
-+ struct inode *inode;
-+ struct ll_inode_info *lli;
-+ struct lov_stripe_md *lsm;
-+ int stripe;
-+ __u64 kms;
-+
-+ ENTRY;
-+
-+ if ((unsigned long)data > 0 && (unsigned long)data < 0x1000) {
-+ LDLM_ERROR(lock, "cancelling lock with bad data %p", data);
-+ LBUG();
-+ }
-+
-+ inode = ll_inode_from_lock(lock);
-+ if (inode == NULL)
-+ RETURN(0);
-+ lli = ll_i2info(inode);
-+ if (lli == NULL)
-+ GOTO(iput, 0);
-+ if (lli->lli_smd == NULL)
-+ GOTO(iput, 0);
-+ lsm = lli->lli_smd;
-+
-+ stripe = ll_lock_to_stripe_offset(inode, lock);
-+ if (stripe < 0)
-+ GOTO(iput, 0);
-+
-+ lov_stripe_lock(lsm);
-+ lock_res_and_lock(lock);
-+ kms = ldlm_extent_shift_kms(lock,
-+ lsm->lsm_oinfo[stripe]->loi_kms);
-+
-+ if (lsm->lsm_oinfo[stripe]->loi_kms != kms)
-+ LDLM_DEBUG(lock, "updating kms from "LPU64" to "LPU64,
-+ lsm->lsm_oinfo[stripe]->loi_kms, kms);
-+ lsm->lsm_oinfo[stripe]->loi_kms = kms;
-+ unlock_res_and_lock(lock);
-+ lov_stripe_unlock(lsm);
-+ ll_try_done_writing(inode);
-+ EXIT;
-+iput:
-+ iput(inode);
-+
-+ return 0;
-+}
-+
-+#if 0
-+int ll_async_completion_ast(struct ldlm_lock *lock, int flags, void *data)
-+{
-+ /* XXX ALLOCATE - 160 bytes */
-+ struct inode *inode = ll_inode_from_lock(lock);
-+ struct ll_inode_info *lli = ll_i2info(inode);
-+ struct lustre_handle lockh = { 0 };
-+ struct ost_lvb *lvb;
-+ int stripe;
-+ ENTRY;
-+
-+ if (flags & (LDLM_FL_BLOCK_WAIT | LDLM_FL_BLOCK_GRANTED |
-+ LDLM_FL_BLOCK_CONV)) {
-+ LBUG(); /* not expecting any blocked async locks yet */
-+ LDLM_DEBUG(lock, "client-side async enqueue returned a blocked "
-+ "lock, returning");
-+ ldlm_lock_dump(D_OTHER, lock, 0);
-+ ldlm_reprocess_all(lock->l_resource);
-+ RETURN(0);
-+ }
-+
-+ LDLM_DEBUG(lock, "client-side async enqueue: granted/glimpsed");
-+
-+ stripe = ll_lock_to_stripe_offset(inode, lock);
-+ if (stripe < 0)
-+ goto iput;
-+
-+ if (lock->l_lvb_len) {
-+ struct lov_stripe_md *lsm = lli->lli_smd;
-+ __u64 kms;
-+ lvb = lock->l_lvb_data;
-+ lsm->lsm_oinfo[stripe].loi_rss = lvb->lvb_size;
-+
-+ lock_res_and_lock(lock);
-+ ll_inode_size_lock(inode, 1);
-+ kms = MAX(lsm->lsm_oinfo[stripe].loi_kms, lvb->lvb_size);
-+ kms = ldlm_extent_shift_kms(NULL, kms);
-+ if (lsm->lsm_oinfo[stripe].loi_kms != kms)
-+ LDLM_DEBUG(lock, "updating kms from "LPU64" to "LPU64,
-+ lsm->lsm_oinfo[stripe].loi_kms, kms);
-+ lsm->lsm_oinfo[stripe].loi_kms = kms;
-+ ll_inode_size_unlock(inode, 1);
-+ unlock_res_and_lock(lock);
-+ }
-+
-+iput:
-+ iput(inode);
-+ wake_up(&lock->l_waitq);
-+
-+ ldlm_lock2handle(lock, &lockh);
-+ ldlm_lock_decref(&lockh, LCK_PR);
-+ RETURN(0);
-+}
-+#endif
-+
-+static int ll_glimpse_callback(struct ldlm_lock *lock, void *reqp)
-+{
-+ struct ptlrpc_request *req = reqp;
-+ struct inode *inode = ll_inode_from_lock(lock);
-+ struct ll_inode_info *lli;
-+ struct lov_stripe_md *lsm;
-+ struct ost_lvb *lvb;
-+ int rc, stripe;
-+ int size[2] = { sizeof(struct ptlrpc_body), sizeof(*lvb) };
-+ ENTRY;
-+
-+ if (inode == NULL)
-+ GOTO(out, rc = -ELDLM_NO_LOCK_DATA);
-+ lli = ll_i2info(inode);
-+ if (lli == NULL)
-+ GOTO(iput, rc = -ELDLM_NO_LOCK_DATA);
-+ lsm = lli->lli_smd;
-+ if (lsm == NULL)
-+ GOTO(iput, rc = -ELDLM_NO_LOCK_DATA);
-+
-+ /* First, find out which stripe index this lock corresponds to. */
-+ stripe = ll_lock_to_stripe_offset(inode, lock);
-+ if (stripe < 0)
-+ GOTO(iput, rc = -ELDLM_NO_LOCK_DATA);
-+
-+ rc = lustre_pack_reply(req, 2, size, NULL);
-+ if (rc)
-+ GOTO(iput, rc);
-+
-+ lvb = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF, sizeof(*lvb));
-+ lvb->lvb_size = lli->lli_smd->lsm_oinfo[stripe]->loi_kms;
-+ lvb->lvb_mtime = LTIME_S(inode->i_mtime);
-+ lvb->lvb_atime = LTIME_S(inode->i_atime);
-+ lvb->lvb_ctime = LTIME_S(inode->i_ctime);
-+
-+ LDLM_DEBUG(lock, "i_size: %llu -> stripe number %u -> kms "LPU64
-+ " atime "LPU64", mtime "LPU64", ctime "LPU64,
-+ i_size_read(inode), stripe, lvb->lvb_size, lvb->lvb_mtime,
-+ lvb->lvb_atime, lvb->lvb_ctime);
-+ iput:
-+ iput(inode);
-+
-+ out:
-+ /* These errors are normal races, so we don't want to fill the console
-+ * with messages by calling ptlrpc_error() */
-+ if (rc == -ELDLM_NO_LOCK_DATA)
-+ lustre_pack_reply(req, 1, NULL, NULL);
-+
-+ req->rq_status = rc;
-+ return rc;
-+}
-+
-+int ll_glimpse_ioctl(struct ll_sb_info *sbi, struct lov_stripe_md *lsm,
-+ lstat_t *st)
-+{
-+ struct lustre_handle lockh = { 0 };
-+ struct ldlm_enqueue_info einfo = { 0 };
-+ struct obd_info oinfo = { { { 0 } } };
-+ struct ost_lvb lvb;
-+ int rc;
-+
-+ ENTRY;
-+
-+ einfo.ei_type = LDLM_EXTENT;
-+ einfo.ei_mode = LCK_PR;
-+ einfo.ei_cb_bl = osc_extent_blocking_cb;
-+ einfo.ei_cb_cp = ldlm_completion_ast;
-+ einfo.ei_cb_gl = ll_glimpse_callback;
-+ einfo.ei_cbdata = NULL;
-+
-+ oinfo.oi_policy.l_extent.end = OBD_OBJECT_EOF;
-+ oinfo.oi_lockh = &lockh;
-+ oinfo.oi_md = lsm;
-+ oinfo.oi_flags = LDLM_FL_HAS_INTENT;
-+
-+ rc = obd_enqueue_rqset(sbi->ll_osc_exp, &oinfo, &einfo);
-+ if (rc == -ENOENT)
-+ RETURN(rc);
-+ if (rc != 0) {
-+ CERROR("obd_enqueue returned rc %d, "
-+ "returning -EIO\n", rc);
-+ RETURN(rc > 0 ? -EIO : rc);
-+ }
-+
-+ lov_stripe_lock(lsm);
-+ memset(&lvb, 0, sizeof(lvb));
-+ obd_merge_lvb(sbi->ll_osc_exp, lsm, &lvb, 0);
-+ st->st_size = lvb.lvb_size;
-+ st->st_blocks = lvb.lvb_blocks;
-+ st->st_mtime = lvb.lvb_mtime;
-+ st->st_atime = lvb.lvb_atime;
-+ st->st_ctime = lvb.lvb_ctime;
-+ lov_stripe_unlock(lsm);
-+
-+ RETURN(rc);
-+}
-+
-+/* NB: obd_merge_lvb will prefer locally cached writes if they extend the
-+ * file (because it prefers KMS over RSS when larger) */
-+int ll_glimpse_size(struct inode *inode, int ast_flags)
-+{
-+ struct ll_inode_info *lli = ll_i2info(inode);
-+ struct ll_sb_info *sbi = ll_i2sbi(inode);
-+ struct lustre_handle lockh = { 0 };
-+ struct ldlm_enqueue_info einfo = { 0 };
-+ struct obd_info oinfo = { { { 0 } } };
-+ struct ost_lvb lvb;
-+ int rc;
-+ ENTRY;
-+
-+ CDEBUG(D_DLMTRACE, "Glimpsing inode %lu\n", inode->i_ino);
-+
-+ if (!lli->lli_smd) {
-+ CDEBUG(D_DLMTRACE, "No objects for inode %lu\n", inode->i_ino);
-+ RETURN(0);
-+ }
-+
-+ /* NOTE: this looks like a DLM lock request, but it may not be one. Due
-+ * to the LDLM_FL_HAS_INTENT flag, this is a glimpse request that
-+ * won't revoke any conflicting DLM locks held. Instead,
-+ * ll_glimpse_callback() will be called on each client
-+ * holding a DLM lock against this file, and the resulting size
-+ * will be returned for each stripe. A DLM lock on [0, EOF] is
-+ * acquired only if there were no conflicting locks. */
-+ einfo.ei_type = LDLM_EXTENT;
-+ einfo.ei_mode = LCK_PR;
-+ einfo.ei_cb_bl = osc_extent_blocking_cb;
-+ einfo.ei_cb_cp = ldlm_completion_ast;
-+ einfo.ei_cb_gl = ll_glimpse_callback;
-+ einfo.ei_cbdata = inode;
-+
-+ oinfo.oi_policy.l_extent.end = OBD_OBJECT_EOF;
-+ oinfo.oi_lockh = &lockh;
-+ oinfo.oi_md = lli->lli_smd;
-+ oinfo.oi_flags = ast_flags | LDLM_FL_HAS_INTENT;
-+
-+ rc = obd_enqueue_rqset(sbi->ll_osc_exp, &oinfo, &einfo);
-+ if (rc == -ENOENT)
-+ RETURN(rc);
-+ if (rc != 0) {
-+ CERROR("obd_enqueue returned rc %d, returning -EIO\n", rc);
-+ RETURN(rc > 0 ? -EIO : rc);
-+ }
-+
-+ ll_inode_size_lock(inode, 1);
-+ inode_init_lvb(inode, &lvb);
-+ rc = obd_merge_lvb(sbi->ll_osc_exp, lli->lli_smd, &lvb, 0);
-+ i_size_write(inode, lvb.lvb_size);
-+ inode->i_blocks = lvb.lvb_blocks;
-+ LTIME_S(inode->i_mtime) = lvb.lvb_mtime;
-+ LTIME_S(inode->i_atime) = lvb.lvb_atime;
-+ LTIME_S(inode->i_ctime) = lvb.lvb_ctime;
-+ ll_inode_size_unlock(inode, 1);
-+
-+ CDEBUG(D_DLMTRACE, "glimpse: size: %llu, blocks: %llu\n",
-+ i_size_read(inode), (long long)inode->i_blocks);
-+
-+ RETURN(rc);
-+}
-+
-+int ll_extent_lock(struct ll_file_data *fd, struct inode *inode,
-+ struct lov_stripe_md *lsm, int mode,
-+ ldlm_policy_data_t *policy, struct lustre_handle *lockh,
-+ int ast_flags)
-+{
-+ struct ll_sb_info *sbi = ll_i2sbi(inode);
-+ struct ost_lvb lvb;
-+ struct ldlm_enqueue_info einfo = { 0 };
-+ struct obd_info oinfo = { { { 0 } } };
-+ int rc;
-+ ENTRY;
-+
-+ LASSERT(!lustre_handle_is_used(lockh));
-+ LASSERT(lsm != NULL);
-+
-+ /* don't drop the mmapped file to LRU */
-+ if (mapping_mapped(inode->i_mapping))
-+ ast_flags |= LDLM_FL_NO_LRU;
-+
-+ /* XXX phil: can we do this? won't it screw the file size up? */
-+ if ((fd && (fd->fd_flags & LL_FILE_IGNORE_LOCK)) ||
-+ (sbi->ll_flags & LL_SBI_NOLCK))
-+ RETURN(0);
-+
-+ CDEBUG(D_DLMTRACE, "Locking inode %lu, start "LPU64" end "LPU64"\n",
-+ inode->i_ino, policy->l_extent.start, policy->l_extent.end);
-+
-+ einfo.ei_type = LDLM_EXTENT;
-+ einfo.ei_mode = mode;
-+ einfo.ei_cb_bl = osc_extent_blocking_cb;
-+ einfo.ei_cb_cp = ldlm_completion_ast;
-+ einfo.ei_cb_gl = ll_glimpse_callback;
-+ einfo.ei_cbdata = inode;
-+
-+ oinfo.oi_policy = *policy;
-+ oinfo.oi_lockh = lockh;
-+ oinfo.oi_md = lsm;
-+ oinfo.oi_flags = ast_flags;
-+
-+ rc = obd_enqueue(sbi->ll_osc_exp, &oinfo, &einfo, NULL);
-+ *policy = oinfo.oi_policy;
-+ if (rc > 0)
-+ rc = -EIO;
-+
-+ ll_inode_size_lock(inode, 1);
-+ inode_init_lvb(inode, &lvb);
-+ obd_merge_lvb(sbi->ll_osc_exp, lsm, &lvb, 1);
-+
-+ if (policy->l_extent.start == 0 &&
-+ policy->l_extent.end == OBD_OBJECT_EOF) {
-+ /* vmtruncate()->ll_truncate() first sets the i_size and then
-+ * the kms under both a DLM lock and the
-+ * ll_inode_size_lock(). If we don't get the
-+ * ll_inode_size_lock() here we can match the DLM lock and
-+ * reset i_size from the kms before the truncating path has
-+ * updated the kms. generic_file_write can then trust the
-+ * stale i_size when doing appending writes and effectively
-+ * cancel the result of the truncate. Getting the
-+ * ll_inode_size_lock() after the enqueue maintains the DLM
-+ * -> ll_inode_size_lock() acquiring order. */
-+ i_size_write(inode, lvb.lvb_size);
-+ CDEBUG(D_INODE, "inode=%lu, updating i_size %llu\n",
-+ inode->i_ino, i_size_read(inode));
-+ }
-+
-+ if (rc == 0) {
-+ LTIME_S(inode->i_mtime) = lvb.lvb_mtime;
-+ LTIME_S(inode->i_atime) = lvb.lvb_atime;
-+ LTIME_S(inode->i_ctime) = lvb.lvb_ctime;
-+ }
-+ ll_inode_size_unlock(inode, 1);
-+
-+ RETURN(rc);
-+}
-+
-+int ll_extent_unlock(struct ll_file_data *fd, struct inode *inode,
-+ struct lov_stripe_md *lsm, int mode,
-+ struct lustre_handle *lockh)
-+{
-+ struct ll_sb_info *sbi = ll_i2sbi(inode);
-+ int rc;
-+ ENTRY;
-+
-+ /* XXX phil: can we do this? won't it screw the file size up? */
-+ if ((fd && (fd->fd_flags & LL_FILE_IGNORE_LOCK)) ||
-+ (sbi->ll_flags & LL_SBI_NOLCK))
-+ RETURN(0);
-+
-+ rc = obd_cancel(sbi->ll_osc_exp, lsm, mode, lockh);
-+
-+ RETURN(rc);
-+}
-+
-+static void ll_set_file_contended(struct inode *inode)
-+{
-+ struct ll_inode_info *lli = ll_i2info(inode);
-+
-+ lli->lli_contention_time = cfs_time_current();
-+ set_bit(LLI_F_CONTENDED, &lli->lli_flags);
-+}
-+
-+void ll_clear_file_contended(struct inode *inode)
-+{
-+ struct ll_inode_info *lli = ll_i2info(inode);
-+
-+ clear_bit(LLI_F_CONTENDED, &lli->lli_flags);
-+}
-+
-+static int ll_is_file_contended(struct file *file)
-+{
-+ struct inode *inode = file->f_dentry->d_inode;
-+ struct ll_inode_info *lli = ll_i2info(inode);
-+ struct ll_sb_info *sbi = ll_i2sbi(inode);
-+ struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
-+ ENTRY;
-+
-+ if (!(sbi->ll_lco.lco_flags & OBD_CONNECT_SRVLOCK)) {
-+ CDEBUG(D_INFO, "the server does not support SRVLOCK feature,"
-+ " osc connect flags = 0x"LPX64"\n",
-+ sbi->ll_lco.lco_flags);
-+ RETURN(0);
-+ }
-+ if (fd && (fd->fd_flags & LL_FILE_IGNORE_LOCK))
-+ RETURN(1);
-+ if (test_bit(LLI_F_CONTENDED, &lli->lli_flags)) {
-+ cfs_time_t cur_time = cfs_time_current();
-+ cfs_time_t retry_time;
-+
-+ retry_time = cfs_time_add(
-+ lli->lli_contention_time,
-+ cfs_time_seconds(sbi->ll_contention_time));
-+ if (cfs_time_after(cur_time, retry_time)) {
-+ ll_clear_file_contended(inode);
-+ RETURN(0);
-+ }
-+ RETURN(1);
-+ }
-+ RETURN(0);
-+}
-+
-+static int ll_file_get_tree_lock_iov(struct ll_lock_tree *tree,
-+ struct file *file, const struct iovec *iov,
-+ unsigned long nr_segs,
-+ loff_t start, loff_t end, int rw)
-+{
-+ int append;
-+ int tree_locked = 0;
-+ int rc;
-+ struct inode * inode = file->f_dentry->d_inode;
-+
-+ append = (rw == OBD_BRW_WRITE) && (file->f_flags & O_APPEND);
-+
-+ if (append || !ll_is_file_contended(file)) {
-+ struct ll_lock_tree_node *node;
-+ int ast_flags;
-+
-+ ast_flags = append ? 0 : LDLM_FL_DENY_ON_CONTENTION;
-+ if (file->f_flags & O_NONBLOCK)
-+ ast_flags |= LDLM_FL_BLOCK_NOWAIT;
-+ node = ll_node_from_inode(inode, start, end,
-+ (rw == OBD_BRW_WRITE) ? LCK_PW : LCK_PR);
-+ if (IS_ERR(node)) {
-+ rc = PTR_ERR(node);
-+ GOTO(out, rc);
-+ }
-+ tree->lt_fd = LUSTRE_FPRIVATE(file);
-+ rc = ll_tree_lock_iov(tree, node, iov, nr_segs, ast_flags);
-+ if (rc == 0)
-+ tree_locked = 1;
-+ else if (rc == -EUSERS)
-+ ll_set_file_contended(inode);
-+ else
-+ GOTO(out, rc);
-+ }
-+ RETURN(tree_locked);
-+out:
-+ return rc;
-+}
-+
-+/* XXX: exact copy from kernel code (__generic_file_aio_write_nolock from rhel4)
-+ */
-+static size_t ll_file_get_iov_count(const struct iovec *iov,
-+ unsigned long *nr_segs)
-+{
-+ size_t count = 0;
-+ unsigned long seg;
-+
-+ for (seg = 0; seg < *nr_segs; seg++) {
-+ const struct iovec *iv = &iov[seg];
-+
-+ /*
-+ * If any segment has a negative length, or the cumulative
-+ * length ever wraps negative then return -EINVAL.
-+ */
-+ count += iv->iov_len;
-+ if (unlikely((ssize_t)(count|iv->iov_len) < 0))
-+ return -EINVAL;
-+ if (access_ok(VERIFY_WRITE, iv->iov_base, iv->iov_len))
-+ continue;
-+ if (seg == 0)
-+ return -EFAULT;
-+ *nr_segs = seg;
-+ count -= iv->iov_len; /* This segment is no good */
-+ break;
-+ }
-+ return count;
-+}
-+
-+static int iov_copy_update(unsigned long *nr_segs, const struct iovec **iov_out,
-+ unsigned long *nrsegs_copy,
-+ struct iovec *iov_copy, size_t *offset,
-+ size_t size)
-+{
-+ int i;
-+ const struct iovec *iov = *iov_out;
-+ for (i = 0; i < *nr_segs; i++) {
-+ const struct iovec *iv = &iov[i];
-+ struct iovec *ivc = &iov_copy[i];
-+ *ivc = *iv;
-+ if (i == 0) {
-+ ivc->iov_len -= *offset;
-+ ivc->iov_base += *offset;
-+ }
-+ if (ivc->iov_len >= size) {
-+ ivc->iov_len = size;
-+ if (i == 0)
-+ *offset += size;
-+ else
-+ *offset = size;
-+ break;
-+ }
-+ size -= ivc->iov_len;
-+ }
-+ *iov_out += i;
-+ *nr_segs -= i;
-+ *nrsegs_copy = i + 1;
-+
-+ return 0;
-+}
-+
-+static int ll_reget_short_lock(struct page *page, int rw,
-+ obd_off start, obd_off end,
-+ void **cookie)
-+{
-+ struct ll_async_page *llap;
-+ struct obd_export *exp;
-+ struct inode *inode = page->mapping->host;
-+
-+ ENTRY;
-+
-+ exp = ll_i2obdexp(inode);
-+ if (exp == NULL)
-+ RETURN(0);
-+
-+ llap = llap_cast_private(page);
-+ if (llap == NULL)
-+ RETURN(0);
-+
-+ RETURN(obd_reget_short_lock(exp, ll_i2info(inode)->lli_smd,
-+ &llap->llap_cookie, rw, start, end,
-+ cookie));
-+}
-+
-+static void ll_release_short_lock(struct inode *inode, obd_off end,
-+ void *cookie, int rw)
-+{
-+ struct obd_export *exp;
-+ int rc;
-+
-+ exp = ll_i2obdexp(inode);
-+ if (exp == NULL)
-+ return;
-+
-+ rc = obd_release_short_lock(exp, ll_i2info(inode)->lli_smd, end,
-+ cookie, rw);
-+ if (rc < 0)
-+ CERROR("unlock failed (%d)\n", rc);
-+}
-+
-+static inline int ll_file_get_fast_lock(struct file *file,
-+ obd_off ppos, obd_off end,
-+ const struct iovec *iov,
-+ unsigned long nr_segs,
-+ void **cookie, int rw)
-+{
-+ int rc = 0, seg;
-+ struct page *page;
-+
-+ ENTRY;
-+
-+ /* we would like this read request to be lockfree */
-+ for (seg = 0; seg < nr_segs; seg++) {
-+ const struct iovec *iv = &iov[seg];
-+ if (ll_region_mapped((unsigned long)iv->iov_base, iv->iov_len))
-+ GOTO(out, rc);
-+ }
-+
-+ page = find_lock_page(file->f_dentry->d_inode->i_mapping,
-+ ppos >> CFS_PAGE_SHIFT);
-+ if (page) {
-+ if (ll_reget_short_lock(page, rw, ppos, end, cookie))
-+ rc = 1;
-+
-+ unlock_page(page);
-+ page_cache_release(page);
-+ }
-+
-+out:
-+ RETURN(rc);
-+}
-+
-+static inline void ll_file_put_fast_lock(struct inode *inode, obd_off end,
-+ void *cookie, int rw)
-+{
-+ ll_release_short_lock(inode, end, cookie, rw);
-+}
-+
-+enum ll_lock_style {
-+ LL_LOCK_STYLE_NOLOCK = 0,
-+ LL_LOCK_STYLE_FASTLOCK = 1,
-+ LL_LOCK_STYLE_TREELOCK = 2
-+};
-+
-+static inline int ll_file_get_lock(struct file *file, obd_off ppos,
-+ obd_off end, const struct iovec *iov,
-+ unsigned long nr_segs, void **cookie,
-+ struct ll_lock_tree *tree, int rw)
-+{
-+ int rc;
-+
-+ ENTRY;
-+
-+ if (ll_file_get_fast_lock(file, ppos, end, iov, nr_segs, cookie, rw))
-+ RETURN(LL_LOCK_STYLE_FASTLOCK);
-+
-+ rc = ll_file_get_tree_lock_iov(tree, file, iov, nr_segs,
-+ ppos, end, rw);
-+ /* rc: 1 for tree lock, 0 for no lock, <0 for error */
-+ switch (rc) {
-+ case 1:
-+ RETURN(LL_LOCK_STYLE_TREELOCK);
-+ case 0:
-+ RETURN(LL_LOCK_STYLE_NOLOCK);
-+ }
-+
-+ /* an error happened if we reached this point, rc = -errno here */
-+ RETURN(rc);
-+}
-+
-+static inline void ll_file_put_lock(struct inode *inode, obd_off end,
-+ enum ll_lock_style lock_style,
-+ void *cookie, struct ll_lock_tree *tree,
-+ int rw)
-+
-+{
-+ switch (lock_style) {
-+ case LL_LOCK_STYLE_TREELOCK:
-+ ll_tree_unlock(tree);
-+ break;
-+ case LL_LOCK_STYLE_FASTLOCK:
-+ ll_file_put_fast_lock(inode, end, cookie, rw);
-+ break;
-+ default:
-+ CERROR("invalid locking style (%d)\n", lock_style);
-+ }
-+}
-+
-+#ifdef HAVE_FILE_READV
-+static ssize_t ll_file_readv(struct file *file, const struct iovec *iov,
-+ unsigned long nr_segs, loff_t *ppos)
-+{
-+#else
-+static ssize_t ll_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
-+ unsigned long nr_segs, loff_t pos)
-+{
-+ struct file *file = iocb->ki_filp;
-+ loff_t *ppos = &iocb->ki_pos;
-+#endif
-+ struct inode *inode = file->f_dentry->d_inode;
-+ struct ll_inode_info *lli = ll_i2info(inode);
-+ struct lov_stripe_md *lsm = lli->lli_smd;
-+ struct ll_sb_info *sbi = ll_i2sbi(inode);
-+ struct ll_lock_tree tree;
-+ struct ost_lvb lvb;
-+ struct ll_ra_read bead;
-+ int ra = 0;
-+ obd_off end;
-+ ssize_t retval, chunk, sum = 0;
-+ int lock_style;
-+ struct iovec *iov_copy = NULL;
-+ unsigned long nrsegs_copy, nrsegs_orig = 0;
-+ size_t count, iov_offset = 0;
-+ __u64 kms;
-+ void *cookie;
-+ ENTRY;
-+
-+ count = ll_file_get_iov_count(iov, &nr_segs);
-+ CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),size="LPSZ",offset=%Ld\n",
-+ inode->i_ino, inode->i_generation, inode, count, *ppos);
-+ /* "If nbyte is 0, read() will return 0 and have no other results."
-+ * -- Single Unix Spec */
-+ if (count == 0)
-+ RETURN(0);
-+
-+ ll_stats_ops_tally(sbi, LPROC_LL_READ_BYTES, count);
-+
-+ if (!lsm) {
-+ /* A read on a file with no objects should return zero-filled
-+ * buffers up to the file size (we can get non-zero sizes with
-+ * mknod + truncate, then opening the file for read; this seems
-+ * to be a common pattern in the NFS case). Bug 6243 */
-+ int notzeroed;
-+ /* Since there are no objects on OSTs, we have nothing to get
-+ * lock on and so we are forced to access inode->i_size
-+ * unguarded */
-+
-+ /* Read beyond end of file */
-+ if (*ppos >= i_size_read(inode))
-+ RETURN(0);
-+
-+ if (count > i_size_read(inode) - *ppos)
-+ count = i_size_read(inode) - *ppos;
-+ /* Make sure to correctly adjust the file pos pointer for
-+ * EFAULT case */
-+ for (nrsegs_copy = 0; nrsegs_copy < nr_segs; nrsegs_copy++) {
-+ const struct iovec *iv = &iov[nrsegs_copy];
-+
-+ if (count < iv->iov_len)
-+ chunk = count;
-+ else
-+ chunk = iv->iov_len;
-+ notzeroed = clear_user(iv->iov_base, chunk);
-+ sum += (chunk - notzeroed);
-+ count -= (chunk - notzeroed);
-+ if (notzeroed || !count)
-+ break;
-+ }
-+ *ppos += sum;
-+ if (!sum)
-+ RETURN(-EFAULT);
-+ RETURN(sum);
-+ }
-+
-+repeat:
-+ if (sbi->ll_max_rw_chunk != 0) {
-+ /* first, find the end of the current stripe */
-+ end = *ppos;
-+ obd_extent_calc(sbi->ll_osc_exp, lsm, OBD_CALC_STRIPE_END,
-+ (obd_off *)&end);
-+
-+ /* clamp the end if it is beyond the request */
-+ if (end > *ppos + count - 1)
-+ end = *ppos + count - 1;
-+
-+ /* and chunk shouldn't be too large even if striping is wide */
-+ if (end - *ppos > sbi->ll_max_rw_chunk)
-+ end = *ppos + sbi->ll_max_rw_chunk - 1;
-+
-+ chunk = end - *ppos + 1;
-+ if ((count == chunk) && (iov_offset == 0)) {
-+ if (iov_copy)
-+ OBD_FREE(iov_copy, sizeof(*iov) * nrsegs_orig);
-+
-+ iov_copy = (struct iovec *)iov;
-+ nrsegs_copy = nr_segs;
-+ } else {
-+ if (!iov_copy) {
-+ nrsegs_orig = nr_segs;
-+ OBD_ALLOC(iov_copy, sizeof(*iov) * nr_segs);
-+ if (!iov_copy)
-+ GOTO(out, retval = -ENOMEM);
-+ }
-+
-+ iov_copy_update(&nr_segs, &iov, &nrsegs_copy, iov_copy,
-+ &iov_offset, chunk);
-+ }
-+ } else {
-+ end = *ppos + count - 1;
-+ iov_copy = (struct iovec *)iov;
-+ nrsegs_copy = nr_segs;
-+ }
-+
-+ lock_style = ll_file_get_lock(file, (obd_off)(*ppos), end,
-+ iov_copy, nrsegs_copy, &cookie, &tree,
-+ OBD_BRW_READ);
-+ if (lock_style < 0)
-+ GOTO(out, retval = lock_style);
-+
-+ ll_inode_size_lock(inode, 1);
-+ /*
-+ * Consistency guarantees: following possibilities exist for the
-+ * relation between region being read and real file size at this
-+ * moment:
-+ *
-+ * (A): the region is completely inside of the file;
-+ *
-+ * (B-x): x bytes of region are inside of the file, the rest is
-+ * outside;
-+ *
-+ * (C): the region is completely outside of the file.
-+ *
-+ * This classification is stable under DLM lock acquired by
-+ * ll_tree_lock() above, because to change class, other client has to
-+ * take DLM lock conflicting with our lock. Also, any updates to
-+ * ->i_size by other threads on this client are serialized by
-+ * ll_inode_size_lock(). This guarantees that short reads are handled
-+ * correctly in the face of concurrent writes and truncates.
-+ */
-+ inode_init_lvb(inode, &lvb);
-+ obd_merge_lvb(ll_i2sbi(inode)->ll_osc_exp, lsm, &lvb, 1);
-+ kms = lvb.lvb_size;
-+ if (*ppos + count - 1 > kms) {
-+ /* A glimpse is necessary to determine whether we return a
-+ * short read (B) or some zeroes at the end of the buffer (C) */
-+ ll_inode_size_unlock(inode, 1);
-+ retval = ll_glimpse_size(inode, LDLM_FL_BLOCK_GRANTED);
-+ if (retval) {
-+ if (lock_style != LL_LOCK_STYLE_NOLOCK)
-+ ll_file_put_lock(inode, end, lock_style,
-+ cookie, &tree, OBD_BRW_READ);
-+ goto out;
-+ }
-+ } else {
-+ /* region is within kms and, hence, within real file size (A).
-+ * We need to increase i_size to cover the read region so that
-+ * generic_file_read() will do its job, but that doesn't mean
-+ * the kms size is _correct_, it is only the _minimum_ size.
-+ * If someone does a stat they will get the correct size which
-+ * will always be >= the kms value here. b=11081 */
-+ if (i_size_read(inode) < kms)
-+ i_size_write(inode, kms);
-+ ll_inode_size_unlock(inode, 1);
-+ }
-+
-+ chunk = end - *ppos + 1;
-+ CDEBUG(D_INODE,"Read ino %lu, "LPSZ" bytes, offset %lld, i_size %llu\n",
-+ inode->i_ino, chunk, *ppos, i_size_read(inode));
-+
-+ /* turn off the kernel's read-ahead */
-+ if (lock_style != LL_LOCK_STYLE_NOLOCK) {
-+ file->f_ra.ra_pages = 0;
-+ /* initialize read-ahead window once per syscall */
-+ if (ra == 0) {
-+ ra = 1;
-+ bead.lrr_start = *ppos >> CFS_PAGE_SHIFT;
-+ bead.lrr_count = (count + CFS_PAGE_SIZE - 1) >> CFS_PAGE_SHIFT;
-+ ll_ra_read_in(file, &bead);
-+ }
-+
-+ /* BUG: 5972 */
-+ file_accessed(file);
-+#ifdef HAVE_FILE_READV
-+ retval = generic_file_readv(file, iov_copy, nrsegs_copy, ppos);
-+#else
-+ retval = generic_file_aio_read(iocb, iov_copy, nrsegs_copy,
-+ *ppos);
-+#endif
-+ ll_file_put_lock(inode, end, lock_style, cookie,
-+ &tree, OBD_BRW_READ);
-+ } else {
-+ retval = ll_file_lockless_io(file, iov_copy, nrsegs_copy, ppos,
-+ READ, chunk);
-+ }
-+ ll_rw_stats_tally(sbi, current->pid, file, count, 0);
-+ if (retval > 0) {
-+ count -= retval;
-+ sum += retval;
-+ if (retval == chunk && count > 0)
-+ goto repeat;
-+ }
-+
-+ out:
-+ if (ra != 0)
-+ ll_ra_read_ex(file, &bead);
-+ retval = (sum > 0) ? sum : retval;
-+
-+ if (iov_copy && iov_copy != iov)
-+ OBD_FREE(iov_copy, sizeof(*iov) * nrsegs_orig);
-+
-+ RETURN(retval);
-+}
-+
-+static ssize_t ll_file_read(struct file *file, char *buf, size_t count,
-+ loff_t *ppos)
-+{
-+ struct iovec local_iov = { .iov_base = (void __user *)buf,
-+ .iov_len = count };
-+#ifdef HAVE_FILE_READV
-+ return ll_file_readv(file, &local_iov, 1, ppos);
-+#else
-+ struct kiocb kiocb;
-+ ssize_t ret;
-+
-+ init_sync_kiocb(&kiocb, file);
-+ kiocb.ki_pos = *ppos;
-+ kiocb.ki_left = count;
-+
-+ ret = ll_file_aio_read(&kiocb, &local_iov, 1, kiocb.ki_pos);
-+ *ppos = kiocb.ki_pos;
-+ return ret;
-+#endif
-+}
-+
-+/*
-+ * Write to a file (through the page cache).
-+ */
-+#ifdef HAVE_FILE_WRITEV
-+static ssize_t ll_file_writev(struct file *file, const struct iovec *iov,
-+ unsigned long nr_segs, loff_t *ppos)
-+{
-+#else /* AIO stuff */
-+static ssize_t ll_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
-+ unsigned long nr_segs, loff_t pos)
-+{
-+ struct file *file = iocb->ki_filp;
-+ loff_t *ppos = &iocb->ki_pos;
-+#endif
-+ struct inode *inode = file->f_dentry->d_inode;
-+ struct ll_sb_info *sbi = ll_i2sbi(inode);
-+ struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
-+ struct ll_lock_tree tree;
-+ loff_t maxbytes = ll_file_maxbytes(inode);
-+ loff_t lock_start, lock_end, end;
-+ ssize_t retval, chunk, sum = 0;
-+ int tree_locked;
-+ struct iovec *iov_copy = NULL;
-+ unsigned long nrsegs_copy, nrsegs_orig = 0;
-+ size_t count, iov_offset = 0;
-+ ENTRY;
-+
-+ count = ll_file_get_iov_count(iov, &nr_segs);
-+
-+ CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),size="LPSZ",offset=%Ld\n",
-+ inode->i_ino, inode->i_generation, inode, count, *ppos);
-+
-+ SIGNAL_MASK_ASSERT(); /* XXX BUG 1511 */
-+
-+ /* POSIX, but surprised the VFS doesn't check this already */
-+ if (count == 0)
-+ RETURN(0);
-+
-+ /* If file was opened for LL_IOC_LOV_SETSTRIPE but the ioctl wasn't
-+ * called on the file, don't fail the below assertion (bug 2388). */
-+ if (file->f_flags & O_LOV_DELAY_CREATE &&
-+ ll_i2info(inode)->lli_smd == NULL)
-+ RETURN(-EBADF);
-+
-+ LASSERT(ll_i2info(inode)->lli_smd != NULL);
-+
-+ down(&ll_i2info(inode)->lli_write_sem);
-+
-+repeat:
-+ chunk = 0; /* just to fix gcc's warning */
-+ end = *ppos + count - 1;
-+
-+ if (file->f_flags & O_APPEND) {
-+ lock_start = 0;
-+ lock_end = OBD_OBJECT_EOF;
-+ iov_copy = (struct iovec *)iov;
-+ nrsegs_copy = nr_segs;
-+ } else if (sbi->ll_max_rw_chunk != 0) {
-+ /* first, find the end of the current stripe */
-+ end = *ppos;
-+ obd_extent_calc(sbi->ll_osc_exp, lsm, OBD_CALC_STRIPE_END,
-+ (obd_off *)&end);
-+
-+ /* clamp the end if it is beyond the request */
-+ if (end > *ppos + count - 1)
-+ end = *ppos + count - 1;
-+
-+ /* and chunk shouldn't be too large even if striping is wide */
-+ if (end - *ppos > sbi->ll_max_rw_chunk)
-+ end = *ppos + sbi->ll_max_rw_chunk - 1;
-+ lock_start = *ppos;
-+ lock_end = end;
-+ chunk = end - *ppos + 1;
-+ if ((count == chunk) && (iov_offset == 0)) {
-+ if (iov_copy)
-+ OBD_FREE(iov_copy, sizeof(*iov) * nrsegs_orig);
-+
-+ iov_copy = (struct iovec *)iov;
-+ nrsegs_copy = nr_segs;
-+ } else {
-+ if (!iov_copy) {
-+ nrsegs_orig = nr_segs;
-+ OBD_ALLOC(iov_copy, sizeof(*iov) * nr_segs);
-+ if (!iov_copy)
-+ GOTO(out, retval = -ENOMEM);
-+ }
-+ iov_copy_update(&nr_segs, &iov, &nrsegs_copy, iov_copy,
-+ &iov_offset, chunk);
-+ }
-+ } else {
-+ lock_start = *ppos;
-+ lock_end = end;
-+ iov_copy = (struct iovec *)iov;
-+ nrsegs_copy = nr_segs;
-+ }
-+
-+ tree_locked = ll_file_get_tree_lock_iov(&tree, file, iov_copy,
-+ nrsegs_copy,
-+ (obd_off)lock_start,
-+ (obd_off)lock_end,
-+ OBD_BRW_WRITE);
-+ if (tree_locked < 0)
-+ GOTO(out, retval = tree_locked);
-+
-+ /* This is ok, g_f_w will overwrite this under i_sem if it races
-+ * with a local truncate, it just makes our maxbyte checking easier.
-+ * The i_size value gets updated in ll_extent_lock() as a consequence
-+ * of the [0,EOF] extent lock we requested above. */
-+ if (file->f_flags & O_APPEND) {
-+ *ppos = i_size_read(inode);
-+ end = *ppos + count - 1;
-+ }
-+
-+ if (*ppos >= maxbytes) {
-+ send_sig(SIGXFSZ, current, 0);
-+ GOTO(out_unlock, retval = -EFBIG);
-+ }
-+ if (end > maxbytes - 1)
-+ end = maxbytes - 1;
-+
-+ /* generic_file_write handles O_APPEND after getting i_mutex */
-+ chunk = end - *ppos + 1;
-+ CDEBUG(D_INFO, "Writing inode %lu, "LPSZ" bytes, offset %Lu\n",
-+ inode->i_ino, chunk, *ppos);
-+ if (tree_locked)
-+#ifdef HAVE_FILE_WRITEV
-+ retval = generic_file_writev(file, iov_copy, nrsegs_copy, ppos);
-+#else
-+ retval = generic_file_aio_write(iocb, iov_copy, nrsegs_copy,
-+ *ppos);
-+#endif
-+ else
-+ retval = ll_file_lockless_io(file, iov_copy, nrsegs_copy,
-+ ppos, WRITE, chunk);
-+ ll_rw_stats_tally(ll_i2sbi(inode), current->pid, file, chunk, 1);
-+
-+out_unlock:
-+ if (tree_locked)
-+ ll_tree_unlock(&tree);
-+
-+out:
-+ if (retval > 0) {
-+ count -= retval;
-+ sum += retval;
-+ if (retval == chunk && count > 0)
-+ goto repeat;
-+ }
-+
-+ up(&ll_i2info(inode)->lli_write_sem);
-+
-+ if (iov_copy && iov_copy != iov)
-+ OBD_FREE(iov_copy, sizeof(*iov) * nrsegs_orig);
-+
-+ retval = (sum > 0) ? sum : retval;
-+ ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_WRITE_BYTES,
-+ retval > 0 ? retval : 0);
-+ RETURN(retval);
-+}
-+
-+static ssize_t ll_file_write(struct file *file, const char *buf, size_t count,
-+ loff_t *ppos)
-+{
-+ struct iovec local_iov = { .iov_base = (void __user *)buf,
-+ .iov_len = count };
-+
-+#ifdef HAVE_FILE_WRITEV
-+ return ll_file_writev(file, &local_iov, 1, ppos);
-+#else
-+ struct kiocb kiocb;
-+ ssize_t ret;
-+
-+ init_sync_kiocb(&kiocb, file);
-+ kiocb.ki_pos = *ppos;
-+ kiocb.ki_left = count;
-+
-+ ret = ll_file_aio_write(&kiocb, &local_iov, 1, kiocb.ki_pos);
-+ *ppos = kiocb.ki_pos;
-+
-+ return ret;
-+#endif
-+}
-+
-+/*
-+ * Send file content (through pagecache) somewhere with helper
-+ */
-+static ssize_t ll_file_sendfile(struct file *in_file, loff_t *ppos,size_t count,
-+ read_actor_t actor, void *target)
-+{
-+ struct inode *inode = in_file->f_dentry->d_inode;
-+ struct ll_inode_info *lli = ll_i2info(inode);
-+ struct lov_stripe_md *lsm = lli->lli_smd;
-+ struct ll_lock_tree tree;
-+ struct ll_lock_tree_node *node;
-+ struct ost_lvb lvb;
-+ struct ll_ra_read bead;
-+ int rc;
-+ ssize_t retval;
-+ __u64 kms;
-+ ENTRY;
-+ CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),size="LPSZ",offset=%Ld\n",
-+ inode->i_ino, inode->i_generation, inode, count, *ppos);
-+
-+ /* "If nbyte is 0, read() will return 0 and have no other results."
-+ * -- Single Unix Spec */
-+ if (count == 0)
-+ RETURN(0);
-+
-+ ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_READ_BYTES, count);
-+ /* turn off the kernel's read-ahead */
-+ in_file->f_ra.ra_pages = 0;
-+
-+ /* File with no objects, nothing to lock */
-+ if (!lsm)
-+ RETURN(generic_file_sendfile(in_file, ppos, count, actor, target));
-+
-+ node = ll_node_from_inode(inode, *ppos, *ppos + count - 1, LCK_PR);
-+ if (IS_ERR(node))
-+ RETURN(PTR_ERR(node));
-+
-+ tree.lt_fd = LUSTRE_FPRIVATE(in_file);
-+ rc = ll_tree_lock(&tree, node, NULL, count,
-+ in_file->f_flags & O_NONBLOCK?LDLM_FL_BLOCK_NOWAIT:0);
-+ if (rc != 0)
-+ RETURN(rc);
-+
-+ ll_clear_file_contended(inode);
-+ ll_inode_size_lock(inode, 1);
-+ /*
-+ * Consistency guarantees: following possibilities exist for the
-+ * relation between region being read and real file size at this
-+ * moment:
-+ *
-+ * (A): the region is completely inside of the file;
-+ *
-+ * (B-x): x bytes of region are inside of the file, the rest is
-+ * outside;
-+ *
-+ * (C): the region is completely outside of the file.
-+ *
-+ * This classification is stable under DLM lock acquired by
-+ * ll_tree_lock() above, because to change class, other client has to
-+ * take DLM lock conflicting with our lock. Also, any updates to
-+ * ->i_size by other threads on this client are serialized by
-+ * ll_inode_size_lock(). This guarantees that short reads are handled
-+ * correctly in the face of concurrent writes and truncates.
-+ */
-+ inode_init_lvb(inode, &lvb);
-+ obd_merge_lvb(ll_i2sbi(inode)->ll_osc_exp, lsm, &lvb, 1);
-+ kms = lvb.lvb_size;
-+ if (*ppos + count - 1 > kms) {
-+ /* A glimpse is necessary to determine whether we return a
-+ * short read (B) or some zeroes at the end of the buffer (C) */
-+ ll_inode_size_unlock(inode, 1);
-+ retval = ll_glimpse_size(inode, LDLM_FL_BLOCK_GRANTED);
-+ if (retval)
-+ goto out;
-+ } else {
-+ /* region is within kms and, hence, within real file size (A) */
-+ i_size_write(inode, kms);
-+ ll_inode_size_unlock(inode, 1);
-+ }
-+
-+ CDEBUG(D_INFO, "Send ino %lu, "LPSZ" bytes, offset %lld, i_size %llu\n",
-+ inode->i_ino, count, *ppos, i_size_read(inode));
-+
-+ bead.lrr_start = *ppos >> CFS_PAGE_SHIFT;
-+ bead.lrr_count = (count + CFS_PAGE_SIZE - 1) >> CFS_PAGE_SHIFT;
-+ ll_ra_read_in(in_file, &bead);
-+ /* BUG: 5972 */
-+ file_accessed(in_file);
-+ retval = generic_file_sendfile(in_file, ppos, count, actor, target);
-+ ll_ra_read_ex(in_file, &bead);
-+
-+ out:
-+ ll_tree_unlock(&tree);
-+ RETURN(retval);
-+}
-+
-+static int ll_lov_recreate_obj(struct inode *inode, struct file *file,
-+ unsigned long arg)
-+{
-+ struct ll_inode_info *lli = ll_i2info(inode);
-+ struct obd_export *exp = ll_i2obdexp(inode);
-+ struct ll_recreate_obj ucreatp;
-+ struct obd_trans_info oti = { 0 };
-+ struct obdo *oa = NULL;
-+ int lsm_size;
-+ int rc = 0;
-+ struct lov_stripe_md *lsm, *lsm2;
-+ ENTRY;
-+
-+ if (!cfs_capable(CFS_CAP_SYS_ADMIN))
-+ RETURN(-EPERM);
-+
-+ rc = copy_from_user(&ucreatp, (struct ll_recreate_obj *)arg,
-+ sizeof(struct ll_recreate_obj));
-+ if (rc) {
-+ RETURN(-EFAULT);
-+ }
-+ OBDO_ALLOC(oa);
-+ if (oa == NULL)
-+ RETURN(-ENOMEM);
-+
-+ down(&lli->lli_size_sem);
-+ lsm = lli->lli_smd;
-+ if (lsm == NULL)
-+ GOTO(out, rc = -ENOENT);
-+ lsm_size = sizeof(*lsm) + (sizeof(struct lov_oinfo) *
-+ (lsm->lsm_stripe_count));
-+
-+ OBD_ALLOC(lsm2, lsm_size);
-+ if (lsm2 == NULL)
-+ GOTO(out, rc = -ENOMEM);
-+
-+ oa->o_id = ucreatp.lrc_id;
-+ oa->o_nlink = ucreatp.lrc_ost_idx;
-+ oa->o_flags |= OBD_FL_RECREATE_OBJS;
-+ oa->o_valid = OBD_MD_FLID | OBD_MD_FLFLAGS;
-+ obdo_from_inode(oa, inode, OBD_MD_FLTYPE | OBD_MD_FLATIME |
-+ OBD_MD_FLMTIME | OBD_MD_FLCTIME);
-+
-+ memcpy(lsm2, lsm, lsm_size);
-+ rc = obd_create(exp, oa, &lsm2, &oti);
-+
-+ OBD_FREE(lsm2, lsm_size);
-+ GOTO(out, rc);
-+out:
-+ up(&lli->lli_size_sem);
-+ OBDO_FREE(oa);
-+ return rc;
-+}
-+
-+int ll_lov_setstripe_ea_info(struct inode *inode, struct file *file,
-+ int flags, struct lov_user_md *lum,
-+ int lum_size)
-+{
-+ struct ll_inode_info *lli = ll_i2info(inode);
-+ struct lov_stripe_md *lsm;
-+ struct lookup_intent oit = {.it_op = IT_OPEN, .it_flags = flags};
-+ int rc = 0;
-+ ENTRY;
-+
-+ down(&lli->lli_size_sem);
-+ lsm = lli->lli_smd;
-+ if (lsm) {
-+ up(&lli->lli_size_sem);
-+ CDEBUG(D_IOCTL, "stripe already exists for ino %lu\n",
-+ inode->i_ino);
-+ RETURN(-EEXIST);
-+ }
-+
-+ rc = ll_intent_file_open(file, lum, lum_size, &oit);
-+ if (rc)
-+ GOTO(out, rc);
-+ if (it_disposition(&oit, DISP_LOOKUP_NEG))
-+ GOTO(out_req_free, rc = -ENOENT);
-+ rc = oit.d.lustre.it_status;
-+ if (rc < 0)
-+ GOTO(out_req_free, rc);
-+
-+ ll_release_openhandle(file->f_dentry, &oit);
-+
-+ out:
-+ up(&lli->lli_size_sem);
-+ ll_intent_release(&oit);
-+ RETURN(rc);
-+out_req_free:
-+ ptlrpc_req_finished((struct ptlrpc_request *) oit.d.lustre.it_data);
-+ goto out;
-+}
-+
-+int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename,
-+ struct lov_mds_md **lmmp, int *lmm_size,
-+ struct ptlrpc_request **request)
-+{
-+ struct ll_sb_info *sbi = ll_i2sbi(inode);
-+ struct ll_fid fid;
-+ struct mds_body *body;
-+ struct lov_mds_md *lmm = NULL;
-+ struct ptlrpc_request *req = NULL;
-+ int rc, lmmsize;
-+
-+ ll_inode2fid(&fid, inode);
-+
-+ rc = ll_get_max_mdsize(sbi, &lmmsize);
-+ if (rc)
-+ RETURN(rc);
-+
-+ rc = mdc_getattr_name(sbi->ll_mdc_exp, &fid,
-+ filename, strlen(filename) + 1,
-+ OBD_MD_FLEASIZE | OBD_MD_FLDIREA,
-+ lmmsize, &req);
-+ if (rc < 0) {
-+ CDEBUG(D_INFO, "mdc_getattr_name failed "
-+ "on %s: rc %d\n", filename, rc);
-+ GOTO(out, rc);
-+ }
-+
-+ body = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF,
-+ sizeof(*body));
-+ LASSERT(body != NULL); /* checked by mdc_getattr_name */
-+ /* swabbed by mdc_getattr_name */
-+ LASSERT(lustre_rep_swabbed(req, REPLY_REC_OFF));
-+
-+ lmmsize = body->eadatasize;
-+
-+ if (!(body->valid & (OBD_MD_FLEASIZE | OBD_MD_FLDIREA)) ||
-+ lmmsize == 0) {
-+ GOTO(out, rc = -ENODATA);
-+ }
-+
-+ lmm = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF + 1,
-+ lmmsize);
-+ LASSERT(lmm != NULL);
-+ LASSERT(lustre_rep_swabbed(req, REPLY_REC_OFF + 1));
-+
-+ if ((lmm->lmm_magic != cpu_to_le32(LOV_MAGIC)) &&
-+ (lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_JOIN))) {
-+ GOTO(out, rc = -EPROTO);
-+ }
-+ /*
-+ * This is coming from the MDS, so is probably in
-+ * little endian. We convert it to host endian before
-+ * passing it to userspace.
-+ */
-+ if (LOV_MAGIC != cpu_to_le32(LOV_MAGIC)) {
-+ if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC)) {
-+ lustre_swab_lov_user_md((struct lov_user_md *)lmm);
-+                        /* if the function was called for a directory, avoid
-+                         * swabbing non-existent lsm objects */
-+ if (S_ISREG(body->mode))
-+ lustre_swab_lov_user_md_objects((struct lov_user_md *)lmm);
-+ } else if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_JOIN)) {
-+ lustre_swab_lov_user_md_join((struct lov_user_md_join *)lmm);
-+ }
-+ }
-+
-+ if (lmm->lmm_magic == LOV_MAGIC_JOIN) {
-+ struct lov_stripe_md *lsm;
-+ struct lov_user_md_join *lmj;
-+ int lmj_size, i, aindex = 0;
-+
-+ rc = obd_unpackmd(sbi->ll_osc_exp, &lsm, lmm, lmmsize);
-+ if (rc < 0)
-+ GOTO(out, rc = -ENOMEM);
-+ rc = obd_checkmd(sbi->ll_osc_exp, sbi->ll_mdc_exp, lsm);
-+ if (rc)
-+ GOTO(out_free_memmd, rc);
-+
-+ lmj_size = sizeof(struct lov_user_md_join) +
-+ lsm->lsm_stripe_count *
-+ sizeof(struct lov_user_ost_data_join);
-+ OBD_ALLOC(lmj, lmj_size);
-+ if (!lmj)
-+ GOTO(out_free_memmd, rc = -ENOMEM);
-+
-+ memcpy(lmj, lmm, sizeof(struct lov_user_md_join));
-+ for (i = 0; i < lsm->lsm_stripe_count; i++) {
-+ struct lov_extent *lex =
-+ &lsm->lsm_array->lai_ext_array[aindex];
-+
-+ if (lex->le_loi_idx + lex->le_stripe_count <= i)
-+ aindex ++;
-+ CDEBUG(D_INFO, "aindex %d i %d l_extent_start "
-+ LPU64" len %d\n", aindex, i,
-+ lex->le_start, (int)lex->le_len);
-+ lmj->lmm_objects[i].l_extent_start =
-+ lex->le_start;
-+
-+ if ((int)lex->le_len == -1)
-+ lmj->lmm_objects[i].l_extent_end = -1;
-+ else
-+ lmj->lmm_objects[i].l_extent_end =
-+ lex->le_start + lex->le_len;
-+ lmj->lmm_objects[i].l_object_id =
-+ lsm->lsm_oinfo[i]->loi_id;
-+ lmj->lmm_objects[i].l_object_gr =
-+ lsm->lsm_oinfo[i]->loi_gr;
-+ lmj->lmm_objects[i].l_ost_gen =
-+ lsm->lsm_oinfo[i]->loi_ost_gen;
-+ lmj->lmm_objects[i].l_ost_idx =
-+ lsm->lsm_oinfo[i]->loi_ost_idx;
-+ }
-+ lmm = (struct lov_mds_md *)lmj;
-+ lmmsize = lmj_size;
-+out_free_memmd:
-+ obd_free_memmd(sbi->ll_osc_exp, &lsm);
-+ }
-+out:
-+ *lmmp = lmm;
-+ *lmm_size = lmmsize;
-+ *request = req;
-+ return rc;
-+}
-+static int ll_lov_setea(struct inode *inode, struct file *file,
-+ unsigned long arg)
-+{
-+ int flags = MDS_OPEN_HAS_OBJS | FMODE_WRITE;
-+ struct lov_user_md *lump;
-+ int lum_size = sizeof(struct lov_user_md) +
-+ sizeof(struct lov_user_ost_data);
-+ int rc;
-+ ENTRY;
-+
-+ if (!cfs_capable(CFS_CAP_SYS_ADMIN))
-+ RETURN(-EPERM);
-+
-+ OBD_ALLOC(lump, lum_size);
-+ if (lump == NULL) {
-+ RETURN(-ENOMEM);
-+ }
-+ rc = copy_from_user(lump, (struct lov_user_md *)arg, lum_size);
-+ if (rc) {
-+ OBD_FREE(lump, lum_size);
-+ RETURN(-EFAULT);
-+ }
-+
-+ rc = ll_lov_setstripe_ea_info(inode, file, flags, lump, lum_size);
-+
-+ OBD_FREE(lump, lum_size);
-+ RETURN(rc);
-+}
-+
-+static int ll_lov_setstripe(struct inode *inode, struct file *file,
-+ unsigned long arg)
-+{
-+ struct lov_user_md lum, *lump = (struct lov_user_md *)arg;
-+ int rc;
-+ int flags = FMODE_WRITE;
-+ ENTRY;
-+
-+ /* Bug 1152: copy properly when this is no longer true */
-+ LASSERT(sizeof(lum) == sizeof(*lump));
-+ LASSERT(sizeof(lum.lmm_objects[0]) == sizeof(lump->lmm_objects[0]));
-+ rc = copy_from_user(&lum, lump, sizeof(lum));
-+ if (rc)
-+ RETURN(-EFAULT);
-+
-+ rc = ll_lov_setstripe_ea_info(inode, file, flags, &lum, sizeof(lum));
-+ if (rc == 0) {
-+ put_user(0, &lump->lmm_stripe_count);
-+ rc = obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2obdexp(inode),
-+ 0, ll_i2info(inode)->lli_smd, lump);
-+ }
-+ RETURN(rc);
-+}
-+
-+static int ll_lov_getstripe(struct inode *inode, unsigned long arg)
-+{
-+ struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
-+
-+ if (!lsm)
-+ RETURN(-ENODATA);
-+
-+ return obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2obdexp(inode), 0, lsm,
-+ (void *)arg);
-+}
-+
-+static int ll_get_grouplock(struct inode *inode, struct file *file,
-+ unsigned long arg)
-+{
-+ struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
-+ ldlm_policy_data_t policy = { .l_extent = { .start = 0,
-+ .end = OBD_OBJECT_EOF}};
-+ struct lustre_handle lockh = { 0 };
-+ struct ll_inode_info *lli = ll_i2info(inode);
-+ struct lov_stripe_md *lsm = lli->lli_smd;
-+ int flags = 0, rc;
-+ ENTRY;
-+
-+ if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
-+ RETURN(-EINVAL);
-+ }
-+
-+ policy.l_extent.gid = arg;
-+ if (file->f_flags & O_NONBLOCK)
-+ flags = LDLM_FL_BLOCK_NOWAIT;
-+
-+ rc = ll_extent_lock(fd, inode, lsm, LCK_GROUP, &policy, &lockh, flags);
-+ if (rc)
-+ RETURN(rc);
-+
-+ fd->fd_flags |= LL_FILE_GROUP_LOCKED|LL_FILE_IGNORE_LOCK;
-+ fd->fd_gid = arg;
-+ memcpy(&fd->fd_cwlockh, &lockh, sizeof(lockh));
-+
-+ RETURN(0);
-+}
-+
-+static int ll_put_grouplock(struct inode *inode, struct file *file,
-+ unsigned long arg)
-+{
-+ struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
-+ struct ll_inode_info *lli = ll_i2info(inode);
-+ struct lov_stripe_md *lsm = lli->lli_smd;
-+ int rc;
-+ ENTRY;
-+
-+ if (!(fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
-+ /* Ugh, it's already unlocked. */
-+ RETURN(-EINVAL);
-+ }
-+
-+ if (fd->fd_gid != arg) /* Ugh? Unlocking with different gid? */
-+ RETURN(-EINVAL);
-+
-+ fd->fd_flags &= ~(LL_FILE_GROUP_LOCKED|LL_FILE_IGNORE_LOCK);
-+
-+ rc = ll_extent_unlock(fd, inode, lsm, LCK_GROUP, &fd->fd_cwlockh);
-+ if (rc)
-+ RETURN(rc);
-+
-+ fd->fd_gid = 0;
-+ memset(&fd->fd_cwlockh, 0, sizeof(fd->fd_cwlockh));
-+
-+ RETURN(0);
-+}
-+
-+#if LUSTRE_FIX >= 50
-+static int join_sanity_check(struct inode *head, struct inode *tail)
-+{
-+ ENTRY;
-+ if ((ll_i2sbi(head)->ll_flags & LL_SBI_JOIN) == 0) {
-+ CERROR("server do not support join \n");
-+ RETURN(-EINVAL);
-+ }
-+ if (!S_ISREG(tail->i_mode) || !S_ISREG(head->i_mode)) {
-+ CERROR("tail ino %lu and ino head %lu must be regular\n",
-+ head->i_ino, tail->i_ino);
-+ RETURN(-EINVAL);
-+ }
-+ if (head->i_ino == tail->i_ino) {
-+ CERROR("file %lu can not be joined to itself \n", head->i_ino);
-+ RETURN(-EINVAL);
-+ }
-+ if (i_size_read(head) % JOIN_FILE_ALIGN) {
-+ CERROR("hsize %llu must be times of 64K\n", i_size_read(head));
-+ RETURN(-EINVAL);
-+ }
-+ RETURN(0);
-+}
-+
-+static int join_file(struct inode *head_inode, struct file *head_filp,
-+ struct file *tail_filp)
-+{
-+ struct dentry *tail_dentry = tail_filp->f_dentry;
-+ struct lookup_intent oit = {.it_op = IT_OPEN,
-+ .it_flags = head_filp->f_flags|O_JOIN_FILE};
-+ struct ldlm_enqueue_info einfo = { LDLM_IBITS, LCK_PW,
-+ ll_mdc_blocking_ast, ldlm_completion_ast, NULL, NULL };
-+
-+ struct lustre_handle lockh;
-+ struct mdc_op_data *op_data;
-+ int rc;
-+ loff_t data;
-+ ENTRY;
-+
-+ tail_dentry = tail_filp->f_dentry;
-+
-+ OBD_ALLOC_PTR(op_data);
-+ if (op_data == NULL) {
-+ RETURN(-ENOMEM);
-+ }
-+
-+ data = i_size_read(head_inode);
-+ ll_prepare_mdc_op_data(op_data, head_inode,
-+ tail_dentry->d_parent->d_inode,
-+ tail_dentry->d_name.name,
-+ tail_dentry->d_name.len, 0, &data);
-+ rc = mdc_enqueue(ll_i2mdcexp(head_inode), &einfo, &oit,
-+ op_data, &lockh, NULL, 0, 0);
-+
-+ if (rc < 0)
-+ GOTO(out, rc);
-+
-+ rc = oit.d.lustre.it_status;
-+
-+ if (rc < 0 || it_open_error(DISP_OPEN_OPEN, &oit)) {
-+ rc = rc ? rc : it_open_error(DISP_OPEN_OPEN, &oit);
-+ ptlrpc_req_finished((struct ptlrpc_request *)
-+ oit.d.lustre.it_data);
-+ GOTO(out, rc);
-+ }
-+
-+        if (oit.d.lustre.it_lock_mode) { /* If we got a lock, release it right
-+ * away */
-+ ldlm_lock_decref(&lockh, oit.d.lustre.it_lock_mode);
-+ oit.d.lustre.it_lock_mode = 0;
-+ }
-+ ptlrpc_req_finished((struct ptlrpc_request *) oit.d.lustre.it_data);
-+ it_clear_disposition(&oit, DISP_ENQ_COMPLETE);
-+ ll_release_openhandle(head_filp->f_dentry, &oit);
-+out:
-+ if (op_data)
-+ OBD_FREE_PTR(op_data);
-+ ll_intent_release(&oit);
-+ RETURN(rc);
-+}
-+
-+static int ll_file_join(struct inode *head, struct file *filp,
-+ char *filename_tail)
-+{
-+ struct inode *tail = NULL, *first = NULL, *second = NULL;
-+ struct dentry *tail_dentry;
-+ struct file *tail_filp, *first_filp, *second_filp;
-+ struct ll_lock_tree first_tree, second_tree;
-+ struct ll_lock_tree_node *first_node, *second_node;
-+ struct ll_inode_info *hlli = ll_i2info(head), *tlli;
-+ int rc = 0, cleanup_phase = 0;
-+ ENTRY;
-+
-+ CDEBUG(D_VFSTRACE, "VFS Op:head=%lu/%u(%p) tail %s\n",
-+ head->i_ino, head->i_generation, head, filename_tail);
-+
-+ tail_filp = filp_open(filename_tail, O_WRONLY, 0644);
-+ if (IS_ERR(tail_filp)) {
-+ CERROR("Can not open tail file %s", filename_tail);
-+ rc = PTR_ERR(tail_filp);
-+ GOTO(cleanup, rc);
-+ }
-+ tail = igrab(tail_filp->f_dentry->d_inode);
-+
-+ tlli = ll_i2info(tail);
-+ tail_dentry = tail_filp->f_dentry;
-+ LASSERT(tail_dentry);
-+ cleanup_phase = 1;
-+
-+ /*reorder the inode for lock sequence*/
-+ first = head->i_ino > tail->i_ino ? head : tail;
-+ second = head->i_ino > tail->i_ino ? tail : head;
-+ first_filp = head->i_ino > tail->i_ino ? filp : tail_filp;
-+ second_filp = head->i_ino > tail->i_ino ? tail_filp : filp;
-+
-+ CDEBUG(D_INFO, "reorder object from %lu:%lu to %lu:%lu \n",
-+ head->i_ino, tail->i_ino, first->i_ino, second->i_ino);
-+ first_node = ll_node_from_inode(first, 0, OBD_OBJECT_EOF, LCK_EX);
-+ if (IS_ERR(first_node)){
-+ rc = PTR_ERR(first_node);
-+ GOTO(cleanup, rc);
-+ }
-+ first_tree.lt_fd = first_filp->private_data;
-+ rc = ll_tree_lock(&first_tree, first_node, NULL, 0, 0);
-+ if (rc != 0)
-+ GOTO(cleanup, rc);
-+ cleanup_phase = 2;
-+
-+ second_node = ll_node_from_inode(second, 0, OBD_OBJECT_EOF, LCK_EX);
-+ if (IS_ERR(second_node)){
-+ rc = PTR_ERR(second_node);
-+ GOTO(cleanup, rc);
-+ }
-+ second_tree.lt_fd = second_filp->private_data;
-+ rc = ll_tree_lock(&second_tree, second_node, NULL, 0, 0);
-+ if (rc != 0)
-+ GOTO(cleanup, rc);
-+ cleanup_phase = 3;
-+
-+ rc = join_sanity_check(head, tail);
-+ if (rc)
-+ GOTO(cleanup, rc);
-+
-+ rc = join_file(head, filp, tail_filp);
-+ if (rc)
-+ GOTO(cleanup, rc);
-+cleanup:
-+ switch (cleanup_phase) {
-+ case 3:
-+ ll_tree_unlock(&second_tree);
-+ obd_cancel_unused(ll_i2obdexp(second),
-+ ll_i2info(second)->lli_smd, 0, NULL);
-+ case 2:
-+ ll_tree_unlock(&first_tree);
-+ obd_cancel_unused(ll_i2obdexp(first),
-+ ll_i2info(first)->lli_smd, 0, NULL);
-+ case 1:
-+ filp_close(tail_filp, 0);
-+ if (tail)
-+ iput(tail);
-+ if (head && rc == 0) {
-+ obd_free_memmd(ll_i2sbi(head)->ll_osc_exp,
-+ &hlli->lli_smd);
-+ hlli->lli_smd = NULL;
-+ }
-+ case 0:
-+ break;
-+ default:
-+ CERROR("invalid cleanup_phase %d\n", cleanup_phase);
-+ LBUG();
-+ }
-+ RETURN(rc);
-+}
-+#endif /* LUSTRE_FIX >= 50 */
-+
-+/**
-+ * Close inode open handle
-+ *
-+ * \param dentry [in] dentry which contains the inode
-+ * \param it [in,out] intent which contains open info and result
-+ *
-+ * \retval 0 success
-+ * \retval <0 failure
-+ */
-+int ll_release_openhandle(struct dentry *dentry, struct lookup_intent *it)
-+{
-+ struct inode *inode = dentry->d_inode;
-+ struct obd_client_handle *och;
-+ int rc;
-+ ENTRY;
-+
-+ LASSERT(inode);
-+
-+ /* Root ? Do nothing. */
-+ if (dentry->d_inode->i_sb->s_root == dentry)
-+ RETURN(0);
-+
-+ /* No open handle to close? Move away */
-+ if (!it_disposition(it, DISP_OPEN_OPEN))
-+ RETURN(0);
-+
-+ LASSERT(it_open_error(DISP_OPEN_OPEN, it) == 0);
-+
-+ OBD_ALLOC(och, sizeof(*och));
-+ if (!och)
-+ GOTO(out, rc = -ENOMEM);
-+
-+ ll_och_fill(ll_i2info(inode), it, och);
-+
-+ rc = ll_close_inode_openhandle(inode, och);
-+
-+ OBD_FREE(och, sizeof(*och));
-+ out:
-+ /* this one is in place of ll_file_open */
-+ if (it_disposition(it, DISP_ENQ_OPEN_REF))
-+ ptlrpc_req_finished(it->d.lustre.it_data);
-+ it_clear_disposition(it, DISP_ENQ_OPEN_REF);
-+ RETURN(rc);
-+}
-+
-+int ll_fiemap(struct inode *inode, struct ll_user_fiemap *fiemap,
-+ int num_bytes)
-+{
-+ struct obd_export *exp = ll_i2obdexp(inode);
-+ struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
-+ struct ll_fiemap_info_key fm_key = { .name = KEY_FIEMAP, };
-+ int vallen = num_bytes;
-+ int rc;
-+ ENTRY;
-+
-+ /* If the stripe_count > 1 and the application does not understand
-+ * DEVICE_ORDER flag, then it cannot interpret the extents correctly.
-+ */
-+ if (lsm->lsm_stripe_count > 1 &&
-+ !(fiemap->fm_flags & FIEMAP_FLAG_DEVICE_ORDER))
-+ return -EOPNOTSUPP;
-+
-+ fm_key.oa.o_id = lsm->lsm_object_id;
-+ fm_key.oa.o_valid = OBD_MD_FLID;
-+
-+ obdo_from_inode(&fm_key.oa, inode, OBD_MD_FLFID | OBD_MD_FLSIZE);
-+
-+ /* If filesize is 0, then there would be no objects for mapping */
-+ if (fm_key.oa.o_size == 0) {
-+ fiemap->fm_mapped_extents = 0;
-+ RETURN(0);
-+ }
-+
-+ memcpy(&fm_key.fiemap, fiemap, sizeof(*fiemap));
-+
-+ rc = obd_get_info(exp, sizeof(fm_key), &fm_key, &vallen, fiemap, lsm);
-+ if (rc)
-+ CERROR("obd_get_info failed: rc = %d\n", rc);
-+
-+ RETURN(rc);
-+}
-+
-+int ll_file_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
-+ unsigned long arg)
-+{
-+ struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
-+ int flags;
-+ ENTRY;
-+
-+ CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),cmd=%x\n", inode->i_ino,
-+ inode->i_generation, inode, cmd);
-+ ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_IOCTL, 1);
-+
-+        /* asm-ppc{,64} declares TCGETS, et al. as type 't' not 'T' */
-+ if (_IOC_TYPE(cmd) == 'T' || _IOC_TYPE(cmd) == 't') /* tty ioctls */
-+ RETURN(-ENOTTY);
-+
-+ switch(cmd) {
-+ case LL_IOC_GETFLAGS:
-+ /* Get the current value of the file flags */
-+ return put_user(fd->fd_flags, (int *)arg);
-+ case LL_IOC_SETFLAGS:
-+ case LL_IOC_CLRFLAGS:
-+ /* Set or clear specific file flags */
-+ /* XXX This probably needs checks to ensure the flags are
-+ * not abused, and to handle any flag side effects.
-+ */
-+ if (get_user(flags, (int *) arg))
-+ RETURN(-EFAULT);
-+
-+ if (cmd == LL_IOC_SETFLAGS) {
-+ if ((flags & LL_FILE_IGNORE_LOCK) &&
-+ !(file->f_flags & O_DIRECT)) {
-+ CERROR("%s: unable to disable locking on "
-+ "non-O_DIRECT file\n", current->comm);
-+ RETURN(-EINVAL);
-+ }
-+
-+ fd->fd_flags |= flags;
-+ } else {
-+ fd->fd_flags &= ~flags;
-+ }
-+ RETURN(0);
-+ case LL_IOC_LOV_SETSTRIPE:
-+ RETURN(ll_lov_setstripe(inode, file, arg));
-+ case LL_IOC_LOV_SETEA:
-+ RETURN(ll_lov_setea(inode, file, arg));
-+ case LL_IOC_LOV_GETSTRIPE:
-+ RETURN(ll_lov_getstripe(inode, arg));
-+ case LL_IOC_RECREATE_OBJ:
-+ RETURN(ll_lov_recreate_obj(inode, file, arg));
-+ case EXT3_IOC_FIEMAP: {
-+ struct ll_user_fiemap *fiemap_s;
-+ size_t num_bytes, ret_bytes;
-+ unsigned int extent_count;
-+ int rc = 0;
-+
-+ /* Get the extent count so we can calculate the size of
-+ * required fiemap buffer */
-+ if (get_user(extent_count,
-+ &((struct ll_user_fiemap __user *)arg)->fm_extent_count))
-+ RETURN(-EFAULT);
-+ num_bytes = sizeof(*fiemap_s) + (extent_count *
-+ sizeof(struct ll_fiemap_extent));
-+ OBD_VMALLOC(fiemap_s, num_bytes);
-+ if (fiemap_s == NULL)
-+ RETURN(-ENOMEM);
-+
-+ if (copy_from_user(fiemap_s,(struct ll_user_fiemap __user *)arg,
-+ sizeof(*fiemap_s)))
-+ GOTO(error, rc = -EFAULT);
-+
-+ if (fiemap_s->fm_flags & ~LUSTRE_FIEMAP_FLAGS_COMPAT) {
-+ fiemap_s->fm_flags = fiemap_s->fm_flags &
-+ ~LUSTRE_FIEMAP_FLAGS_COMPAT;
-+ if (copy_to_user((char *)arg, fiemap_s,
-+ sizeof(*fiemap_s)))
-+ GOTO(error, rc = -EFAULT);
-+
-+ GOTO(error, rc = -EBADR);
-+ }
-+
-+ /* If fm_extent_count is non-zero, read the first extent since
-+ * it is used to calculate end_offset and device from previous
-+ * fiemap call. */
-+ if (extent_count) {
-+ if (copy_from_user(&fiemap_s->fm_extents[0],
-+ (char __user *)arg + sizeof(*fiemap_s),
-+ sizeof(struct ll_fiemap_extent)))
-+ GOTO(error, rc = -EFAULT);
-+ }
-+
-+ if (fiemap_s->fm_flags & FIEMAP_FLAG_SYNC) {
-+ int rc;
-+
-+ rc = filemap_fdatawrite(inode->i_mapping);
-+ if (rc)
-+ GOTO(error, rc);
-+ }
-+
-+ rc = ll_fiemap(inode, fiemap_s, num_bytes);
-+ if (rc)
-+ GOTO(error, rc);
-+
-+ ret_bytes = sizeof(struct ll_user_fiemap);
-+
-+ if (extent_count != 0)
-+ ret_bytes += (fiemap_s->fm_mapped_extents *
-+ sizeof(struct ll_fiemap_extent));
-+
-+ if (copy_to_user((void *)arg, fiemap_s, ret_bytes))
-+ rc = -EFAULT;
-+
-+error:
-+ OBD_VFREE(fiemap_s, num_bytes);
-+ RETURN(rc);
-+ }
-+ case EXT3_IOC_GETFLAGS:
-+ case EXT3_IOC_SETFLAGS:
-+ RETURN(ll_iocontrol(inode, file, cmd, arg));
-+ case EXT3_IOC_GETVERSION_OLD:
-+ case EXT3_IOC_GETVERSION:
-+ RETURN(put_user(inode->i_generation, (int *)arg));
-+ case LL_IOC_JOIN: {
-+#if LUSTRE_FIX >= 50
-+                /* Allow file join in beta builds to allow debugging */
-+ char *ftail;
-+ int rc;
-+
-+ ftail = getname((const char *)arg);
-+ if (IS_ERR(ftail))
-+ RETURN(PTR_ERR(ftail));
-+ rc = ll_file_join(inode, file, ftail);
-+ putname(ftail);
-+ RETURN(rc);
-+#else
-+ CWARN("file join is not supported in this version of Lustre\n");
-+ RETURN(-ENOTTY);
-+#endif
-+ }
-+ case LL_IOC_GROUP_LOCK:
-+ RETURN(ll_get_grouplock(inode, file, arg));
-+ case LL_IOC_GROUP_UNLOCK:
-+ RETURN(ll_put_grouplock(inode, file, arg));
-+ case IOC_OBD_STATFS:
-+ RETURN(ll_obd_statfs(inode, (void *)arg));
-+ case OBD_IOC_GETNAME_OLD:
-+ case OBD_IOC_GETNAME: {
-+ struct obd_device *obd =
-+ class_exp2obd(ll_i2sbi(inode)->ll_osc_exp);
-+ if (!obd)
-+ RETURN(-EFAULT);
-+ if (copy_to_user((void *)arg, obd->obd_name,
-+ strlen(obd->obd_name) + 1))
-+ RETURN (-EFAULT);
-+ RETURN(0);
-+ }
-+
-+ /* We need to special case any other ioctls we want to handle,
-+ * to send them to the MDS/OST as appropriate and to properly
-+ * network encode the arg field.
-+ case EXT3_IOC_SETVERSION_OLD:
-+ case EXT3_IOC_SETVERSION:
-+ */
-+ default: {
-+ int err;
-+
-+ if (LLIOC_STOP ==
-+ ll_iocontrol_call(inode, file, cmd, arg, &err))
-+ RETURN(err);
-+
-+ RETURN(obd_iocontrol(cmd, ll_i2obdexp(inode), 0, NULL,
-+ (void *)arg));
-+ }
-+ }
-+}
-+
-+loff_t ll_file_seek(struct file *file, loff_t offset, int origin)
-+{
-+ struct inode *inode = file->f_dentry->d_inode;
-+ struct ll_inode_info *lli = ll_i2info(inode);
-+ struct lov_stripe_md *lsm = lli->lli_smd;
-+ loff_t retval;
-+ ENTRY;
-+ retval = offset + ((origin == 2) ? i_size_read(inode) :
-+ (origin == 1) ? file->f_pos : 0);
-+ CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), to=%Lu=%#Lx(%s)\n",
-+ inode->i_ino, inode->i_generation, inode, retval, retval,
-+ origin == 2 ? "SEEK_END": origin == 1 ? "SEEK_CUR" : "SEEK_SET");
-+ ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_LLSEEK, 1);
-+
-+ if (origin == 2) { /* SEEK_END */
-+ int nonblock = 0, rc;
-+
-+ if (file->f_flags & O_NONBLOCK)
-+ nonblock = LDLM_FL_BLOCK_NOWAIT;
-+
-+ if (lsm != NULL) {
-+ rc = ll_glimpse_size(inode, nonblock);
-+ if (rc != 0)
-+ RETURN(rc);
-+ }
-+
-+ ll_inode_size_lock(inode, 0);
-+ offset += i_size_read(inode);
-+ ll_inode_size_unlock(inode, 0);
-+ } else if (origin == 1) { /* SEEK_CUR */
-+ offset += file->f_pos;
-+ }
-+
-+ retval = -EINVAL;
-+ if (offset >= 0 && offset <= ll_file_maxbytes(inode)) {
-+ if (offset != file->f_pos) {
-+ file->f_pos = offset;
-+ file->f_version = 0;
-+ }
-+ retval = offset;
-+ }
-+
-+ RETURN(retval);
-+}
-+
-+int ll_fsync(struct file *file, struct dentry *dentry, int data)
-+{
-+ struct inode *inode = dentry->d_inode;
-+ struct ll_inode_info *lli = ll_i2info(inode);
-+ struct lov_stripe_md *lsm = lli->lli_smd;
-+ struct ll_fid fid;
-+ struct ptlrpc_request *req;
-+ int rc, err;
-+ ENTRY;
-+ CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
-+ inode->i_generation, inode);
-+ ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FSYNC, 1);
-+
-+ /* fsync's caller has already called _fdata{sync,write}, we want
-+ * that IO to finish before calling the osc and mdc sync methods */
-+ rc = filemap_fdatawait(inode->i_mapping);
-+
-+ /* catch async errors that were recorded back when async writeback
-+ * failed for pages in this mapping. */
-+ err = lli->lli_async_rc;
-+ lli->lli_async_rc = 0;
-+ if (rc == 0)
-+ rc = err;
-+ if (lsm) {
-+ err = lov_test_and_clear_async_rc(lsm);
-+ if (rc == 0)
-+ rc = err;
-+ }
-+
-+ ll_inode2fid(&fid, inode);
-+ err = mdc_sync(ll_i2sbi(inode)->ll_mdc_exp, &fid, &req);
-+ if (!rc)
-+ rc = err;
-+ if (!err)
-+ ptlrpc_req_finished(req);
-+
-+ if (data && lsm) {
-+ struct obdo *oa;
-+
-+ OBDO_ALLOC(oa);
-+ if (!oa)
-+ RETURN(rc ? rc : -ENOMEM);
-+
-+ oa->o_id = lsm->lsm_object_id;
-+ oa->o_valid = OBD_MD_FLID;
-+ obdo_from_inode(oa, inode, OBD_MD_FLTYPE | OBD_MD_FLATIME |
-+ OBD_MD_FLMTIME | OBD_MD_FLCTIME);
-+
-+ err = obd_sync(ll_i2sbi(inode)->ll_osc_exp, oa, lsm,
-+ 0, OBD_OBJECT_EOF);
-+ if (!rc)
-+ rc = err;
-+ OBDO_FREE(oa);
-+ }
-+
-+ RETURN(rc);
-+}
-+
-+int ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock)
-+{
-+ struct inode *inode = file->f_dentry->d_inode;
-+ struct ll_sb_info *sbi = ll_i2sbi(inode);
-+ struct ldlm_res_id res_id =
-+ { .name = {inode->i_ino, inode->i_generation, LDLM_FLOCK} };
-+ struct ldlm_enqueue_info einfo = { LDLM_FLOCK, 0, NULL,
-+ ldlm_flock_completion_ast, NULL, file_lock };
-+ struct lustre_handle lockh = {0};
-+ ldlm_policy_data_t flock;
-+ int flags = 0;
-+ int rc;
-+ ENTRY;
-+
-+ CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu file_lock=%p\n",
-+ inode->i_ino, file_lock);
-+ ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FLOCK, 1);
-+
-+ if (file_lock->fl_flags & FL_FLOCK) {
-+ LASSERT((cmd == F_SETLKW) || (cmd == F_SETLK));
-+ /* set missing params for flock() calls */
-+ file_lock->fl_end = OFFSET_MAX;
-+ file_lock->fl_pid = current->tgid;
-+ }
-+ flock.l_flock.pid = file_lock->fl_pid;
-+ flock.l_flock.start = file_lock->fl_start;
-+ flock.l_flock.end = file_lock->fl_end;
-+
-+ switch (file_lock->fl_type) {
-+ case F_RDLCK:
-+ einfo.ei_mode = LCK_PR;
-+ break;
-+ case F_UNLCK:
-+ /* An unlock request may or may not have any relation to
-+ * existing locks so we may not be able to pass a lock handle
-+ * via a normal ldlm_lock_cancel() request. The request may even
-+ * unlock a byte range in the middle of an existing lock. In
-+ * order to process an unlock request we need all of the same
-+ * information that is given with a normal read or write record
-+ * lock request. To avoid creating another ldlm unlock (cancel)
-+ * message we'll treat a LCK_NL flock request as an unlock. */
-+ einfo.ei_mode = LCK_NL;
-+ break;
-+ case F_WRLCK:
-+ einfo.ei_mode = LCK_PW;
-+ break;
-+ default:
-+ CERROR("unknown fcntl lock type: %d\n", file_lock->fl_type);
-+ RETURN (-EINVAL);
-+ }
-+
-+ switch (cmd) {
-+ case F_SETLKW:
-+#ifdef F_SETLKW64
-+ case F_SETLKW64:
-+#endif
-+ flags = 0;
-+ break;
-+ case F_SETLK:
-+#ifdef F_SETLK64
-+ case F_SETLK64:
-+#endif
-+ flags = LDLM_FL_BLOCK_NOWAIT;
-+ break;
-+ case F_GETLK:
-+#ifdef F_GETLK64
-+ case F_GETLK64:
-+#endif
-+ flags = LDLM_FL_TEST_LOCK;
-+ /* Save the old mode so that if the mode in the lock changes we
-+ * can decrement the appropriate reader or writer refcount. */
-+ file_lock->fl_type = einfo.ei_mode;
-+ break;
-+ default:
-+ CERROR("unknown fcntl lock command: %d\n", cmd);
-+ RETURN (-EINVAL);
-+ }
-+
-+ CDEBUG(D_DLMTRACE, "inode=%lu, pid=%u, flags=%#x, mode=%u, "
-+ "start="LPU64", end="LPU64"\n", inode->i_ino, flock.l_flock.pid,
-+ flags, einfo.ei_mode, flock.l_flock.start, flock.l_flock.end);
-+
-+ rc = ldlm_cli_enqueue(sbi->ll_mdc_exp, NULL, &einfo, res_id,
-+ &flock, &flags, NULL, 0, NULL, &lockh, 0);
-+ if ((file_lock->fl_flags & FL_FLOCK) &&
-+ (rc == 0 || file_lock->fl_type == F_UNLCK))
-+ ll_flock_lock_file_wait(file, file_lock, (cmd == F_SETLKW));
-+#ifdef HAVE_F_OP_FLOCK
-+ if ((file_lock->fl_flags & FL_POSIX) &&
-+ (rc == 0 || file_lock->fl_type == F_UNLCK) &&
-+ !(flags & LDLM_FL_TEST_LOCK))
-+ posix_lock_file_wait(file, file_lock);
-+#endif
-+
-+ RETURN(rc);
-+}
-+
-+int ll_file_noflock(struct file *file, int cmd, struct file_lock *file_lock)
-+{
-+ ENTRY;
-+
-+ RETURN(-ENOSYS);
-+}
-+
-+int ll_have_md_lock(struct inode *inode, __u64 bits)
-+{
-+ struct lustre_handle lockh;
-+ struct ldlm_res_id res_id = { .name = {0} };
-+ struct obd_device *obddev;
-+ ldlm_policy_data_t policy = { .l_inodebits = {bits}};
-+ int flags;
-+ ENTRY;
-+
-+ if (!inode)
-+ RETURN(0);
-+
-+ obddev = ll_i2mdcexp(inode)->exp_obd;
-+ res_id.name[0] = inode->i_ino;
-+ res_id.name[1] = inode->i_generation;
-+
-+ CDEBUG(D_INFO, "trying to match res "LPU64"\n", res_id.name[0]);
-+
-+ flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING | LDLM_FL_TEST_LOCK;
-+ if (ldlm_lock_match(obddev->obd_namespace, flags, &res_id, LDLM_IBITS,
-+ &policy, LCK_CR|LCK_CW|LCK_PR|LCK_PW, &lockh)) {
-+ RETURN(1);
-+ }
-+
-+ RETURN(0);
-+}
-+
-+static int ll_inode_revalidate_fini(struct inode *inode, int rc) {
-+ if (rc == -ENOENT) { /* Already unlinked. Just update nlink
-+ * and return success */
-+ inode->i_nlink = 0;
-+ /* This path cannot be hit for regular files unless in
-+                 * case of obscure races, so no need to validate
-+ * size. */
-+ if (!S_ISREG(inode->i_mode) &&
-+ !S_ISDIR(inode->i_mode))
-+ return 0;
-+ }
-+
-+ if (rc) {
-+ CERROR("failure %d inode %lu\n", rc, inode->i_ino);
-+ return -abs(rc);
-+
-+ }
-+
-+ return 0;
-+}
-+
-+int ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it)
-+{
-+ struct inode *inode = dentry->d_inode;
-+ struct ptlrpc_request *req = NULL;
-+ struct obd_export *exp;
-+ int rc;
-+ ENTRY;
-+
-+ if (!inode) {
-+ CERROR("REPORT THIS LINE TO PETER\n");
-+ RETURN(0);
-+ }
-+ CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),name=%s\n",
-+ inode->i_ino, inode->i_generation, inode, dentry->d_name.name);
-+
-+ exp = ll_i2mdcexp(inode);
-+
-+ if (exp->exp_connect_flags & OBD_CONNECT_ATTRFID) {
-+ struct lookup_intent oit = { .it_op = IT_GETATTR };
-+ struct mdc_op_data op_data;
-+
-+ /* Call getattr by fid, so do not provide name at all. */
-+ ll_prepare_mdc_op_data(&op_data, dentry->d_parent->d_inode,
-+ dentry->d_inode, NULL, 0, 0, NULL);
-+ rc = mdc_intent_lock(exp, &op_data, NULL, 0,
-+ /* we are not interested in name
-+ based lookup */
-+ &oit, 0, &req,
-+ ll_mdc_blocking_ast, 0);
-+ if (rc < 0) {
-+ rc = ll_inode_revalidate_fini(inode, rc);
-+ GOTO (out, rc);
-+ }
-+
-+ rc = revalidate_it_finish(req, DLM_REPLY_REC_OFF, &oit, dentry);
-+ if (rc != 0) {
-+ ll_intent_release(&oit);
-+ GOTO(out, rc);
-+ }
-+
-+ /* Unlinked? Unhash dentry, so it is not picked up later by
-+ do_lookup() -> ll_revalidate_it(). We cannot use d_drop
-+ here to preserve get_cwd functionality on 2.6.
-+ Bug 10503 */
-+ if (!dentry->d_inode->i_nlink) {
-+ spin_lock(&ll_lookup_lock);
-+ spin_lock(&dcache_lock);
-+ ll_drop_dentry(dentry);
-+ spin_unlock(&dcache_lock);
-+ spin_unlock(&ll_lookup_lock);
-+ }
-+
-+ ll_lookup_finish_locks(&oit, dentry);
-+ } else if (!ll_have_md_lock(dentry->d_inode,
-+ MDS_INODELOCK_UPDATE|MDS_INODELOCK_LOOKUP)) {
-+ struct ll_sb_info *sbi = ll_i2sbi(dentry->d_inode);
-+ struct ll_fid fid;
-+ obd_valid valid = OBD_MD_FLGETATTR;
-+ int ealen = 0;
-+
-+ if (S_ISREG(inode->i_mode)) {
-+ rc = ll_get_max_mdsize(sbi, &ealen);
-+ if (rc)
-+ RETURN(rc);
-+ valid |= OBD_MD_FLEASIZE | OBD_MD_FLMODEASIZE;
-+ }
-+ ll_inode2fid(&fid, inode);
-+ rc = mdc_getattr(sbi->ll_mdc_exp, &fid, valid, ealen, &req);
-+ if (rc) {
-+ rc = ll_inode_revalidate_fini(inode, rc);
-+ RETURN(rc);
-+ }
-+
-+ rc = ll_prep_inode(sbi->ll_osc_exp, &inode, req, REPLY_REC_OFF,
-+ NULL);
-+ if (rc)
-+ GOTO(out, rc);
-+ }
-+
-+ /* if object not yet allocated, don't validate size */
-+ if (ll_i2info(inode)->lli_smd == NULL)
-+ GOTO(out, rc = 0);
-+
-+ /* ll_glimpse_size will prefer locally cached writes if they extend
-+ * the file */
-+ rc = ll_glimpse_size(inode, 0);
-+
-+out:
-+ ptlrpc_req_finished(req);
-+ RETURN(rc);
-+}
-+
-+int ll_getattr_it(struct vfsmount *mnt, struct dentry *de,
-+ struct lookup_intent *it, struct kstat *stat)
-+{
-+ struct inode *inode = de->d_inode;
-+ int res = 0;
-+
-+ res = ll_inode_revalidate_it(de, it);
-+ ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_GETATTR, 1);
-+
-+ if (res)
-+ return res;
-+
-+ stat->dev = inode->i_sb->s_dev;
-+ stat->ino = inode->i_ino;
-+ stat->mode = inode->i_mode;
-+ stat->nlink = inode->i_nlink;
-+ stat->uid = inode->i_uid;
-+ stat->gid = inode->i_gid;
-+ stat->rdev = kdev_t_to_nr(inode->i_rdev);
-+ stat->atime = inode->i_atime;
-+ stat->mtime = inode->i_mtime;
-+ stat->ctime = inode->i_ctime;
-+#ifdef HAVE_INODE_BLKSIZE
-+ stat->blksize = inode->i_blksize;
-+#else
-+ stat->blksize = 1<<inode->i_blkbits;
-+#endif
-+
-+ ll_inode_size_lock(inode, 0);
-+ stat->size = i_size_read(inode);
-+ stat->blocks = inode->i_blocks;
-+ ll_inode_size_unlock(inode, 0);
-+
-+ return 0;
-+}
-+int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat)
-+{
-+ struct lookup_intent it = { .it_op = IT_GETATTR };
-+
-+ return ll_getattr_it(mnt, de, &it, stat);
-+}
-+
-+static
-+int lustre_check_acl(struct inode *inode, int mask)
-+{
-+#ifdef CONFIG_FS_POSIX_ACL
-+ struct ll_inode_info *lli = ll_i2info(inode);
-+ struct posix_acl *acl;
-+ int rc;
-+ ENTRY;
-+
-+ spin_lock(&lli->lli_lock);
-+ acl = posix_acl_dup(lli->lli_posix_acl);
-+ spin_unlock(&lli->lli_lock);
-+
-+ if (!acl)
-+ RETURN(-EAGAIN);
-+
-+ rc = posix_acl_permission(inode, acl, mask);
-+ posix_acl_release(acl);
-+
-+ RETURN(rc);
-+#else
-+ return -EAGAIN;
-+#endif
-+}
-+
-+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,10))
-+int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd)
-+{
-+ CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), mask %o\n",
-+ inode->i_ino, inode->i_generation, inode, mask);
-+
-+ ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_INODE_PERM, 1);
-+ return generic_permission(inode, mask, lustre_check_acl);
-+}
-+#else
-+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
-+int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd)
-+#else
-+int ll_inode_permission(struct inode *inode, int mask)
-+#endif
-+{
-+ int mode = inode->i_mode;
-+ int rc;
-+
-+ CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), mask %o\n",
-+ inode->i_ino, inode->i_generation, inode, mask);
-+ ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_INODE_PERM, 1);
-+
-+ if ((mask & MAY_WRITE) && IS_RDONLY(inode) &&
-+ (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
-+ return -EROFS;
-+ if ((mask & MAY_WRITE) && IS_IMMUTABLE(inode))
-+ return -EACCES;
-+ if (current->fsuid == inode->i_uid) {
-+ mode >>= 6;
-+ } else if (1) {
-+ if (((mode >> 3) & mask & S_IRWXO) != mask)
-+ goto check_groups;
-+ rc = lustre_check_acl(inode, mask);
-+ if (rc == -EAGAIN)
-+ goto check_groups;
-+ if (rc == -EACCES)
-+ goto check_capabilities;
-+ return rc;
-+ } else {
-+check_groups:
-+ if (in_group_p(inode->i_gid))
-+ mode >>= 3;
-+ }
-+ if ((mode & mask & S_IRWXO) == mask)
-+ return 0;
-+
-+check_capabilities:
-+ if (!(mask & MAY_EXEC) ||
-+ (inode->i_mode & S_IXUGO) || S_ISDIR(inode->i_mode))
-+ if (cfs_capable(CFS_CAP_DAC_OVERRIDE))
-+ return 0;
-+
-+ if (cfs_capable(CFS_CAP_DAC_READ_SEARCH) && ((mask == MAY_READ) ||
-+ (S_ISDIR(inode->i_mode) && !(mask & MAY_WRITE))))
-+ return 0;
-+
-+ return -EACCES;
-+}
-+#endif
-+
-+/* -o localflock - only provides locally consistent flock locks */
-+struct file_operations ll_file_operations = {
-+ .read = ll_file_read,
-+#ifdef HAVE_FILE_READV
-+ .readv = ll_file_readv,
-+#else
-+ .aio_read = ll_file_aio_read,
-+#endif
-+ .write = ll_file_write,
-+#ifdef HAVE_FILE_WRITEV
-+ .writev = ll_file_writev,
-+#else
-+ .aio_write = ll_file_aio_write,
-+#endif
-+ .ioctl = ll_file_ioctl,
-+ .open = ll_file_open,
-+ .release = ll_file_release,
-+ .mmap = ll_file_mmap,
-+ .llseek = ll_file_seek,
-+ .sendfile = ll_file_sendfile,
-+ .fsync = ll_fsync,
-+};
-+
-+struct file_operations ll_file_operations_flock = {
-+ .read = ll_file_read,
-+#ifdef HAVE_FILE_READV
-+ .readv = ll_file_readv,
-+#else
-+ .aio_read = ll_file_aio_read,
-+#endif
-+ .write = ll_file_write,
-+#ifdef HAVE_FILE_WRITEV
-+ .writev = ll_file_writev,
-+#else
-+ .aio_write = ll_file_aio_write,
-+#endif
-+ .ioctl = ll_file_ioctl,
-+ .open = ll_file_open,
-+ .release = ll_file_release,
-+ .mmap = ll_file_mmap,
-+ .llseek = ll_file_seek,
-+ .sendfile = ll_file_sendfile,
-+ .fsync = ll_fsync,
-+#ifdef HAVE_F_OP_FLOCK
-+ .flock = ll_file_flock,
-+#endif
-+ .lock = ll_file_flock
-+};
-+
-+/* These are for -o noflock - to return ENOSYS on flock calls */
-+struct file_operations ll_file_operations_noflock = {
-+ .read = ll_file_read,
-+#ifdef HAVE_FILE_READV
-+ .readv = ll_file_readv,
-+#else
-+ .aio_read = ll_file_aio_read,
-+#endif
-+ .write = ll_file_write,
-+#ifdef HAVE_FILE_WRITEV
-+ .writev = ll_file_writev,
-+#else
-+ .aio_write = ll_file_aio_write,
-+#endif
-+ .ioctl = ll_file_ioctl,
-+ .open = ll_file_open,
-+ .release = ll_file_release,
-+ .mmap = ll_file_mmap,
-+ .llseek = ll_file_seek,
-+ .sendfile = ll_file_sendfile,
-+ .fsync = ll_fsync,
-+#ifdef HAVE_F_OP_FLOCK
-+ .flock = ll_file_noflock,
-+#endif
-+ .lock = ll_file_noflock
-+};
-+
-+struct inode_operations ll_file_inode_operations = {
-+#ifdef HAVE_VFS_INTENT_PATCHES
-+ .setattr_raw = ll_setattr_raw,
-+#endif
-+ .setattr = ll_setattr,
-+ .truncate = ll_truncate,
-+ .getattr = ll_getattr,
-+ .permission = ll_inode_permission,
-+ .setxattr = ll_setxattr,
-+ .getxattr = ll_getxattr,
-+ .listxattr = ll_listxattr,
-+ .removexattr = ll_removexattr,
-+};
-+
-+/* dynamic ioctl number support routines */
-+static struct llioc_ctl_data {
-+ struct rw_semaphore ioc_sem;
-+ struct list_head ioc_head;
-+} llioc = {
-+ __RWSEM_INITIALIZER(llioc.ioc_sem),
-+ CFS_LIST_HEAD_INIT(llioc.ioc_head)
-+};
-+
-+
-+struct llioc_data {
-+ struct list_head iocd_list;
-+ unsigned int iocd_size;
-+ llioc_callback_t iocd_cb;
-+ unsigned int iocd_count;
-+ unsigned int iocd_cmd[0];
-+};
-+
-+void *ll_iocontrol_register(llioc_callback_t cb, int count, unsigned int *cmd)
-+{
-+ unsigned int size;
-+ struct llioc_data *in_data = NULL;
-+ ENTRY;
-+
-+ if (cb == NULL || cmd == NULL ||
-+ count > LLIOC_MAX_CMD || count < 0)
-+ RETURN(NULL);
-+
-+ size = sizeof(*in_data) + count * sizeof(unsigned int);
-+ OBD_ALLOC(in_data, size);
-+ if (in_data == NULL)
-+ RETURN(NULL);
-+
-+ memset(in_data, 0, sizeof(*in_data));
-+ in_data->iocd_size = size;
-+ in_data->iocd_cb = cb;
-+ in_data->iocd_count = count;
-+ memcpy(in_data->iocd_cmd, cmd, sizeof(unsigned int) * count);
-+
-+ down_write(&llioc.ioc_sem);
-+ list_add_tail(&in_data->iocd_list, &llioc.ioc_head);
-+ up_write(&llioc.ioc_sem);
-+
-+ RETURN(in_data);
-+}
-+
-+void ll_iocontrol_unregister(void *magic)
-+{
-+ struct llioc_data *tmp;
-+
-+ if (magic == NULL)
-+ return;
-+
-+ down_write(&llioc.ioc_sem);
-+ list_for_each_entry(tmp, &llioc.ioc_head, iocd_list) {
-+ if (tmp == magic) {
-+ unsigned int size = tmp->iocd_size;
-+
-+ list_del(&tmp->iocd_list);
-+ up_write(&llioc.ioc_sem);
-+
-+ OBD_FREE(tmp, size);
-+ return;
-+ }
-+ }
-+ up_write(&llioc.ioc_sem);
-+
-+ CWARN("didn't find iocontrol register block with magic: %p\n", magic);
-+}
-+
-+EXPORT_SYMBOL(ll_iocontrol_register);
-+EXPORT_SYMBOL(ll_iocontrol_unregister);
-+
-+enum llioc_iter ll_iocontrol_call(struct inode *inode, struct file *file,
-+ unsigned int cmd, unsigned long arg, int *rcp)
-+{
-+ enum llioc_iter ret = LLIOC_CONT;
-+ struct llioc_data *data;
-+ int rc = -EINVAL, i;
-+
-+ down_read(&llioc.ioc_sem);
-+ list_for_each_entry(data, &llioc.ioc_head, iocd_list) {
-+ for (i = 0; i < data->iocd_count; i++) {
-+ if (cmd != data->iocd_cmd[i])
-+ continue;
-+
-+ ret = data->iocd_cb(inode, file, cmd, arg, data, &rc);
-+ break;
-+ }
-+
-+ if (ret == LLIOC_STOP)
-+ break;
-+ }
-+ up_read(&llioc.ioc_sem);
-+
-+ if (rcp)
-+ *rcp = rc;
-+ return ret;
-+}
-diff -urNad lustre~/lustre/llite/llite_internal.h lustre/lustre/llite/llite_internal.h
---- lustre~/lustre/llite/llite_internal.h 2009-03-12 10:32:27.000000000 +0100
-+++ lustre/lustre/llite/llite_internal.h 2009-03-13 09:45:03.000000000 +0100
-@@ -647,7 +647,7 @@
- struct lookup_intent *it, struct kstat *stat);
- int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat);
- struct ll_file_data *ll_file_data_get(void);
--#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
-+#ifndef HAVE_INODE_PERMISION_2ARGS
- int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd);
- #else
- int ll_inode_permission(struct inode *inode, int mask);
-@@ -727,9 +727,6 @@
- /* llite/llite_nfs.c */
- extern struct export_operations lustre_export_operations;
- __u32 get_uuid2int(const char *name, int len);
--struct dentry *ll_fh_to_dentry(struct super_block *sb, __u32 *data, int len,
-- int fhtype, int parent);
--int ll_dentry_to_fh(struct dentry *, __u32 *datap, int *lenp, int need_parent);
-
- /* llite/special.c */
- extern struct inode_operations ll_special_inode_operations;
-diff -urNad lustre~/lustre/llite/llite_internal.h.orig lustre/lustre/llite/llite_internal.h.orig
---- lustre~/lustre/llite/llite_internal.h.orig 1970-01-01 00:00:00.000000000 +0000
-+++ lustre/lustre/llite/llite_internal.h.orig 2009-03-13 09:45:03.000000000 +0100
-@@ -0,0 +1,1027 @@
-+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-+ * vim:expandtab:shiftwidth=8:tabstop=8:
-+ *
-+ * GPL HEADER START
-+ *
-+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
-+ *
-+ * This program is free software; you can redistribute it and/or modify
-+ * it under the terms of the GNU General Public License version 2 only,
-+ * as published by the Free Software Foundation.
-+ *
-+ * This program is distributed in the hope that it will be useful, but
-+ * WITHOUT ANY WARRANTY; without even the implied warranty of
-+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-+ * General Public License version 2 for more details (a copy is included
-+ * in the LICENSE file that accompanied this code).
-+ *
-+ * You should have received a copy of the GNU General Public License
-+ * version 2 along with this program; If not, see
-+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
-+ *
-+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
-+ * CA 95054 USA or visit www.sun.com if you need additional information or
-+ * have any questions.
-+ *
-+ * GPL HEADER END
-+ */
-+/*
-+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved
-+ * Use is subject to license terms.
-+ */
-+/*
-+ * This file is part of Lustre, http://www.lustre.org/
-+ * Lustre is a trademark of Sun Microsystems, Inc.
-+ */
-+
-+#ifndef LLITE_INTERNAL_H
-+#define LLITE_INTERNAL_H
-+
-+#ifdef CONFIG_FS_POSIX_ACL
-+# include <linux/fs.h>
-+#ifdef HAVE_XATTR_ACL
-+# include <linux/xattr_acl.h>
-+#endif
-+#ifdef HAVE_LINUX_POSIX_ACL_XATTR_H
-+# include <linux/posix_acl_xattr.h>
-+#endif
-+#endif
-+
-+#include <lustre_debug.h>
-+#include <lustre_ver.h>
-+#include <linux/lustre_version.h>
-+#include <lustre_disk.h> /* for s2sbi */
-+
-+#ifndef HAVE_LE_TYPES
-+typedef __u16 __le16;
-+typedef __u32 __le32;
-+#endif
-+
-+/*
-+struct lustre_intent_data {
-+ __u64 it_lock_handle[2];
-+ __u32 it_disposition;
-+ __u32 it_status;
-+ __u32 it_lock_mode;
-+ }; */
-+
-+/* If there is no FMODE_EXEC defined, make it match nothing */
-+#ifndef FMODE_EXEC
-+#define FMODE_EXEC 0
-+#endif
-+
-+#define LL_IT2STR(it) ((it) ? ldlm_it2str((it)->it_op) : "0")
-+#define LUSTRE_FPRIVATE(file) ((file)->private_data)
-+
-+#ifdef HAVE_VFS_INTENT_PATCHES
-+static inline struct lookup_intent *ll_nd2it(struct nameidata *nd)
-+{
-+ return &nd->intent;
-+}
-+#endif
-+
-+/*
-+ * Directory entries are currently in the same format as ext2/ext3, but will
-+ * be changed in the future to accommodate FIDs
-+ */
-+#define LL_DIR_NAME_LEN (255)
-+#define LL_DIR_PAD (4)
-+
-+struct ll_dir_entry {
-+ /* number of inode, referenced by this entry */
-+ __le32 lde_inode;
-+ /* total record length, multiple of LL_DIR_PAD */
-+ __le16 lde_rec_len;
-+ /* length of name */
-+ __u8 lde_name_len;
-+ /* file type: regular, directory, device, etc. */
-+ __u8 lde_file_type;
-+ /* name. NOT NUL-terminated */
-+ char lde_name[LL_DIR_NAME_LEN];
-+};
-+
-+struct ll_dentry_data {
-+ int lld_cwd_count;
-+ int lld_mnt_count;
-+ struct obd_client_handle lld_cwd_och;
-+ struct obd_client_handle lld_mnt_och;
-+#ifndef HAVE_VFS_INTENT_PATCHES
-+ struct lookup_intent *lld_it;
-+#endif
-+ unsigned int lld_sa_generation;
-+};
-+
-+#define ll_d2d(de) ((struct ll_dentry_data*)((de)->d_fsdata))
-+
-+extern struct file_operations ll_pgcache_seq_fops;
-+
-+#define LLI_INODE_MAGIC 0x111d0de5
-+#define LLI_INODE_DEAD 0xdeadd00d
-+#define LLI_F_HAVE_OST_SIZE_LOCK 0
-+#define LLI_F_HAVE_MDS_SIZE_LOCK 1
-+#define LLI_F_CONTENDED 2
-+#define LLI_F_SRVLOCK 3
-+
-+struct ll_inode_info {
-+ int lli_inode_magic;
-+ struct semaphore lli_size_sem; /* protect open and change size */
-+ void *lli_size_sem_owner;
-+ struct semaphore lli_write_sem;
-+ struct lov_stripe_md *lli_smd;
-+ char *lli_symlink_name;
-+ __u64 lli_maxbytes;
-+ __u64 lli_io_epoch;
-+ unsigned long lli_flags;
-+ cfs_time_t lli_contention_time;
-+
-+ /* this lock protects s_d_w and p_w_ll and mmap_cnt */
-+ spinlock_t lli_lock;
-+#ifdef HAVE_CLOSE_THREAD
-+ struct list_head lli_pending_write_llaps;
-+ struct list_head lli_close_item;
-+ int lli_send_done_writing;
-+#endif
-+ atomic_t lli_mmap_cnt;
-+
-+ /* for writepage() only to communicate to fsync */
-+ int lli_async_rc;
-+
-+ struct posix_acl *lli_posix_acl;
-+
-+ struct list_head lli_dead_list;
-+
-+ struct semaphore lli_och_sem; /* Protects access to och pointers
-+ and their usage counters */
-+ /* We need all three because every inode may be opened in different
-+ modes */
-+ struct obd_client_handle *lli_mds_read_och;
-+ __u64 lli_open_fd_read_count;
-+ struct obd_client_handle *lli_mds_write_och;
-+ __u64 lli_open_fd_write_count;
-+ struct obd_client_handle *lli_mds_exec_och;
-+ __u64 lli_open_fd_exec_count;
-+ struct inode lli_vfs_inode;
-+
-+ /* metadata stat-ahead */
-+ /*
-+         * "opendir_pid" is the token set at lookup/revalidate time -- it
-+         * identifies the owner of dir statahead.
-+ */
-+ pid_t lli_opendir_pid;
-+ /*
-+ * since parent-child threads can share the same @file struct,
-+         * "opendir_key" is the token used at dir close for the case where the
-+         * parent exits before the child -- it marks who should clean up the
-+         * dir readahead. */
-+ void *lli_opendir_key;
-+ struct ll_statahead_info *lli_sai;
-+};
-+
-+/*
-+ * Locking to guarantee consistency of non-atomic updates to long long i_size,
-+ * consistency between file size and KMS, and consistency within
-+ * ->lli_smd->lsm_oinfo[]'s.
-+ *
-+ * Implemented by ->lli_size_sem and ->lsm_sem, nested in that order.
-+ */
-+
-+void ll_inode_size_lock(struct inode *inode, int lock_lsm);
-+void ll_inode_size_unlock(struct inode *inode, int unlock_lsm);
-+
-+// FIXME: replace the name of this with LL_I to conform to kernel stuff
-+// static inline struct ll_inode_info *LL_I(struct inode *inode)
-+static inline struct ll_inode_info *ll_i2info(struct inode *inode)
-+{
-+ return container_of(inode, struct ll_inode_info, lli_vfs_inode);
-+}
-+
-+/* default to about 40meg of readahead on a given system. That much tied
-+ * up in 512k readahead requests serviced at 40ms each is about 1GB/s. */
-+#define SBI_DEFAULT_READAHEAD_MAX (40UL << (20 - CFS_PAGE_SHIFT))
-+
-+/* default to read-ahead full files smaller than 2MB on the second read */
-+#define SBI_DEFAULT_READAHEAD_WHOLE_MAX (2UL << (20 - CFS_PAGE_SHIFT))
-+
-+enum ra_stat {
-+ RA_STAT_HIT = 0,
-+ RA_STAT_MISS,
-+ RA_STAT_DISTANT_READPAGE,
-+ RA_STAT_MISS_IN_WINDOW,
-+ RA_STAT_FAILED_GRAB_PAGE,
-+ RA_STAT_FAILED_MATCH,
-+ RA_STAT_DISCARDED,
-+ RA_STAT_ZERO_LEN,
-+ RA_STAT_ZERO_WINDOW,
-+ RA_STAT_EOF,
-+ RA_STAT_MAX_IN_FLIGHT,
-+ RA_STAT_WRONG_GRAB_PAGE,
-+ _NR_RA_STAT,
-+};
-+
-+struct ll_ra_info {
-+ unsigned long ra_cur_pages;
-+ unsigned long ra_max_pages;
-+ unsigned long ra_max_read_ahead_whole_pages;
-+ unsigned long ra_stats[_NR_RA_STAT];
-+};
-+
-+/* LL_HIST_MAX=32 causes an overflow */
-+#define LL_HIST_MAX 28
-+#define LL_HIST_START 12 /* buckets start at 2^12 = 4k */
-+#define LL_PROCESS_HIST_MAX 10
-+struct per_process_info {
-+ pid_t pid;
-+ struct obd_histogram pp_r_hist;
-+ struct obd_histogram pp_w_hist;
-+};
-+
-+/* pp_extents[LL_PROCESS_HIST_MAX] will hold the combined process info */
-+struct ll_rw_extents_info {
-+ struct per_process_info pp_extents[LL_PROCESS_HIST_MAX + 1];
-+};
-+
-+#define LL_OFFSET_HIST_MAX 100
-+struct ll_rw_process_info {
-+ pid_t rw_pid;
-+ int rw_op;
-+ loff_t rw_range_start;
-+ loff_t rw_range_end;
-+ loff_t rw_last_file_pos;
-+ loff_t rw_offset;
-+ size_t rw_smallest_extent;
-+ size_t rw_largest_extent;
-+ struct file *rw_last_file;
-+};
-+
-+
-+enum stats_track_type {
-+ STATS_TRACK_ALL = 0, /* track all processes */
-+ STATS_TRACK_PID, /* track process with this pid */
-+ STATS_TRACK_PPID, /* track processes with this ppid */
-+ STATS_TRACK_GID, /* track processes with this gid */
-+ STATS_TRACK_LAST,
-+};
-+
-+/* flags for sbi->ll_flags */
-+#define LL_SBI_NOLCK 0x01 /* DLM locking disabled (directio-only) */
-+#define LL_SBI_DATA_CHECKSUM 0x02 /* checksum each page on the wire */
-+#define LL_SBI_FLOCK 0x04
-+#define LL_SBI_USER_XATTR 0x08 /* support user xattr */
-+#define LL_SBI_ACL 0x10 /* support ACL */
-+#define LL_SBI_JOIN 0x20 /* support JOIN */
-+#define LL_SBI_LOCALFLOCK 0x40 /* Local flocks support by kernel */
-+#define LL_SBI_LRU_RESIZE 0x80 /* support lru resize */
-+#define LL_SBI_LLITE_CHECKSUM 0x100 /* checksum each page in memory */
-+
-+/* default value for ll_sb_info->contention_time */
-+#define SBI_DEFAULT_CONTENTION_SECONDS 60
-+/* default value for lockless_truncate_enable */
-+#define SBI_DEFAULT_LOCKLESS_TRUNCATE_ENABLE 1
-+
-+struct ll_sb_info {
-+ struct list_head ll_list;
-+ /* this protects pglist and ra_info. It isn't safe to
-+ * grab from interrupt contexts */
-+ spinlock_t ll_lock;
-+ spinlock_t ll_pp_extent_lock; /* Lock for pp_extent entries */
-+ spinlock_t ll_process_lock; /* Lock for ll_rw_process_info */
-+ struct obd_uuid ll_sb_uuid;
-+ struct obd_export *ll_mdc_exp;
-+ struct obd_export *ll_osc_exp;
-+ struct proc_dir_entry *ll_proc_root;
-+ obd_id ll_rootino; /* number of root inode */
-+
-+ int ll_flags;
-+ struct list_head ll_conn_chain; /* per-conn chain of SBs */
-+ struct lustre_client_ocd ll_lco;
-+
-+ struct list_head ll_orphan_dentry_list; /*please don't ask -p*/
-+ struct ll_close_queue *ll_lcq;
-+
-+ struct lprocfs_stats *ll_stats; /* lprocfs stats counter */
-+
-+ unsigned long ll_async_page_max;
-+ unsigned long ll_async_page_count;
-+ unsigned long ll_pglist_gen;
-+ struct list_head ll_pglist; /* all pages (llap_pglist_item) */
-+
-+ unsigned ll_contention_time; /* seconds */
-+ unsigned ll_lockless_truncate_enable; /* true/false */
-+
-+ struct ll_ra_info ll_ra_info;
-+ unsigned int ll_namelen;
-+ struct file_operations *ll_fop;
-+
-+#ifdef HAVE_EXPORT___IGET
-+ struct list_head ll_deathrow; /* inodes to be destroyed (b1443) */
-+ spinlock_t ll_deathrow_lock;
-+#endif
-+ /* =0 - hold lock over whole read/write
-+ * >0 - max. chunk to be read/written w/o lock re-acquiring */
-+ unsigned long ll_max_rw_chunk;
-+
-+ /* Statistics */
-+ struct ll_rw_extents_info ll_rw_extents_info;
-+ int ll_extent_process_count;
-+ struct ll_rw_process_info ll_rw_process_info[LL_PROCESS_HIST_MAX];
-+ unsigned int ll_offset_process_count;
-+ struct ll_rw_process_info ll_rw_offset_info[LL_OFFSET_HIST_MAX];
-+ unsigned int ll_rw_offset_entry_count;
-+ enum stats_track_type ll_stats_track_type;
-+ int ll_stats_track_id;
-+ int ll_rw_stats_on;
-+ dev_t ll_sdev_orig; /* save s_dev before assign for
-+                                                 * clustered nfs */
-+
-+ /* metadata stat-ahead */
-+ unsigned int ll_sa_max; /* max statahead RPCs */
-+ unsigned int ll_sa_wrong; /* statahead thread stopped for
-+ * low hit ratio */
-+ unsigned int ll_sa_total; /* statahead thread started
-+ * count */
-+ unsigned long long ll_sa_blocked; /* ls count waiting for
-+ * statahead */
-+ unsigned long long ll_sa_cached; /* ls count got in cache */
-+ unsigned long long ll_sa_hit; /* hit count */
-+ unsigned long long ll_sa_miss; /* miss count */
-+};
-+
-+#define LL_DEFAULT_MAX_RW_CHUNK (32 * 1024 * 1024)
-+
-+struct ll_ra_read {
-+ pgoff_t lrr_start;
-+ pgoff_t lrr_count;
-+ struct task_struct *lrr_reader;
-+ struct list_head lrr_linkage;
-+};
-+
-+/*
-+ * per file-descriptor read-ahead data.
-+ */
-+struct ll_readahead_state {
-+ spinlock_t ras_lock;
-+ /*
-+ * index of the last page that read(2) needed and that wasn't in the
-+ * cache. Used by ras_update() to detect seeks.
-+ *
-+ * XXX nikita: if access seeks into cached region, Lustre doesn't see
-+ * this.
-+ */
-+ unsigned long ras_last_readpage;
-+ /*
-+ * number of pages read after last read-ahead window reset. As window
-+ * is reset on each seek, this is effectively a number of consecutive
-+ * accesses. Maybe ->ras_accessed_in_window is better name.
-+ *
-+ * XXX nikita: window is also reset (by ras_update()) when Lustre
-+ * believes that memory pressure evicts read-ahead pages. In that
-+ * case, it probably doesn't make sense to expand window to
-+ * PTLRPC_MAX_BRW_PAGES on the third access.
-+ */
-+ unsigned long ras_consecutive_pages;
-+ /*
-+         * number of read requests after the last read-ahead window reset.
-+         * As window is reset on each seek, this is effectively the number
-+         * of consecutive read requests and is used to trigger read-ahead.
-+ */
-+ unsigned long ras_consecutive_requests;
-+ /*
-+ * Parameters of current read-ahead window. Handled by
-+ * ras_update(). On the initial access to the file or after a seek,
-+ * window is reset to 0. After 3 consecutive accesses, window is
-+ * expanded to PTLRPC_MAX_BRW_PAGES. Afterwards, window is enlarged by
-+ * PTLRPC_MAX_BRW_PAGES chunks up to ->ra_max_pages.
-+ */
-+ unsigned long ras_window_start, ras_window_len;
-+ /*
-+ * Where next read-ahead should start at. This lies within read-ahead
-+ * window. Read-ahead window is read in pieces rather than at once
-+ * because: 1. lustre limits total number of pages under read-ahead by
-+ * ->ra_max_pages (see ll_ra_count_get()), 2. client cannot read pages
-+ * not covered by DLM lock.
-+ */
-+ unsigned long ras_next_readahead;
-+ /*
-+ * Total number of ll_file_read requests issued, reads originating
-+ * due to mmap are not counted in this total. This value is used to
-+ * trigger full file read-ahead after multiple reads to a small file.
-+ */
-+ unsigned long ras_requests;
-+ /*
-+         * Page index with respect to the current request; this value
-+ * will not be accurate when dealing with reads issued via mmap.
-+ */
-+ unsigned long ras_request_index;
-+ /*
-+         * list of struct ll_ra_read's, one per read(2) call currently in
-+ * progress against this file descriptor. Used by read-ahead code,
-+ * protected by ->ras_lock.
-+ */
-+ struct list_head ras_read_beads;
-+ /*
-+ * The following 3 items are used for detecting the stride I/O
-+ * mode.
-+ * In stride I/O mode,
-+ * ...............|-----data-----|****gap*****|--------|******|....
-+ * offset |-stride_pages-|-stride_gap-|
-+ * ras_stride_offset = offset;
-+ * ras_stride_length = stride_pages + stride_gap;
-+ * ras_stride_pages = stride_pages;
-+ * Note: all these three items are counted by pages.
-+ */
-+ unsigned long ras_stride_length;
-+ unsigned long ras_stride_pages;
-+ pgoff_t ras_stride_offset;
-+ /*
-+         * number of consecutive stride requests; similar to
-+         * ras_consecutive_requests, but used for stride I/O mode.
-+         * Note: stride read-ahead is enabled only once more than 2
-+         * consecutive stride requests have been detected.
-+ */
-+ unsigned long ras_consecutive_stride_requests;
-+};
-+
-+extern cfs_mem_cache_t *ll_file_data_slab;
-+struct lustre_handle;
-+struct ll_file_data {
-+ struct ll_readahead_state fd_ras;
-+ int fd_omode;
-+ struct lustre_handle fd_cwlockh;
-+ unsigned long fd_gid;
-+ __u32 fd_flags;
-+};
-+
-+struct lov_stripe_md;
-+
-+extern spinlock_t inode_lock;
-+
-+extern struct proc_dir_entry *proc_lustre_fs_root;
-+
-+static inline struct inode *ll_info2i(struct ll_inode_info *lli)
-+{
-+ return &lli->lli_vfs_inode;
-+}
-+
-+struct it_cb_data {
-+ struct inode *icbd_parent;
-+ struct dentry **icbd_childp;
-+ obd_id hash;
-+};
-+
-+void ll_i2gids(__u32 *suppgids, struct inode *i1,struct inode *i2);
-+
-+#define LLAP_MAGIC 98764321
-+
-+extern cfs_mem_cache_t *ll_async_page_slab;
-+extern size_t ll_async_page_slab_size;
-+struct ll_async_page {
-+ int llap_magic;
-+ /* only trust these if the page lock is providing exclusion */
-+ unsigned int llap_write_queued:1,
-+ llap_defer_uptodate:1,
-+ llap_origin:3,
-+ llap_ra_used:1,
-+ llap_ignore_quota:1,
-+ llap_nocache:1,
-+ llap_lockless_io_page:1;
-+ void *llap_cookie;
-+ struct page *llap_page;
-+ struct list_head llap_pending_write;
-+ struct list_head llap_pglist_item;
-+ /* checksum for paranoid I/O debugging */
-+ __u32 llap_checksum;
-+};
-+
-+/*
-+ * enumeration of llap_from_page() call-sites. Used to export statistics in
-+ * /proc/fs/lustre/llite/fsN/dump_page_cache.
-+ */
-+enum {
-+ LLAP_ORIGIN_UNKNOWN = 0,
-+ LLAP_ORIGIN_READPAGE,
-+ LLAP_ORIGIN_READAHEAD,
-+ LLAP_ORIGIN_COMMIT_WRITE,
-+ LLAP_ORIGIN_WRITEPAGE,
-+ LLAP_ORIGIN_REMOVEPAGE,
-+ LLAP_ORIGIN_LOCKLESS_IO,
-+ LLAP__ORIGIN_MAX,
-+};
-+extern char *llap_origins[];
-+
-+#ifdef HAVE_REGISTER_CACHE
-+#define ll_register_cache(cache) register_cache(cache)
-+#define ll_unregister_cache(cache) unregister_cache(cache)
-+#else
-+#define ll_register_cache(cache) do {} while (0)
-+#define ll_unregister_cache(cache) do {} while (0)
-+#endif
-+
-+void ll_ra_read_in(struct file *f, struct ll_ra_read *rar);
-+void ll_ra_read_ex(struct file *f, struct ll_ra_read *rar);
-+struct ll_ra_read *ll_ra_read_get(struct file *f);
-+
-+/* llite/lproc_llite.c */
-+#ifdef LPROCFS
-+int lprocfs_register_mountpoint(struct proc_dir_entry *parent,
-+ struct super_block *sb, char *osc, char *mdc);
-+void lprocfs_unregister_mountpoint(struct ll_sb_info *sbi);
-+void ll_stats_ops_tally(struct ll_sb_info *sbi, int op, int count);
-+void lprocfs_llite_init_vars(struct lprocfs_static_vars *lvars);
-+#else
-+static inline int lprocfs_register_mountpoint(struct proc_dir_entry *parent,
-+ struct super_block *sb, char *osc, char *mdc){return 0;}
-+static inline void lprocfs_unregister_mountpoint(struct ll_sb_info *sbi) {}
-+static void ll_stats_ops_tally(struct ll_sb_info *sbi, int op, int count) {}
-+static void lprocfs_llite_init_vars(struct lprocfs_static_vars *lvars)
-+{
-+ memset(lvars, 0, sizeof(*lvars));
-+}
-+#endif
-+
-+
-+/* llite/dir.c */
-+extern struct file_operations ll_dir_operations;
-+extern struct inode_operations ll_dir_inode_operations;
-+
-+struct page *ll_get_dir_page(struct inode *dir, unsigned long n);
-+
-+static inline unsigned ll_dir_rec_len(unsigned name_len)
-+{
-+ return (name_len + 8 + LL_DIR_PAD - 1) & ~(LL_DIR_PAD - 1);
-+}
-+
-+static inline struct ll_dir_entry *ll_entry_at(void *base, unsigned offset)
-+{
-+ return (struct ll_dir_entry *)((char *)base + offset);
-+}
-+
-+/*
-+ * p is at least 6 bytes before the end of page
-+ */
-+static inline struct ll_dir_entry *ll_dir_next_entry(struct ll_dir_entry *p)
-+{
-+ return ll_entry_at(p, le16_to_cpu(p->lde_rec_len));
-+}
-+
-+static inline void ll_put_page(struct page *page)
-+{
-+ kunmap(page);
-+ page_cache_release(page);
-+}
-+
-+static inline unsigned long dir_pages(struct inode *inode)
-+{
-+ return (inode->i_size + CFS_PAGE_SIZE - 1) >> CFS_PAGE_SHIFT;
-+}
-+
-+int ll_objects_destroy(struct ptlrpc_request *request, struct inode *dir);
-+struct inode *ll_iget(struct super_block *sb, ino_t hash,
-+ struct lustre_md *lic);
-+int ll_mdc_cancel_unused(struct lustre_handle *, struct inode *, int flags,
-+ void *opaque);
-+int ll_mdc_blocking_ast(struct ldlm_lock *, struct ldlm_lock_desc *,
-+ void *data, int flag);
-+int ll_prepare_mdc_op_data(struct mdc_op_data *,
-+ struct inode *i1, struct inode *i2,
-+ const char *name, int namelen, int mode, void *data);
-+#ifndef HAVE_VFS_INTENT_PATCHES
-+struct lookup_intent *ll_convert_intent(struct open_intent *oit,
-+ int lookup_flags);
-+#endif
-+void ll_pin_extent_cb(void *data);
-+int ll_page_removal_cb(void *data, int discard);
-+int ll_extent_lock_cancel_cb(struct ldlm_lock *lock, struct ldlm_lock_desc *new,
-+ void *data, int flag);
-+int lookup_it_finish(struct ptlrpc_request *request, int offset,
-+ struct lookup_intent *it, void *data);
-+void ll_lookup_finish_locks(struct lookup_intent *it, struct dentry *dentry);
-+
-+/* llite/rw.c */
-+int ll_prepare_write(struct file *, struct page *, unsigned from, unsigned to);
-+int ll_commit_write(struct file *, struct page *, unsigned from, unsigned to);
-+int ll_writepage(struct page *page);
-+void ll_inode_fill_obdo(struct inode *inode, int cmd, struct obdo *oa);
-+int ll_ap_completion(void *data, int cmd, struct obdo *oa, int rc);
-+int llap_shrink_cache(struct ll_sb_info *sbi, int shrink_fraction);
-+extern struct cache_definition ll_cache_definition;
-+void ll_removepage(struct page *page);
-+int ll_readpage(struct file *file, struct page *page);
-+struct ll_async_page *llap_cast_private(struct page *page);
-+void ll_readahead_init(struct inode *inode, struct ll_readahead_state *ras);
-+void ll_ra_accounting(struct ll_async_page *llap,struct address_space *mapping);
-+void ll_truncate(struct inode *inode);
-+int ll_file_punch(struct inode *, loff_t, int);
-+ssize_t ll_file_lockless_io(struct file *, const struct iovec *,
-+ unsigned long, loff_t *, int, ssize_t);
-+void ll_clear_file_contended(struct inode*);
-+int ll_sync_page_range(struct inode *, struct address_space *, loff_t, size_t);
-+
-+/* llite/file.c */
-+extern struct file_operations ll_file_operations;
-+extern struct file_operations ll_file_operations_flock;
-+extern struct file_operations ll_file_operations_noflock;
-+extern struct inode_operations ll_file_inode_operations;
-+extern int ll_inode_revalidate_it(struct dentry *, struct lookup_intent *);
-+extern int ll_have_md_lock(struct inode *inode, __u64 bits);
-+int ll_region_mapped(unsigned long addr, size_t count);
-+int ll_extent_lock(struct ll_file_data *, struct inode *,
-+ struct lov_stripe_md *, int mode, ldlm_policy_data_t *,
-+ struct lustre_handle *, int ast_flags);
-+int ll_extent_unlock(struct ll_file_data *, struct inode *,
-+ struct lov_stripe_md *, int mode, struct lustre_handle *);
-+int ll_file_open(struct inode *inode, struct file *file);
-+int ll_file_release(struct inode *inode, struct file *file);
-+int ll_lsm_getattr(struct obd_export *, struct lov_stripe_md *, struct obdo *);
-+int ll_glimpse_ioctl(struct ll_sb_info *sbi,
-+ struct lov_stripe_md *lsm, lstat_t *st);
-+int ll_glimpse_size(struct inode *inode, int ast_flags);
-+int ll_local_open(struct file *file,
-+ struct lookup_intent *it, struct ll_file_data *fd,
-+ struct obd_client_handle *och);
-+int ll_release_openhandle(struct dentry *, struct lookup_intent *);
-+int ll_mdc_close(struct obd_export *mdc_exp, struct inode *inode,
-+ struct file *file);
-+int ll_mdc_real_close(struct inode *inode, int flags);
-+extern void ll_rw_stats_tally(struct ll_sb_info *sbi, pid_t pid, struct file
-+ *file, size_t count, int rw);
-+int ll_getattr_it(struct vfsmount *mnt, struct dentry *de,
-+ struct lookup_intent *it, struct kstat *stat);
-+int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat);
-+struct ll_file_data *ll_file_data_get(void);
-+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
-+int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd);
-+#else
-+int ll_inode_permission(struct inode *inode, int mask);
-+#endif
-+int ll_lov_setstripe_ea_info(struct inode *inode, struct file *file,
-+ int flags, struct lov_user_md *lum,
-+ int lum_size);
-+int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename,
-+ struct lov_mds_md **lmm, int *lmm_size,
-+ struct ptlrpc_request **request);
-+int ll_dir_setstripe(struct inode *inode, struct lov_user_md *lump,
-+ int set_default);
-+int ll_dir_getstripe(struct inode *inode, struct lov_mds_md **lmm,
-+ int *lmm_size, struct ptlrpc_request **request);
-+int ll_fsync(struct file *file, struct dentry *dentry, int data);
-+int ll_fiemap(struct inode *inode, struct ll_user_fiemap *fiemap,
-+ int num_bytes);
-+
-+/* llite/dcache.c */
-+/* llite/namei.c */
-+/**
-+ * protect race ll_find_aliases vs ll_revalidate_it vs ll_unhash_aliases
-+ */
-+extern spinlock_t ll_lookup_lock;
-+extern struct dentry_operations ll_d_ops;
-+void ll_intent_drop_lock(struct lookup_intent *);
-+void ll_intent_release(struct lookup_intent *);
-+extern void ll_set_dd(struct dentry *de);
-+int ll_drop_dentry(struct dentry *dentry);
-+void ll_unhash_aliases(struct inode *);
-+void ll_frob_intent(struct lookup_intent **itp, struct lookup_intent *deft);
-+void ll_lookup_finish_locks(struct lookup_intent *it, struct dentry *dentry);
-+int ll_dcompare(struct dentry *parent, struct qstr *d_name, struct qstr *name);
-+int revalidate_it_finish(struct ptlrpc_request *request, int offset,
-+ struct lookup_intent *it, struct dentry *de);
-+
-+/* llite/llite_lib.c */
-+extern struct super_operations lustre_super_operations;
-+
-+char *ll_read_opt(const char *opt, char *data);
-+void ll_lli_init(struct ll_inode_info *lli);
-+int ll_fill_super(struct super_block *sb);
-+void ll_put_super(struct super_block *sb);
-+void ll_kill_super(struct super_block *sb);
-+struct inode *ll_inode_from_lock(struct ldlm_lock *lock);
-+void ll_clear_inode(struct inode *inode);
-+int ll_setattr_raw(struct inode *inode, struct iattr *attr);
-+int ll_setattr(struct dentry *de, struct iattr *attr);
-+#ifndef HAVE_STATFS_DENTRY_PARAM
-+int ll_statfs(struct super_block *sb, struct kstatfs *sfs);
-+#else
-+int ll_statfs(struct dentry *de, struct kstatfs *sfs);
-+#endif
-+int ll_statfs_internal(struct super_block *sb, struct obd_statfs *osfs,
-+ __u64 max_age, __u32 flags);
-+void ll_update_inode(struct inode *inode, struct lustre_md *md);
-+void ll_read_inode2(struct inode *inode, void *opaque);
-+int ll_iocontrol(struct inode *inode, struct file *file,
-+ unsigned int cmd, unsigned long arg);
-+#ifdef HAVE_UMOUNTBEGIN_VFSMOUNT
-+void ll_umount_begin(struct vfsmount *vfsmnt, int flags);
-+#else
-+void ll_umount_begin(struct super_block *sb);
-+#endif
-+int ll_remount_fs(struct super_block *sb, int *flags, char *data);
-+int ll_show_options(struct seq_file *seq, struct vfsmount *vfs);
-+int ll_prep_inode(struct obd_export *exp, struct inode **inode,
-+ struct ptlrpc_request *req, int offset, struct super_block *);
-+void lustre_dump_dentry(struct dentry *, int recur);
-+void lustre_dump_inode(struct inode *);
-+struct ll_async_page *llite_pglist_next_llap(struct ll_sb_info *sbi,
-+ struct list_head *list);
-+int ll_obd_statfs(struct inode *inode, void *arg);
-+int ll_get_max_mdsize(struct ll_sb_info *sbi, int *max_mdsize);
-+int ll_process_config(struct lustre_cfg *lcfg);
-+
-+/* llite/llite_nfs.c */
-+extern struct export_operations lustre_export_operations;
-+__u32 get_uuid2int(const char *name, int len);
-+struct dentry *ll_fh_to_dentry(struct super_block *sb, __u32 *data, int len,
-+ int fhtype, int parent);
-+int ll_dentry_to_fh(struct dentry *, __u32 *datap, int *lenp, int need_parent);
-+
-+/* llite/special.c */
-+extern struct inode_operations ll_special_inode_operations;
-+extern struct file_operations ll_special_chr_inode_fops;
-+extern struct file_operations ll_special_chr_file_fops;
-+extern struct file_operations ll_special_blk_inode_fops;
-+extern struct file_operations ll_special_fifo_inode_fops;
-+extern struct file_operations ll_special_fifo_file_fops;
-+extern struct file_operations ll_special_sock_inode_fops;
-+
-+/* llite/symlink.c */
-+extern struct inode_operations ll_fast_symlink_inode_operations;
-+
-+/* llite/llite_close.c */
-+struct ll_close_queue {
-+ spinlock_t lcq_lock;
-+ struct list_head lcq_list;
-+ wait_queue_head_t lcq_waitq;
-+ struct completion lcq_comp;
-+};
-+
-+#ifdef HAVE_CLOSE_THREAD
-+void llap_write_pending(struct inode *inode, struct ll_async_page *llap);
-+void llap_write_complete(struct inode *inode, struct ll_async_page *llap);
-+void ll_open_complete(struct inode *inode);
-+int ll_is_inode_dirty(struct inode *inode);
-+void ll_try_done_writing(struct inode *inode);
-+void ll_queue_done_writing(struct inode *inode);
-+#else
-+static inline void llap_write_pending(struct inode *inode,
-+ struct ll_async_page *llap) { return; };
-+static inline void llap_write_complete(struct inode *inode,
-+ struct ll_async_page *llap) { return; };
-+static inline void ll_open_complete(struct inode *inode) { return; };
-+static inline int ll_is_inode_dirty(struct inode *inode) { return 0; };
-+static inline void ll_try_done_writing(struct inode *inode) { return; };
-+static inline void ll_queue_done_writing(struct inode *inode) { return; };
-+//static inline void ll_close_thread_shutdown(struct ll_close_queue *lcq) { return; };
-+//static inline int ll_close_thread_start(struct ll_close_queue **lcq_ret) { return 0; };
-+#endif
-+void ll_close_thread_shutdown(struct ll_close_queue *lcq);
-+int ll_close_thread_start(struct ll_close_queue **lcq_ret);
-+
-+/* llite/llite_mmap.c */
-+typedef struct rb_root rb_root_t;
-+typedef struct rb_node rb_node_t;
-+
-+struct ll_lock_tree_node;
-+struct ll_lock_tree {
-+ rb_root_t lt_root;
-+ struct list_head lt_locked_list;
-+ struct ll_file_data *lt_fd;
-+};
-+
-+int ll_teardown_mmaps(struct address_space *mapping, __u64 first, __u64 last);
-+int ll_file_mmap(struct file * file, struct vm_area_struct * vma);
-+struct ll_lock_tree_node * ll_node_from_inode(struct inode *inode, __u64 start,
-+ __u64 end, ldlm_mode_t mode);
-+int ll_tree_lock(struct ll_lock_tree *tree,
-+ struct ll_lock_tree_node *first_node,
-+ const char *buf, size_t count, int ast_flags);
-+int ll_tree_lock_iov(struct ll_lock_tree *tree,
-+ struct ll_lock_tree_node *first_node,
-+ const struct iovec *iov, unsigned long nr_segs,
-+ int ast_flags);
-+int ll_tree_unlock(struct ll_lock_tree *tree);
-+
-+#define ll_s2sbi(sb) (s2lsi(sb)->lsi_llsbi)
-+
-+static inline __u64 ll_ts2u64(struct timespec *time)
-+{
-+ __u64 t = time->tv_sec;
-+ return t;
-+}
-+
-+/* don't need an addref as the sb_info should be holding one */
-+static inline struct obd_export *ll_s2obdexp(struct super_block *sb)
-+{
-+ return ll_s2sbi(sb)->ll_osc_exp;
-+}
-+
-+/* don't need an addref as the sb_info should be holding one */
-+static inline struct obd_export *ll_s2mdcexp(struct super_block *sb)
-+{
-+ return ll_s2sbi(sb)->ll_mdc_exp;
-+}
-+
-+static inline struct client_obd *sbi2mdc(struct ll_sb_info *sbi)
-+{
-+ struct obd_device *obd = sbi->ll_mdc_exp->exp_obd;
-+ if (obd == NULL)
-+ LBUG();
-+ return &obd->u.cli;
-+}
-+
-+// FIXME: replace the name of this with LL_SB to conform to kernel stuff
-+static inline struct ll_sb_info *ll_i2sbi(struct inode *inode)
-+{
-+ return ll_s2sbi(inode->i_sb);
-+}
-+
-+static inline struct obd_export *ll_i2obdexp(struct inode *inode)
-+{
-+ return ll_s2obdexp(inode->i_sb);
-+}
-+
-+static inline struct obd_export *ll_i2mdcexp(struct inode *inode)
-+{
-+ return ll_s2mdcexp(inode->i_sb);
-+}
-+
-+static inline void ll_inode2fid(struct ll_fid *fid, struct inode *inode)
-+{
-+ mdc_pack_fid(fid, inode->i_ino, inode->i_generation,
-+ inode->i_mode & S_IFMT);
-+}
-+
-+static inline int ll_mds_max_easize(struct super_block *sb)
-+{
-+ return sbi2mdc(ll_s2sbi(sb))->cl_max_mds_easize;
-+}
-+
-+static inline __u64 ll_file_maxbytes(struct inode *inode)
-+{
-+ return ll_i2info(inode)->lli_maxbytes;
-+}
-+
-+/* llite/xattr.c */
-+int ll_setxattr(struct dentry *dentry, const char *name,
-+ const void *value, size_t size, int flags);
-+ssize_t ll_getxattr(struct dentry *dentry, const char *name,
-+ void *buffer, size_t size);
-+ssize_t ll_listxattr(struct dentry *dentry, char *buffer, size_t size);
-+int ll_removexattr(struct dentry *dentry, const char *name);
-+
-+/* statahead.c */
-+
-+#define LL_SA_RPC_MIN 2
-+#define LL_SA_RPC_DEF 32
-+#define LL_SA_RPC_MAX 8192
-+
-+/* per inode struct, for dir only */
-+struct ll_statahead_info {
-+ struct inode *sai_inode;
-+ unsigned int sai_generation; /* generation for statahead */
-+ atomic_t sai_refcount; /* when access this struct, hold
-+ * refcount */
-+ unsigned int sai_sent; /* stat requests sent count */
-+ unsigned int sai_replied; /* stat requests which received
-+ * reply */
-+ unsigned int sai_max; /* max ahead of lookup */
-+ unsigned int sai_index; /* index of statahead entry */
-+ unsigned int sai_index_next; /* index for the next statahead
-+ * entry to be stated */
-+ unsigned int sai_hit; /* hit count */
-+ unsigned int sai_miss; /* miss count:
-+ * for "ls -al" case, it includes
-+ * hidden dentry miss;
-+ * for "ls -l" case, it does not
-+ * include hidden dentry miss.
-+ * "sai_miss_hidden" is used for
-+ * the latter case.
-+ */
-+ unsigned int sai_consecutive_miss; /* consecutive miss */
-+ unsigned int sai_miss_hidden;/* "ls -al", but first dentry
-+ * is not a hidden one */
-+ unsigned int sai_skip_hidden;/* skipped hidden dentry count */
-+ unsigned int sai_ls_all:1; /* "ls -al", do stat-ahead for
-+ * hidden entries */
-+ cfs_waitq_t sai_waitq; /* stat-ahead wait queue */
-+ struct ptlrpc_thread sai_thread; /* stat-ahead thread */
-+ struct list_head sai_entries_sent; /* entries sent out */
-+ struct list_head sai_entries_received; /* entries returned */
-+ struct list_head sai_entries_stated; /* entries stated */
-+};
-+
-+int do_statahead_enter(struct inode *dir, struct dentry **dentry, int lookup);
-+void ll_statahead_exit(struct dentry *dentry, int result);
-+void ll_stop_statahead(struct inode *inode, void *key);
-+
-+static inline
-+void ll_statahead_mark(struct dentry *dentry)
-+{
-+ struct ll_inode_info *lli = ll_i2info(dentry->d_parent->d_inode);
-+ struct ll_dentry_data *ldd = ll_d2d(dentry);
-+
-+ /* not the same process, don't mark */
-+ if (lli->lli_opendir_pid != cfs_curproc_pid())
-+ return;
-+
-+ spin_lock(&lli->lli_lock);
-+ if (likely(lli->lli_sai != NULL && ldd != NULL))
-+ ldd->lld_sa_generation = lli->lli_sai->sai_generation;
-+ spin_unlock(&lli->lli_lock);
-+}
-+
-+static inline
-+int ll_statahead_enter(struct inode *dir, struct dentry **dentryp, int lookup)
-+{
-+ struct ll_sb_info *sbi = ll_i2sbi(dir);
-+ struct ll_inode_info *lli = ll_i2info(dir);
-+ struct ll_dentry_data *ldd = ll_d2d(*dentryp);
-+
-+ if (sbi->ll_sa_max == 0)
-+ return -ENOTSUPP;
-+
-+ /* not the same process, don't statahead */
-+ if (lli->lli_opendir_pid != cfs_curproc_pid())
-+ return -EBADF;
-+
-+ /*
-+ * When "ls" is run on a dentry, the system triggers "revalidate" or
-+ * "lookup" more than once: for "getattr", for "getxattr", and maybe
-+ * for others. Under patchless client mode the operation intent is not
-+ * accurate, which may mislead the statahead thread. For example, the
-+ * "revalidate" calls for "getattr" and "getxattr" of a dentry may
-+ * carry the same operation intent -- "IT_GETATTR".
-+ * In fact, a dentry should interact with the statahead thread only
-+ * once, otherwise the statahead window becomes confused.
-+ * The solution is as follows:
-+ * Assign "lld_sa_generation" from "sai_generation" when a dentry is
-+ * handled by "IT_GETATTR" for the first time; subsequent "IT_GETATTR"
-+ * calls bypass the statahead thread by checking
-+ * "lld_sa_generation == lli_sai->sai_generation".
-+ */
-+ if (ldd && lli->lli_sai &&
-+ ldd->lld_sa_generation == lli->lli_sai->sai_generation)
-+ return -EAGAIN;
-+
-+ return do_statahead_enter(dir, dentryp, lookup);
-+}
-+
-+static inline void ll_dops_init(struct dentry *de, int block)
-+{
-+ struct ll_dentry_data *lld = ll_d2d(de);
-+
-+ if (lld == NULL && block != 0) {
-+ ll_set_dd(de);
-+ lld = ll_d2d(de);
-+ }
-+
-+ if (lld != NULL)
-+ lld->lld_sa_generation = 0;
-+
-+ de->d_op = &ll_d_ops;
-+}
-+
-+/* llite ioctl register support routine */
-+#ifdef __KERNEL__
-+enum llioc_iter {
-+ LLIOC_CONT = 0,
-+ LLIOC_STOP
-+};
-+
-+#define LLIOC_MAX_CMD 256
-+
-+/*
-+ * Rules to write a callback function:
-+ *
-+ * Parameters:
-+ * @magic: The dynamic ioctl call routine will feed this value with the
-+ * pointer returned by ll_iocontrol_register. Callback functions should
-+ * use this data to check for potential collisions of ioctl cmds. If a
-+ * collision is found, the callback function should return LLIOC_CONT.
-+ * @rcp: The result of the ioctl command.
-+ *
-+ * Return values:
-+ * If @magic matches the pointer returned by ll_iocontrol_register, the
-+ * callback should return LLIOC_STOP; return LLIOC_CONT otherwise.
-+ */
-+typedef enum llioc_iter (*llioc_callback_t)(struct inode *inode,
-+ struct file *file, unsigned int cmd, unsigned long arg,
-+ void *magic, int *rcp);
-+
-+enum llioc_iter ll_iocontrol_call(struct inode *inode, struct file *file,
-+ unsigned int cmd, unsigned long arg, int *rcp);
-+
-+/* export functions */
-+/* Register an ioctl block dynamically for a regular file.
-+ *
-+ * @cmd: the array of ioctl commands
-+ * @count: number of commands in @cmd
-+ * @cb: callback function; it will be called if an ioctl command is found to
-+ * belong to the command list @cmd.
-+ *
-+ * Return value:
-+ * A magic pointer will be returned on success;
-+ * otherwise, NULL will be returned.
-+ */
-+void *ll_iocontrol_register(llioc_callback_t cb, int count, unsigned int *cmd);
-+void ll_iocontrol_unregister(void *magic);
-+
-+#endif
-+
-+#endif /* LLITE_INTERNAL_H */
-diff -urNad lustre~/lustre/llite/llite_lib.c lustre/lustre/llite/llite_lib.c
---- lustre~/lustre/llite/llite_lib.c 2009-03-12 10:32:27.000000000 +0100
-+++ lustre/lustre/llite/llite_lib.c 2009-03-13 09:45:03.000000000 +0100
-@@ -1346,7 +1346,7 @@
- rc = vmtruncate(inode, new_size);
- clear_bit(LLI_F_SRVLOCK, &lli->lli_flags);
- if (rc != 0) {
-- LASSERT(atomic_read(&lli->lli_size_sem.count) <= 0);
-+ LASSERT(SEM_COUNT(&lli->lli_size_sem) <= 0);
- ll_inode_size_unlock(inode, 0);
- }
- }
-diff -urNad lustre~/lustre/llite/llite_lib.c.orig lustre/lustre/llite/llite_lib.c.orig
---- lustre~/lustre/llite/llite_lib.c.orig 1970-01-01 00:00:00.000000000 +0000
-+++ lustre/lustre/llite/llite_lib.c.orig 2009-03-13 09:45:03.000000000 +0100
-@@ -0,0 +1,2232 @@
-+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-+ * vim:expandtab:shiftwidth=8:tabstop=8:
-+ *
-+ * GPL HEADER START
-+ *
-+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
-+ *
-+ * This program is free software; you can redistribute it and/or modify
-+ * it under the terms of the GNU General Public License version 2 only,
-+ * as published by the Free Software Foundation.
-+ *
-+ * This program is distributed in the hope that it will be useful, but
-+ * WITHOUT ANY WARRANTY; without even the implied warranty of
-+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-+ * General Public License version 2 for more details (a copy is included
-+ * in the LICENSE file that accompanied this code).
-+ *
-+ * You should have received a copy of the GNU General Public License
-+ * version 2 along with this program; If not, see
-+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
-+ *
-+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
-+ * CA 95054 USA or visit www.sun.com if you need additional information or
-+ * have any questions.
-+ *
-+ * GPL HEADER END
-+ */
-+/*
-+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved
-+ * Use is subject to license terms.
-+ */
-+/*
-+ * This file is part of Lustre, http://www.lustre.org/
-+ * Lustre is a trademark of Sun Microsystems, Inc.
-+ *
-+ * lustre/llite/llite_lib.c
-+ *
-+ * Lustre Light Super operations
-+ */
-+
-+#define DEBUG_SUBSYSTEM S_LLITE
-+
-+#include <linux/module.h>
-+#include <linux/types.h>
-+#include <linux/random.h>
-+#include <linux/version.h>
-+#include <linux/mm.h>
-+
-+#include <lustre_lite.h>
-+#include <lustre_ha.h>
-+#include <lustre_dlm.h>
-+#include <lprocfs_status.h>
-+#include <lustre_disk.h>
-+#include <lustre_param.h>
-+#include <lustre_cache.h>
-+#include "llite_internal.h"
-+
-+cfs_mem_cache_t *ll_file_data_slab;
-+
-+LIST_HEAD(ll_super_blocks);
-+spinlock_t ll_sb_lock = SPIN_LOCK_UNLOCKED;
-+
-+extern struct address_space_operations ll_aops;
-+extern struct address_space_operations ll_dir_aops;
-+
-+#ifndef log2
-+#define log2(n) ffz(~(n))
-+#endif
-+
-+
-+static struct ll_sb_info *ll_init_sbi(void)
-+{
-+ struct ll_sb_info *sbi = NULL;
-+ unsigned long pages;
-+ struct sysinfo si;
-+ class_uuid_t uuid;
-+ int i;
-+ ENTRY;
-+
-+ OBD_ALLOC(sbi, sizeof(*sbi));
-+ if (!sbi)
-+ RETURN(NULL);
-+
-+ spin_lock_init(&sbi->ll_lock);
-+ spin_lock_init(&sbi->ll_lco.lco_lock);
-+ spin_lock_init(&sbi->ll_pp_extent_lock);
-+ spin_lock_init(&sbi->ll_process_lock);
-+ sbi->ll_rw_stats_on = 0;
-+ INIT_LIST_HEAD(&sbi->ll_pglist);
-+
-+ si_meminfo(&si);
-+ pages = si.totalram - si.totalhigh;
-+ if (pages >> (20 - CFS_PAGE_SHIFT) < 512) {
-+#ifdef HAVE_BGL_SUPPORT
-+ sbi->ll_async_page_max = pages / 4;
-+#else
-+ sbi->ll_async_page_max = pages / 2;
-+#endif
-+ } else {
-+ sbi->ll_async_page_max = (pages / 4) * 3;
-+ }
-+ sbi->ll_ra_info.ra_max_pages = min(pages / 32,
-+ SBI_DEFAULT_READAHEAD_MAX);
-+ sbi->ll_ra_info.ra_max_read_ahead_whole_pages =
-+ SBI_DEFAULT_READAHEAD_WHOLE_MAX;
-+ sbi->ll_contention_time = SBI_DEFAULT_CONTENTION_SECONDS;
-+ sbi->ll_lockless_truncate_enable = SBI_DEFAULT_LOCKLESS_TRUNCATE_ENABLE;
-+ INIT_LIST_HEAD(&sbi->ll_conn_chain);
-+ INIT_LIST_HEAD(&sbi->ll_orphan_dentry_list);
-+
-+ ll_generate_random_uuid(uuid);
-+ class_uuid_unparse(uuid, &sbi->ll_sb_uuid);
-+ CDEBUG(D_CONFIG, "generated uuid: %s\n", sbi->ll_sb_uuid.uuid);
-+
-+ spin_lock(&ll_sb_lock);
-+ list_add_tail(&sbi->ll_list, &ll_super_blocks);
-+ spin_unlock(&ll_sb_lock);
-+
-+#ifdef ENABLE_CHECKSUM
-+ sbi->ll_flags |= LL_SBI_DATA_CHECKSUM;
-+#endif
-+#ifdef ENABLE_LLITE_CHECKSUM
-+ sbi->ll_flags |= LL_SBI_LLITE_CHECKSUM;
-+#endif
-+
-+#ifdef HAVE_LRU_RESIZE_SUPPORT
-+ sbi->ll_flags |= LL_SBI_LRU_RESIZE;
-+#endif
-+
-+#ifdef HAVE_EXPORT___IGET
-+ INIT_LIST_HEAD(&sbi->ll_deathrow);
-+ spin_lock_init(&sbi->ll_deathrow_lock);
-+#endif
-+ for (i = 0; i <= LL_PROCESS_HIST_MAX; i++) {
-+ spin_lock_init(&sbi->ll_rw_extents_info.pp_extents[i].pp_r_hist.oh_lock);
-+ spin_lock_init(&sbi->ll_rw_extents_info.pp_extents[i].pp_w_hist.oh_lock);
-+ }
-+
-+ /* metadata statahead is enabled by default */
-+ sbi->ll_sa_max = LL_SA_RPC_DEF;
-+
-+ RETURN(sbi);
-+}
-+
-+void ll_free_sbi(struct super_block *sb)
-+{
-+ struct ll_sb_info *sbi = ll_s2sbi(sb);
-+ ENTRY;
-+
-+ if (sbi != NULL) {
-+ spin_lock(&ll_sb_lock);
-+ list_del(&sbi->ll_list);
-+ spin_unlock(&ll_sb_lock);
-+ OBD_FREE(sbi, sizeof(*sbi));
-+ }
-+ EXIT;
-+}
-+
-+static struct dentry_operations ll_d_root_ops = {
-+#ifdef DCACHE_LUSTRE_INVALID
-+ .d_compare = ll_dcompare,
-+#endif
-+};
-+
-+static int client_common_fill_super(struct super_block *sb,
-+ char *mdc, char *osc)
-+{
-+ struct inode *root = 0;
-+ struct ll_sb_info *sbi = ll_s2sbi(sb);
-+ struct obd_device *obd;
-+ struct ll_fid rootfid;
-+ struct obd_statfs osfs;
-+ struct ptlrpc_request *request = NULL;
-+ struct lustre_handle osc_conn = {0, };
-+ struct lustre_handle mdc_conn = {0, };
-+ struct lustre_md md;
-+ struct obd_connect_data *data = NULL;
-+ int err, checksum;
-+ ENTRY;
-+
-+ obd = class_name2obd(mdc);
-+ if (!obd) {
-+ CERROR("MDC %s: not setup or attached\n", mdc);
-+ RETURN(-EINVAL);
-+ }
-+
-+ OBD_ALLOC(data, sizeof(*data));
-+ if (data == NULL)
-+ RETURN(-ENOMEM);
-+
-+ if (proc_lustre_fs_root) {
-+ err = lprocfs_register_mountpoint(proc_lustre_fs_root, sb,
-+ osc, mdc);
-+ if (err < 0)
-+ CERROR("could not register mount in /proc/fs/lustre\n");
-+ }
-+
-+ /* indicate the features supported by this client */
-+ data->ocd_connect_flags = OBD_CONNECT_VERSION | OBD_CONNECT_IBITS |
-+ OBD_CONNECT_JOIN | OBD_CONNECT_ATTRFID | OBD_CONNECT_NODEVOH |
-+ OBD_CONNECT_CANCELSET | OBD_CONNECT_AT;
-+#ifdef HAVE_LRU_RESIZE_SUPPORT
-+ if (sbi->ll_flags & LL_SBI_LRU_RESIZE)
-+ data->ocd_connect_flags |= OBD_CONNECT_LRU_RESIZE;
-+#endif
-+#ifdef CONFIG_FS_POSIX_ACL
-+ data->ocd_connect_flags |= OBD_CONNECT_ACL;
-+#endif
-+ data->ocd_ibits_known = MDS_INODELOCK_FULL;
-+ data->ocd_version = LUSTRE_VERSION_CODE;
-+
-+ if (sb->s_flags & MS_RDONLY)
-+ data->ocd_connect_flags |= OBD_CONNECT_RDONLY;
-+ if (sbi->ll_flags & LL_SBI_USER_XATTR)
-+ data->ocd_connect_flags |= OBD_CONNECT_XATTR;
-+
-+#ifdef HAVE_MS_FLOCK_LOCK
-+ /* force vfs to use lustre handler for flock() calls - bug 10743 */
-+ sb->s_flags |= MS_FLOCK_LOCK;
-+#endif
-+
-+ if (sbi->ll_flags & LL_SBI_FLOCK)
-+ sbi->ll_fop = &ll_file_operations_flock;
-+ else if (sbi->ll_flags & LL_SBI_LOCALFLOCK)
-+ sbi->ll_fop = &ll_file_operations;
-+ else
-+ sbi->ll_fop = &ll_file_operations_noflock;
-+
-+
-+ err = obd_connect(&mdc_conn, obd, &sbi->ll_sb_uuid, data, &sbi->ll_mdc_exp);
-+ if (err == -EBUSY) {
-+ LCONSOLE_ERROR_MSG(0x14f, "An MDT (mdc %s) is performing "
-+ "recovery, of which this client is not a "
-+ "part. Please wait for recovery to complete,"
-+ " abort, or time out.\n", mdc);
-+ GOTO(out, err);
-+ } else if (err) {
-+ CERROR("cannot connect to %s: rc = %d\n", mdc, err);
-+ GOTO(out, err);
-+ }
-+
-+ err = obd_statfs(obd, &osfs, cfs_time_current_64() - HZ, 0);
-+ if (err)
-+ GOTO(out_mdc, err);
-+
-+ /* MDC connect is surely finished by now because we actually sent
-+ * a statfs RPC, otherwise obd_connect() is asynchronous. */
-+ *data = class_exp2cliimp(sbi->ll_mdc_exp)->imp_connect_data;
-+
-+ LASSERT(osfs.os_bsize);
-+ sb->s_blocksize = osfs.os_bsize;
-+ sb->s_blocksize_bits = log2(osfs.os_bsize);
-+ sb->s_magic = LL_SUPER_MAGIC;
-+
-+ /* for bug 11559. in $LINUX/fs/read_write.c, function do_sendfile():
-+ * retval = in_file->f_op->sendfile(...);
-+ * if (*ppos > max)
-+ * retval = -EOVERFLOW;
-+ *
-+ * it will check if *ppos is greater than max. However, max equals
-+ * s_maxbytes, which is a negative integer on an x86_64 box since loff_t
-+ * has been defined as a signed long long integer in the Linux kernel. */
-+#if BITS_PER_LONG == 64
-+ sb->s_maxbytes = PAGE_CACHE_MAXBYTES >> 1;
-+#else
-+ sb->s_maxbytes = PAGE_CACHE_MAXBYTES;
-+#endif
-+ sbi->ll_namelen = osfs.os_namelen;
-+ sbi->ll_max_rw_chunk = LL_DEFAULT_MAX_RW_CHUNK;
-+
-+ if ((sbi->ll_flags & LL_SBI_USER_XATTR) &&
-+ !(data->ocd_connect_flags & OBD_CONNECT_XATTR)) {
-+ LCONSOLE_INFO("Disabling user_xattr feature because "
-+ "it is not supported on the server\n");
-+ sbi->ll_flags &= ~LL_SBI_USER_XATTR;
-+ }
-+
-+ if (data->ocd_connect_flags & OBD_CONNECT_ACL) {
-+#ifdef MS_POSIXACL
-+ sb->s_flags |= MS_POSIXACL;
-+#endif
-+ sbi->ll_flags |= LL_SBI_ACL;
-+ } else
-+ sbi->ll_flags &= ~LL_SBI_ACL;
-+
-+ if (data->ocd_connect_flags & OBD_CONNECT_JOIN)
-+ sbi->ll_flags |= LL_SBI_JOIN;
-+
-+ obd = class_name2obd(osc);
-+ if (!obd) {
-+ CERROR("OSC %s: not setup or attached\n", osc);
-+ GOTO(out_mdc, err = -ENODEV);
-+ }
-+
-+ data->ocd_connect_flags = OBD_CONNECT_VERSION | OBD_CONNECT_GRANT |
-+ OBD_CONNECT_REQPORTAL | OBD_CONNECT_BRW_SIZE |
-+ OBD_CONNECT_SRVLOCK | OBD_CONNECT_CANCELSET | OBD_CONNECT_AT |
-+ OBD_CONNECT_TRUNCLOCK;
-+
-+ if (!OBD_FAIL_CHECK(OBD_FAIL_OSC_CONNECT_CKSUM)) {
-+ /* OBD_CONNECT_CKSUM should always be set, even if checksums are
-+ * disabled by default, because it can still be enabled on the
-+ * fly via /proc. As a consequence, we still need to come to an
-+ * agreement on the supported algorithms at connect time */
-+ data->ocd_connect_flags |= OBD_CONNECT_CKSUM;
-+
-+ if (OBD_FAIL_CHECK(OBD_FAIL_OSC_CKSUM_ADLER_ONLY))
-+ data->ocd_cksum_types = OBD_CKSUM_ADLER;
-+ else
-+ /* send the list of supported checksum types */
-+ data->ocd_cksum_types = OBD_CKSUM_ALL;
-+ }
-+
-+#ifdef HAVE_LRU_RESIZE_SUPPORT
-+ if (sbi->ll_flags & LL_SBI_LRU_RESIZE)
-+ data->ocd_connect_flags |= OBD_CONNECT_LRU_RESIZE;
-+#endif
-+
-+ CDEBUG(D_RPCTRACE, "ocd_connect_flags: "LPX64" ocd_version: %d "
-+ "ocd_grant: %d\n", data->ocd_connect_flags,
-+ data->ocd_version, data->ocd_grant);
-+
-+ obd->obd_upcall.onu_owner = &sbi->ll_lco;
-+ obd->obd_upcall.onu_upcall = ll_ocd_update;
-+ data->ocd_brw_size = PTLRPC_MAX_BRW_PAGES << CFS_PAGE_SHIFT;
-+
-+ obd_register_lock_cancel_cb(obd, ll_extent_lock_cancel_cb);
-+ obd_register_page_removal_cb(obd, ll_page_removal_cb, ll_pin_extent_cb);
-+
-+
-+ err = obd_connect(&osc_conn, obd, &sbi->ll_sb_uuid, data, &sbi->ll_osc_exp);
-+ if (err == -EBUSY) {
-+ LCONSOLE_ERROR_MSG(0x150, "An OST (osc %s) is performing "
-+ "recovery, of which this client is not a "
-+ "part. Please wait for recovery to "
-+ "complete, abort, or time out.\n", osc);
-+ GOTO(out, err); // need clear cb?
-+ } else if (err) {
-+ CERROR("cannot connect to %s: rc = %d\n", osc, err);
-+ GOTO(out_cb, err);
-+ }
-+ spin_lock(&sbi->ll_lco.lco_lock);
-+ sbi->ll_lco.lco_flags = data->ocd_connect_flags;
-+ sbi->ll_lco.lco_mdc_exp = sbi->ll_mdc_exp;
-+ sbi->ll_lco.lco_osc_exp = sbi->ll_osc_exp;
-+ spin_unlock(&sbi->ll_lco.lco_lock);
-+
-+ err = mdc_init_ea_size(sbi->ll_mdc_exp, sbi->ll_osc_exp);
-+ if (err) {
-+ CERROR("cannot set max EA and cookie sizes: rc = %d\n", err);
-+ GOTO(out_osc, err);
-+ }
-+
-+ err = obd_prep_async_page(sbi->ll_osc_exp, NULL, NULL, NULL,
-+ 0, NULL, NULL, NULL, 0, NULL);
-+ if (err < 0) {
-+ LCONSOLE_ERROR_MSG(0x151, "There are no OST's in this "
-+ "filesystem. There must be at least one "
-+ "active OST for a client to start.\n");
-+ GOTO(out_osc, err);
-+ }
-+
-+ if (!ll_async_page_slab) {
-+ ll_async_page_slab_size =
-+ size_round(sizeof(struct ll_async_page)) + err;
-+ ll_async_page_slab = cfs_mem_cache_create("ll_async_page",
-+ ll_async_page_slab_size,
-+ 0, 0);
-+ if (!ll_async_page_slab)
-+ GOTO(out_osc, err = -ENOMEM);
-+ }
-+
-+ err = mdc_getstatus(sbi->ll_mdc_exp, &rootfid);
-+ if (err) {
-+ CERROR("cannot mds_connect: rc = %d\n", err);
-+ GOTO(out_osc, err);
-+ }
-+ CDEBUG(D_SUPER, "rootfid "LPU64"\n", rootfid.id);
-+ sbi->ll_rootino = rootfid.id;
-+
-+ sb->s_op = &lustre_super_operations;
-+#if THREAD_SIZE >= 8192
-+ /* Disable the NFS export because of stack overflow
-+ * when THREAD_SIZE < 8192. Please refer to bug 17630. */
-+ sb->s_export_op = &lustre_export_operations;
-+#endif
-+
-+ /* make root inode
-+ * XXX: move this to after cbd setup? */
-+ err = mdc_getattr(sbi->ll_mdc_exp, &rootfid,
-+ OBD_MD_FLGETATTR | OBD_MD_FLBLOCKS |
-+ (sbi->ll_flags & LL_SBI_ACL ? OBD_MD_FLACL : 0),
-+ 0, &request);
-+ if (err) {
-+ CERROR("mdc_getattr failed for root: rc = %d\n", err);
-+ GOTO(out_osc, err);
-+ }
-+
-+ err = mdc_req2lustre_md(request, REPLY_REC_OFF, sbi->ll_osc_exp, &md);
-+ if (err) {
-+ CERROR("failed to understand root inode md: rc = %d\n",err);
-+ ptlrpc_req_finished (request);
-+ GOTO(out_osc, err);
-+ }
-+
-+ LASSERT(sbi->ll_rootino != 0);
-+ root = ll_iget(sb, sbi->ll_rootino, &md);
-+
-+ ptlrpc_req_finished(request);
-+
-+ if (root == NULL || is_bad_inode(root)) {
-+ mdc_free_lustre_md(sbi->ll_osc_exp, &md);
-+ CERROR("lustre_lite: bad iget4 for root\n");
-+ GOTO(out_root, err = -EBADF);
-+ }
-+
-+ err = ll_close_thread_start(&sbi->ll_lcq);
-+ if (err) {
-+ CERROR("cannot start close thread: rc %d\n", err);
-+ GOTO(out_root, err);
-+ }
-+
-+ checksum = sbi->ll_flags & LL_SBI_DATA_CHECKSUM;
-+ err = obd_set_info_async(sbi->ll_osc_exp, sizeof(KEY_CHECKSUM),
-+ KEY_CHECKSUM, sizeof(checksum),
-+ &checksum, NULL);
-+
-+ /* making vm readahead 0 for 2.4.x. In the case of 2.6.x,
-+ backing dev info assigned to inode mapping is used for
-+ determining maximal readahead. */
-+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)) && \
-+ !defined(KERNEL_HAS_AS_MAX_READAHEAD)
-+ /* bug 2805 - set VM readahead to zero */
-+ vm_max_readahead = vm_min_readahead = 0;
-+#endif
-+
-+ sb->s_root = d_alloc_root(root);
-+ if (data != NULL)
-+ OBD_FREE(data, sizeof(*data));
-+ sb->s_root->d_op = &ll_d_root_ops;
-+
-+ sbi->ll_sdev_orig = sb->s_dev;
-+ /* We set sb->s_dev equal on all lustre clients in order to support
-+ * NFS export clustering. NFSD requires that the FSID be the same
-+ * on all clients. */
-+ /* s_dev is also used in lt_compare() to compare two fs, but that is
-+ * only a node-local comparison. */
-+ sb->s_dev = get_uuid2int(sbi2mdc(sbi)->cl_target_uuid.uuid,
-+ strlen(sbi2mdc(sbi)->cl_target_uuid.uuid));
-+
-+ RETURN(err);
-+
-+out_root:
-+ if (root)
-+ iput(root);
-+out_osc:
-+ obd_disconnect(sbi->ll_osc_exp);
-+ sbi->ll_osc_exp = NULL;
-+out_cb:
-+ obd = class_name2obd(osc);
-+ obd_unregister_lock_cancel_cb(obd, ll_extent_lock_cancel_cb);
-+ obd_unregister_page_removal_cb(obd, ll_page_removal_cb);
-+out_mdc:
-+ obd_disconnect(sbi->ll_mdc_exp);
-+ sbi->ll_mdc_exp = NULL;
-+out:
-+ if (data != NULL)
-+ OBD_FREE(data, sizeof(*data));
-+ lprocfs_unregister_mountpoint(sbi);
-+ RETURN(err);
-+}
-+
-+int ll_get_max_mdsize(struct ll_sb_info *sbi, int *lmmsize)
-+{
-+ int size, rc;
-+
-+ *lmmsize = obd_size_diskmd(sbi->ll_osc_exp, NULL);
-+ size = sizeof(int);
-+ rc = obd_get_info(sbi->ll_mdc_exp, sizeof(KEY_MAX_EASIZE),
-+ KEY_MAX_EASIZE, &size, lmmsize, NULL);
-+ if (rc)
-+ CERROR("Get max mdsize error rc %d \n", rc);
-+
-+ RETURN(rc);
-+}
-+
-+void ll_dump_inode(struct inode *inode)
-+{
-+ struct list_head *tmp;
-+ int dentry_count = 0;
-+
-+ LASSERT(inode != NULL);
-+
-+ list_for_each(tmp, &inode->i_dentry)
-+ dentry_count++;
-+
-+ CERROR("inode %p dump: dev=%s ino=%lu mode=%o count=%u, %d dentries\n",
-+ inode, ll_i2mdcexp(inode)->exp_obd->obd_name, inode->i_ino,
-+ inode->i_mode, atomic_read(&inode->i_count), dentry_count);
-+}
-+
-+void lustre_dump_dentry(struct dentry *dentry, int recur)
-+{
-+ struct list_head *tmp;
-+ int subdirs = 0;
-+
-+ LASSERT(dentry != NULL);
-+
-+ list_for_each(tmp, &dentry->d_subdirs)
-+ subdirs++;
-+
-+ CERROR("dentry %p dump: name=%.*s parent=%.*s (%p), inode=%p, count=%u,"
-+ " flags=0x%x, fsdata=%p, %d subdirs\n", dentry,
-+ dentry->d_name.len, dentry->d_name.name,
-+ dentry->d_parent->d_name.len, dentry->d_parent->d_name.name,
-+ dentry->d_parent, dentry->d_inode, atomic_read(&dentry->d_count),
-+ dentry->d_flags, dentry->d_fsdata, subdirs);
-+ if (dentry->d_inode != NULL)
-+ ll_dump_inode(dentry->d_inode);
-+
-+ if (recur == 0)
-+ return;
-+
-+ list_for_each(tmp, &dentry->d_subdirs) {
-+ struct dentry *d = list_entry(tmp, struct dentry, d_child);
-+ lustre_dump_dentry(d, recur - 1);
-+ }
-+}
-+
-+#ifdef HAVE_EXPORT___IGET
-+static void prune_dir_dentries(struct inode *inode)
-+{
-+ struct dentry *dentry, *prev = NULL;
-+
-+ /* due to Lustre-specific logic, a directory
-+ * can have several dentries - a bug from the VFS POV */
-+restart:
-+ spin_lock(&dcache_lock);
-+ if (!list_empty(&inode->i_dentry)) {
-+ dentry = list_entry(inode->i_dentry.prev,
-+ struct dentry, d_alias);
-+ /* in order to prevent infinite loops we
-+ * break if previous dentry is busy */
-+ if (dentry != prev) {
-+ prev = dentry;
-+ dget_locked(dentry);
-+ spin_unlock(&dcache_lock);
-+
-+ /* try to kill all child dentries */
-+ shrink_dcache_parent(dentry);
-+ dput(dentry);
-+
-+ /* now try to get rid of current dentry */
-+ d_prune_aliases(inode);
-+ goto restart;
-+ }
-+ }
-+ spin_unlock(&dcache_lock);
-+}
-+
-+static void prune_deathrow_one(struct ll_inode_info *lli)
-+{
-+ struct inode *inode = ll_info2i(lli);
-+
-+ /* first, try to drop any dentries - they hold a ref on the inode */
-+ if (S_ISDIR(inode->i_mode))
-+ prune_dir_dentries(inode);
-+ else
-+ d_prune_aliases(inode);
-+
-+
-+ /* if somebody still uses it, leave it */
-+ LASSERT(atomic_read(&inode->i_count) > 0);
-+ if (atomic_read(&inode->i_count) > 1)
-+ goto out;
-+
-+ CDEBUG(D_INODE, "inode %lu/%u(%d) looks a good candidate for prune\n",
-+ inode->i_ino,inode->i_generation, atomic_read(&inode->i_count));
-+
-+ /* seems nobody uses it anymore */
-+ inode->i_nlink = 0;
-+
-+out:
-+ iput(inode);
-+ return;
-+}
-+
-+static void prune_deathrow(struct ll_sb_info *sbi, int try)
-+{
-+ struct ll_inode_info *lli;
-+ int empty;
-+
-+ do {
-+ if (need_resched() && try)
-+ break;
-+
-+ if (try) {
-+ if (!spin_trylock(&sbi->ll_deathrow_lock))
-+ break;
-+ } else {
-+ spin_lock(&sbi->ll_deathrow_lock);
-+ }
-+
-+ empty = 1;
-+ lli = NULL;
-+ if (!list_empty(&sbi->ll_deathrow)) {
-+ lli = list_entry(sbi->ll_deathrow.next,
-+ struct ll_inode_info,
-+ lli_dead_list);
-+ list_del_init(&lli->lli_dead_list);
-+ if (!list_empty(&sbi->ll_deathrow))
-+ empty = 0;
-+ }
-+ spin_unlock(&sbi->ll_deathrow_lock);
-+
-+ if (lli)
-+ prune_deathrow_one(lli);
-+
-+ } while (empty == 0);
-+}
-+#else /* !HAVE_EXPORT___IGET */
-+#define prune_deathrow(sbi, try) do {} while (0)
-+#endif /* HAVE_EXPORT___IGET */
-+
-+void client_common_put_super(struct super_block *sb)
-+{
-+ struct ll_sb_info *sbi = ll_s2sbi(sb);
-+ ENTRY;
-+
-+ ll_close_thread_shutdown(sbi->ll_lcq);
-+
-+ lprocfs_unregister_mountpoint(sbi);
-+
-+ /* destroy inodes in deathrow */
-+ prune_deathrow(sbi, 0);
-+
-+ list_del(&sbi->ll_conn_chain);
-+
-+ /* callbacks is cleared after disconnect each target */
-+ obd_disconnect(sbi->ll_osc_exp);
-+ sbi->ll_osc_exp = NULL;
-+
-+ obd_disconnect(sbi->ll_mdc_exp);
-+ sbi->ll_mdc_exp = NULL;
-+
-+ EXIT;
-+}
-+
-+void ll_kill_super(struct super_block *sb)
-+{
-+ struct ll_sb_info *sbi;
-+
-+ ENTRY;
-+
-+ /* sb not initialized? */
-+ if (!(sb->s_flags & MS_ACTIVE))
-+ return;
-+
-+ sbi = ll_s2sbi(sb);
-+ /* we need to restore s_dev, which was changed for clustered NFS, before
-+ * put_super, because new kernels have cached s_dev and changing
-+ * sb->s_dev in put_super does not affect the real device removal */
-+ if (sbi)
-+ sb->s_dev = sbi->ll_sdev_orig;
-+ EXIT;
-+}
-+
-+char *ll_read_opt(const char *opt, char *data)
-+{
-+ char *value;
-+ char *retval;
-+ ENTRY;
-+
-+ CDEBUG(D_SUPER, "option: %s, data %s\n", opt, data);
-+ if (strncmp(opt, data, strlen(opt)))
-+ RETURN(NULL);
-+ if ((value = strchr(data, '=')) == NULL)
-+ RETURN(NULL);
-+
-+ value++;
-+ OBD_ALLOC(retval, strlen(value) + 1);
-+ if (!retval) {
-+ CERROR("out of memory!\n");
-+ RETURN(NULL);
-+ }
-+
-+ memcpy(retval, value, strlen(value)+1);
-+ CDEBUG(D_SUPER, "Assigned option: %s, value %s\n", opt, retval);
-+ RETURN(retval);
-+}
-+
-+static inline int ll_set_opt(const char *opt, char *data, int fl)
-+{
-+ if (strncmp(opt, data, strlen(opt)) != 0)
-+ return(0);
-+ else
-+ return(fl);
-+}
-+
-+/* non-client-specific mount options are parsed in lmd_parse */
-+static int ll_options(char *options, int *flags)
-+{
-+ int tmp;
-+ char *s1 = options, *s2;
-+ ENTRY;
-+
-+ if (!options)
-+ RETURN(0);
-+
-+ CDEBUG(D_CONFIG, "Parsing opts %s\n", options);
-+
-+ while (*s1) {
-+ CDEBUG(D_SUPER, "next opt=%s\n", s1);
-+ tmp = ll_set_opt("nolock", s1, LL_SBI_NOLCK);
-+ if (tmp) {
-+ *flags |= tmp;
-+ goto next;
-+ }
-+ tmp = ll_set_opt("flock", s1, LL_SBI_FLOCK);
-+ if (tmp) {
-+ *flags |= tmp;
-+ goto next;
-+ }
-+ tmp = ll_set_opt("localflock", s1, LL_SBI_LOCALFLOCK);
-+ if (tmp) {
-+ *flags |= tmp;
-+ goto next;
-+ }
-+ tmp = ll_set_opt("noflock", s1, LL_SBI_FLOCK|LL_SBI_LOCALFLOCK);
-+ if (tmp) {
-+ *flags &= ~tmp;
-+ goto next;
-+ }
-+ tmp = ll_set_opt("user_xattr", s1, LL_SBI_USER_XATTR);
-+ if (tmp) {
-+ *flags |= tmp;
-+ goto next;
-+ }
-+ tmp = ll_set_opt("nouser_xattr", s1, LL_SBI_USER_XATTR);
-+ if (tmp) {
-+ *flags &= ~tmp;
-+ goto next;
-+ }
-+ tmp = ll_set_opt("acl", s1, LL_SBI_ACL);
-+ if (tmp) {
-+ /* Ignore deprecated mount option. The client will
-+ * always try to mount with ACL support; whether it
-+ * is used depends on whether the server supports it. */
-+ LCONSOLE_ERROR_MSG(0x152, "Ignoring deprecated "
-+ "mount option 'acl'.\n");
-+ goto next;
-+ }
-+ tmp = ll_set_opt("noacl", s1, LL_SBI_ACL);
-+ if (tmp) {
-+ LCONSOLE_ERROR_MSG(0x152, "Ignoring deprecated "
-+ "mount option 'noacl'.\n");
-+ goto next;
-+ }
-+
-+ tmp = ll_set_opt("checksum", s1, LL_SBI_DATA_CHECKSUM);
-+ if (tmp) {
-+ *flags |= tmp;
-+ goto next;
-+ }
-+ tmp = ll_set_opt("nochecksum", s1, LL_SBI_DATA_CHECKSUM);
-+ if (tmp) {
-+ *flags &= ~tmp;
-+ goto next;
-+ }
-+
-+ tmp = ll_set_opt("lruresize", s1, LL_SBI_LRU_RESIZE);
-+ if (tmp) {
-+ *flags |= tmp;
-+ goto next;
-+ }
-+ tmp = ll_set_opt("nolruresize", s1, LL_SBI_LRU_RESIZE);
-+ if (tmp) {
-+ *flags &= ~tmp;
-+ goto next;
-+ }
-+ LCONSOLE_ERROR_MSG(0x152, "Unknown option '%s', won't mount.\n",
-+ s1);
-+ RETURN(-EINVAL);
-+
-+next:
-+ /* Find next opt */
-+ s2 = strchr(s1, ',');
-+ if (s2 == NULL)
-+ break;
-+ s1 = s2 + 1;
-+ }
-+ RETURN(0);
-+}
-+
-+void ll_lli_init(struct ll_inode_info *lli)
-+{
-+ lli->lli_inode_magic = LLI_INODE_MAGIC;
-+ sema_init(&lli->lli_size_sem, 1);
-+ sema_init(&lli->lli_write_sem, 1);
-+ lli->lli_flags = 0;
-+ lli->lli_maxbytes = PAGE_CACHE_MAXBYTES;
-+ spin_lock_init(&lli->lli_lock);
-+ sema_init(&lli->lli_och_sem, 1);
-+ lli->lli_mds_read_och = lli->lli_mds_write_och = NULL;
-+ lli->lli_mds_exec_och = NULL;
-+ lli->lli_open_fd_read_count = lli->lli_open_fd_write_count = 0;
-+ lli->lli_open_fd_exec_count = 0;
-+ INIT_LIST_HEAD(&lli->lli_dead_list);
-+#ifdef HAVE_CLOSE_THREAD
-+ INIT_LIST_HEAD(&lli->lli_pending_write_llaps);
-+#endif
-+}
-+
-+/* COMPAT_146 */
-+#define MDCDEV "mdc_dev"
-+static int old_lustre_process_log(struct super_block *sb, char *newprofile,
-+ struct config_llog_instance *cfg)
-+{
-+ struct lustre_sb_info *lsi = s2lsi(sb);
-+ struct obd_device *obd;
-+ struct lustre_handle mdc_conn = {0, };
-+ struct obd_export *exp;
-+ char *ptr, *mdt, *profile;
-+ char niduuid[10] = "mdtnid0";
-+ class_uuid_t uuid;
-+ struct obd_uuid mdc_uuid;
-+ struct llog_ctxt *ctxt;
-+ struct obd_connect_data ocd = { 0 };
-+ lnet_nid_t nid;
-+ int i, rc = 0, recov_bk = 1, failnodes = 0;
-+ ENTRY;
-+
-+ ll_generate_random_uuid(uuid);
-+ class_uuid_unparse(uuid, &mdc_uuid);
-+ CDEBUG(D_HA, "generated uuid: %s\n", mdc_uuid.uuid);
-+
-+ /* Figure out the old mdt and profile name from new-style profile
-+ ("lustre" from "mds/lustre-client") */
-+ mdt = newprofile;
-+ profile = strchr(mdt, '/');
-+ if (profile == NULL) {
-+ CDEBUG(D_CONFIG, "Can't find MDT name in %s\n", newprofile);
-+ GOTO(out, rc = -EINVAL);
-+ }
-+ *profile = '\0';
-+ profile++;
-+ ptr = strrchr(profile, '-');
-+ if (ptr == NULL) {
-+ CDEBUG(D_CONFIG, "Can't find client name in %s\n", newprofile);
-+ GOTO(out, rc = -EINVAL);
-+ }
-+ *ptr = '\0';
-+
-+ LCONSOLE_WARN("This looks like an old mount command; I will try to "
-+ "contact MDT '%s' for profile '%s'\n", mdt, profile);
-+
-+ /* Use nids from mount line: uml1,1 at elan:uml2,2 at elan:/lustre */
-+ i = 0;
-+ ptr = lsi->lsi_lmd->lmd_dev;
-+ while (class_parse_nid(ptr, &nid, &ptr) == 0) {
-+ rc = do_lcfg(MDCDEV, nid, LCFG_ADD_UUID, niduuid, 0,0,0);
-+ i++;
-+ /* Stop at the first failover nid */
-+ if (*ptr == ':')
-+ break;
-+ }
-+ if (i == 0) {
-+ CERROR("No valid MDT nids found.\n");
-+ GOTO(out, rc = -EINVAL);
-+ }
-+ failnodes++;
-+
-+ rc = do_lcfg(MDCDEV, 0, LCFG_ATTACH, LUSTRE_MDC_NAME,mdc_uuid.uuid,0,0);
-+ if (rc < 0)
-+ GOTO(out_del_uuid, rc);
-+
-+ rc = do_lcfg(MDCDEV, 0, LCFG_SETUP, mdt, niduuid, 0, 0);
-+ if (rc < 0) {
-+ LCONSOLE_ERROR_MSG(0x153, "I couldn't establish a connection "
-+ "with the MDT. Check that the MDT host NID "
-+ "is correct and the networks are up.\n");
-+ GOTO(out_detach, rc);
-+ }
-+
-+ obd = class_name2obd(MDCDEV);
-+ if (obd == NULL)
-+ GOTO(out_cleanup, rc = -EINVAL);
-+
-+ /* Add any failover nids */
-+ while (*ptr == ':') {
-+ /* New failover node */
-+ sprintf(niduuid, "mdtnid%d", failnodes);
-+ i = 0;
-+ while (class_parse_nid(ptr, &nid, &ptr) == 0) {
-+ i++;
-+ rc = do_lcfg(MDCDEV, nid, LCFG_ADD_UUID, niduuid,0,0,0);
-+ if (rc)
-+ CERROR("Add uuid for %s failed %d\n",
-+ libcfs_nid2str(nid), rc);
-+ if (*ptr == ':')
-+ break;
-+ }
-+ if (i > 0) {
-+ rc = do_lcfg(MDCDEV, 0, LCFG_ADD_CONN, niduuid, 0, 0,0);
-+ if (rc)
-+ CERROR("Add conn for %s failed %d\n",
-+ libcfs_nid2str(nid), rc);
-+ failnodes++;
-+ } else {
-+ /* at ":/fsname" */
-+ break;
-+ }
-+ }
-+
-+ /* Try all connections, but only once. */
-+ rc = obd_set_info_async(obd->obd_self_export,
-+ sizeof(KEY_INIT_RECOV_BACKUP), KEY_INIT_RECOV_BACKUP,
-+ sizeof(recov_bk), &recov_bk, NULL);
-+ if (rc)
-+ GOTO(out_cleanup, rc);
-+
-+ /* If we don't have this then an ACL MDS will refuse the connection */
-+ ocd.ocd_connect_flags = OBD_CONNECT_ACL;
-+
-+ rc = obd_connect(&mdc_conn, obd, &mdc_uuid, &ocd, &exp);
-+ if (rc) {
-+ CERROR("cannot connect to %s: rc = %d\n", mdt, rc);
-+ GOTO(out_cleanup, rc);
-+ }
-+
-+ ctxt = llog_get_context(exp->exp_obd, LLOG_CONFIG_REPL_CTXT);
-+
-+ cfg->cfg_flags |= CFG_F_COMPAT146;
-+
-+#if 1
-+ rc = class_config_parse_llog(ctxt, profile, cfg);
-+#else
-+ /*
-+ * For debugging, it's useful to just dump the log
-+ */
-+ rc = class_config_dump_llog(ctxt, profile, cfg);
-+#endif
-+ llog_ctxt_put(ctxt);
-+ switch (rc) {
-+ case 0: {
-+ /* Set the caller's profile name to the old-style */
-+ memcpy(newprofile, profile, strlen(profile) + 1);
-+ break;
-+ }
-+ case -EINVAL:
-+ LCONSOLE_ERROR_MSG(0x154, "%s: The configuration '%s' could not"
-+ " be read from the MDT '%s'. Make sure this"
-+ " client and the MDT are running compatible "
-+ "versions of Lustre.\n",
-+ obd->obd_name, profile, mdt);
-+ /* fall through */
-+ default:
-+ LCONSOLE_ERROR_MSG(0x155, "%s: The configuration '%s' could not"
-+ " be read from the MDT '%s'. This may be "
-+ "the result of communication errors between "
-+ "the client and the MDT, or if the MDT is "
-+ "not running.\n", obd->obd_name, profile,
-+ mdt);
-+ break;
-+ }
-+
-+ /* We don't so much care about errors in cleaning up the config llog
-+ * connection, as we have already read the config by this point. */
-+ obd_disconnect(exp);
-+
-+out_cleanup:
-+ do_lcfg(MDCDEV, 0, LCFG_CLEANUP, 0, 0, 0, 0);
-+
-+out_detach:
-+ do_lcfg(MDCDEV, 0, LCFG_DETACH, 0, 0, 0, 0);
-+
-+out_del_uuid:
-+ /* class_add_uuid adds a nid even if the same uuid already exists; the
-+ delete here might remove any copy, so they had all better match. */
-+ for (i = 0; i < failnodes; i++) {
-+ sprintf(niduuid, "mdtnid%d", i);
-+ do_lcfg(MDCDEV, 0, LCFG_DEL_UUID, niduuid, 0, 0, 0);
-+ }
-+ /* class_import_put will get rid of the additional connections */
-+out:
-+ RETURN(rc);
-+}
-+/* end COMPAT_146 */
-+
-+int ll_fill_super(struct super_block *sb)
-+{
-+ struct lustre_profile *lprof;
-+ struct lustre_sb_info *lsi = s2lsi(sb);
-+ struct ll_sb_info *sbi;
-+ char *osc = NULL, *mdc = NULL;
-+ char *profilenm = get_profile_name(sb);
-+ struct config_llog_instance cfg = {0, };
-+ char ll_instance[sizeof(sb) * 2 + 1];
-+ int err;
-+ ENTRY;
-+
-+ CDEBUG(D_VFSTRACE, "VFS Op: sb %p\n", sb);
-+
-+ cfs_module_get();
-+
-+ /* client additional sb info */
-+ lsi->lsi_llsbi = sbi = ll_init_sbi();
-+ if (!sbi) {
-+ cfs_module_put();
-+ RETURN(-ENOMEM);
-+ }
-+
-+ err = ll_options(lsi->lsi_lmd->lmd_opts, &sbi->ll_flags);
-+ if (err)
-+ GOTO(out_free, err);
-+
-+ /* Generate a string unique to this super, in case some joker tries
-+ to mount the same fs at two mount points.
-+ Use the address of the super itself.*/
-+ sprintf(ll_instance, "%p", sb);
-+ cfg.cfg_instance = ll_instance;
-+ cfg.cfg_uuid = lsi->lsi_llsbi->ll_sb_uuid;
-+ cfg.cfg_sb = sb;
-+
-+ /* set up client obds */
-+ if (strchr(profilenm, '/') != NULL) /* COMPAT_146 */
-+ err = -EINVAL; /* skip error messages, use old config code */
-+ else
-+ err = lustre_process_log(sb, profilenm, &cfg);
-+ /* COMPAT_146 */
-+ if (err < 0) {
-+ char *oldname;
-+ int rc, oldnamelen;
-+ oldnamelen = strlen(profilenm) + 1;
-+ /* Temp storage for 1.4.6 profile name */
-+ OBD_ALLOC(oldname, oldnamelen);
-+ if (oldname) {
-+ memcpy(oldname, profilenm, oldnamelen);
-+ rc = old_lustre_process_log(sb, oldname, &cfg);
-+ if (rc >= 0) {
-+ /* That worked - update the profile name
-+ permanently */
-+ err = rc;
-+ OBD_FREE(lsi->lsi_lmd->lmd_profile,
-+ strlen(lsi->lsi_lmd->lmd_profile) + 1);
-+ OBD_ALLOC(lsi->lsi_lmd->lmd_profile,
-+ strlen(oldname) + 1);
-+ if (!lsi->lsi_lmd->lmd_profile) {
-+ OBD_FREE(oldname, oldnamelen);
-+ GOTO(out_free, err = -ENOMEM);
-+ }
-+ memcpy(lsi->lsi_lmd->lmd_profile, oldname,
-+ strlen(oldname) + 1);
-+ profilenm = get_profile_name(sb);
-+ /* Don't ever try to recover the MGS */
-+ rc = ptlrpc_set_import_active(
-+ lsi->lsi_mgc->u.cli.cl_import, 0);
-+ }
-+ OBD_FREE(oldname, oldnamelen);
-+ }
-+ }
-+ /* end COMPAT_146 */
-+ if (err < 0) {
-+ CERROR("Unable to process log: %d\n", err);
-+ GOTO(out_free, err);
-+ }
-+
-+ lprof = class_get_profile(profilenm);
-+ if (lprof == NULL) {
-+ LCONSOLE_ERROR_MSG(0x156, "The client profile '%s' could not be"
-+ " read from the MGS. Does that filesystem "
-+ "exist?\n", profilenm);
-+ GOTO(out_free, err = -EINVAL);
-+ }
-+ CDEBUG(D_CONFIG, "Found profile %s: mdc=%s osc=%s\n", profilenm,
-+ lprof->lp_mdc, lprof->lp_osc);
-+
-+ OBD_ALLOC(osc, strlen(lprof->lp_osc) +
-+ strlen(ll_instance) + 2);
-+ if (!osc)
-+ GOTO(out_free, err = -ENOMEM);
-+ sprintf(osc, "%s-%s", lprof->lp_osc, ll_instance);
-+
-+ OBD_ALLOC(mdc, strlen(lprof->lp_mdc) +
-+ strlen(ll_instance) + 2);
-+ if (!mdc)
-+ GOTO(out_free, err = -ENOMEM);
-+ sprintf(mdc, "%s-%s", lprof->lp_mdc, ll_instance);
-+
-+ /* connections, registrations, sb setup */
-+ err = client_common_fill_super(sb, mdc, osc);
-+
-+out_free:
-+ if (mdc)
-+ OBD_FREE(mdc, strlen(mdc) + 1);
-+ if (osc)
-+ OBD_FREE(osc, strlen(osc) + 1);
-+ if (err)
-+ ll_put_super(sb);
-+ else
-+ LCONSOLE_WARN("Client %s has started\n", profilenm);
-+
-+ RETURN(err);
-+} /* ll_fill_super */
-+
-+
-+void ll_put_super(struct super_block *sb)
-+{
-+ struct config_llog_instance cfg;
-+ char ll_instance[sizeof(sb) * 2 + 1];
-+ struct obd_device *obd;
-+ struct lustre_sb_info *lsi = s2lsi(sb);
-+ struct ll_sb_info *sbi = ll_s2sbi(sb);
-+ char *profilenm = get_profile_name(sb);
-+ int force = 1, next;
-+ ENTRY;
-+
-+ CDEBUG(D_VFSTRACE, "VFS Op: sb %p - %s\n", sb, profilenm);
-+
-+ sprintf(ll_instance, "%p", sb);
-+ cfg.cfg_instance = ll_instance;
-+ lustre_end_log(sb, NULL, &cfg);
-+
-+ if (sbi->ll_mdc_exp) {
-+ obd = class_exp2obd(sbi->ll_mdc_exp);
-+ if (obd)
-+ force = obd->obd_force;
-+ }
-+
-+ /* We need to set force before the lov_disconnect in
-+ lustre_common_put_super, since l_d cleans up osc's as well. */
-+ if (force) {
-+ next = 0;
-+ while ((obd = class_devices_in_group(&sbi->ll_sb_uuid,
-+ &next)) != NULL) {
-+ obd->obd_force = force;
-+ }
-+ }
-+
-+ if (sbi->ll_lcq) {
-+ /* Only if client_common_fill_super succeeded */
-+ client_common_put_super(sb);
-+ }
-+
-+ next = 0;
-+ while ((obd = class_devices_in_group(&sbi->ll_sb_uuid, &next)) !=NULL) {
-+ class_manual_cleanup(obd);
-+ }
-+
-+ if (profilenm)
-+ class_del_profile(profilenm);
-+
-+ ll_free_sbi(sb);
-+ lsi->lsi_llsbi = NULL;
-+
-+ lustre_common_put_super(sb);
-+
-+ LCONSOLE_WARN("client %s umount complete\n", ll_instance);
-+
-+ cfs_module_put();
-+
-+ EXIT;
-+} /* client_put_super */
-+
-+#ifdef HAVE_REGISTER_CACHE
-+#include <linux/cache_def.h>
-+#ifdef HAVE_CACHE_RETURN_INT
-+static int
-+#else
-+static void
-+#endif
-+ll_shrink_cache(int priority, unsigned int gfp_mask)
-+{
-+ struct ll_sb_info *sbi;
-+ int count = 0;
-+
-+ list_for_each_entry(sbi, &ll_super_blocks, ll_list)
-+ count += llap_shrink_cache(sbi, priority);
-+
-+#ifdef HAVE_CACHE_RETURN_INT
-+ return count;
-+#endif
-+}
-+
-+struct cache_definition ll_cache_definition = {
-+ .name = "llap_cache",
-+ .shrink = ll_shrink_cache
-+};
-+#endif /* HAVE_REGISTER_CACHE */
-+
-+struct inode *ll_inode_from_lock(struct ldlm_lock *lock)
-+{
-+ struct inode *inode = NULL;
-+ /* NOTE: we depend on atomic igrab() -bzzz */
-+ lock_res_and_lock(lock);
-+ if (lock->l_ast_data) {
-+ struct ll_inode_info *lli = ll_i2info(lock->l_ast_data);
-+ if (lli->lli_inode_magic == LLI_INODE_MAGIC) {
-+ inode = igrab(lock->l_ast_data);
-+ } else {
-+ inode = lock->l_ast_data;
-+ ldlm_lock_debug(NULL, inode->i_state & I_FREEING ?
-+ D_INFO : D_WARNING,
-+ lock, __FILE__, __func__, __LINE__,
-+ "l_ast_data %p is bogus: magic %08x",
-+ lock->l_ast_data, lli->lli_inode_magic);
-+ inode = NULL;
-+ }
-+ }
-+ unlock_res_and_lock(lock);
-+ return inode;
-+}
-+
-+static int null_if_equal(struct ldlm_lock *lock, void *data)
-+{
-+ if (data == lock->l_ast_data) {
-+ lock->l_ast_data = NULL;
-+
-+ if (lock->l_req_mode != lock->l_granted_mode)
-+ LDLM_ERROR(lock,"clearing inode with ungranted lock");
-+ }
-+
-+ return LDLM_ITER_CONTINUE;
-+}
-+
-+void ll_clear_inode(struct inode *inode)
-+{
-+ struct ll_fid fid;
-+ struct ll_inode_info *lli = ll_i2info(inode);
-+ struct ll_sb_info *sbi = ll_i2sbi(inode);
-+ ENTRY;
-+
-+ CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
-+ inode->i_generation, inode);
-+
-+ if (S_ISDIR(inode->i_mode)) {
-+ /* these should have been cleared in ll_file_release */
-+ LASSERT(lli->lli_sai == NULL);
-+ LASSERT(lli->lli_opendir_key == NULL);
-+ LASSERT(lli->lli_opendir_pid == 0);
-+ }
-+
-+ ll_inode2fid(&fid, inode);
-+ clear_bit(LLI_F_HAVE_MDS_SIZE_LOCK, &lli->lli_flags);
-+ mdc_change_cbdata(sbi->ll_mdc_exp, &fid, null_if_equal, inode);
-+
-+ LASSERT(!lli->lli_open_fd_write_count);
-+ LASSERT(!lli->lli_open_fd_read_count);
-+ LASSERT(!lli->lli_open_fd_exec_count);
-+
-+ if (lli->lli_mds_write_och)
-+ ll_mdc_real_close(inode, FMODE_WRITE);
-+ if (lli->lli_mds_exec_och) {
-+ if (!FMODE_EXEC)
-+ CERROR("No FMODE exec, but exec och is present for "
-+ "inode %ld\n", inode->i_ino);
-+ ll_mdc_real_close(inode, FMODE_EXEC);
-+ }
-+ if (lli->lli_mds_read_och)
-+ ll_mdc_real_close(inode, FMODE_READ);
-+
-+
-+ if (lli->lli_smd) {
-+ obd_change_cbdata(sbi->ll_osc_exp, lli->lli_smd,
-+ null_if_equal, inode);
-+
-+ obd_free_memmd(sbi->ll_osc_exp, &lli->lli_smd);
-+ lli->lli_smd = NULL;
-+ }
-+
-+ if (lli->lli_symlink_name) {
-+ OBD_FREE(lli->lli_symlink_name,
-+ strlen(lli->lli_symlink_name) + 1);
-+ lli->lli_symlink_name = NULL;
-+ }
-+
-+#ifdef CONFIG_FS_POSIX_ACL
-+ if (lli->lli_posix_acl) {
-+ LASSERT(atomic_read(&lli->lli_posix_acl->a_refcount) == 1);
-+ posix_acl_release(lli->lli_posix_acl);
-+ lli->lli_posix_acl = NULL;
-+ }
-+#endif
-+
-+ lli->lli_inode_magic = LLI_INODE_DEAD;
-+
-+#ifdef HAVE_EXPORT___IGET
-+ spin_lock(&sbi->ll_deathrow_lock);
-+ list_del_init(&lli->lli_dead_list);
-+ spin_unlock(&sbi->ll_deathrow_lock);
-+#endif
-+
-+ EXIT;
-+}
-+static int ll_setattr_do_truncate(struct inode *inode, loff_t new_size)
-+{
-+ struct ll_sb_info *sbi = ll_i2sbi(inode);
-+ struct ll_inode_info *lli = ll_i2info(inode);
-+ struct lov_stripe_md *lsm = lli->lli_smd;
-+ int rc;
-+ ldlm_policy_data_t policy = { .l_extent = {new_size,
-+ OBD_OBJECT_EOF } };
-+ struct lustre_handle lockh = { 0 };
-+ int local_lock = 0; /* 0 - no local lock;
-+ * 1 - lock taken by lock_extent;
-+ * 2 - by obd_match*/
-+ int ast_flags;
-+ int err;
-+ ENTRY;
-+
-+ UNLOCK_INODE_MUTEX(inode);
-+ UP_WRITE_I_ALLOC_SEM(inode);
-+
-+ if (sbi->ll_lockless_truncate_enable &&
-+ (sbi->ll_lco.lco_flags & OBD_CONNECT_TRUNCLOCK)) {
-+ ast_flags = LDLM_FL_BLOCK_GRANTED;
-+ rc = obd_match(sbi->ll_osc_exp, lsm, LDLM_EXTENT,
-+ &policy, LCK_PW, &ast_flags, inode, &lockh);
-+ if (rc > 0) {
-+ local_lock = 2;
-+ rc = 0;
-+ } else if (rc == 0) {
-+ rc = ll_file_punch(inode, new_size, 1);
-+ }
-+ } else {
-+ /* XXX when we fix the AST intents to pass the discard-range
-+ * XXX extent, make ast_flags always LDLM_AST_DISCARD_DATA
-+ * XXX here. */
-+ ast_flags = (new_size == 0) ? LDLM_AST_DISCARD_DATA : 0;
-+ rc = ll_extent_lock(NULL, inode, lsm, LCK_PW, &policy,
-+ &lockh, ast_flags);
-+ if (likely(rc == 0))
-+ local_lock = 1;
-+ }
-+
-+ LOCK_INODE_MUTEX(inode);
-+ DOWN_WRITE_I_ALLOC_SEM(inode);
-+ if (likely(rc == 0)) {
-+ /* Only ll_inode_size_lock is taken at this level.
-+ * lov_stripe_lock() is grabbed by ll_truncate() only over
-+ * call to obd_adjust_kms(). If vmtruncate returns 0, then
-+ * ll_truncate dropped ll_inode_size_lock() */
-+ ll_inode_size_lock(inode, 0);
-+ if (!local_lock)
-+ set_bit(LLI_F_SRVLOCK, &lli->lli_flags);
-+ rc = vmtruncate(inode, new_size);
-+ clear_bit(LLI_F_SRVLOCK, &lli->lli_flags);
-+ if (rc != 0) {
-+ LASSERT(atomic_read(&lli->lli_size_sem.count) <= 0);
-+ ll_inode_size_unlock(inode, 0);
-+ }
-+ }
-+ if (local_lock) {
-+ if (local_lock == 2)
-+ err = obd_cancel(sbi->ll_osc_exp, lsm, LCK_PW, &lockh);
-+ else
-+ err = ll_extent_unlock(NULL, inode, lsm, LCK_PW, &lockh);
-+ if (unlikely(err != 0)){
-+ CERROR("extent unlock failed: err=%d,"
-+ " unlock method =%d\n", err, local_lock);
-+ if (rc == 0)
-+ rc = err;
-+ }
-+ }
-+ RETURN(rc);
-+}
-+
-+/* If this inode has objects allocated to it (lsm != NULL), then the OST
-+ * object(s) determine the file size and mtime. Otherwise, the MDS will
-+ * keep these values until such a time that objects are allocated for it.
-+ * We do the MDS operations first, as it is checking permissions for us.
-+ * We don't do the MDS RPC if there is nothing that we want to store there;
-+ * otherwise there is no harm in updating mtime/atime on the MDS if we are
-+ * going to do an RPC anyway.
-+ *
-+ * If we are doing a truncate, we will send the mtime and ctime updates
-+ * to the OST with the punch RPC, otherwise we do an explicit setattr RPC.
-+ * I don't believe it is possible to get e.g. ATTR_MTIME_SET and ATTR_SIZE
-+ * at the same time.
-+ */
-+int ll_setattr_raw(struct inode *inode, struct iattr *attr)
-+{
-+ struct ll_inode_info *lli = ll_i2info(inode);
-+ struct lov_stripe_md *lsm = lli->lli_smd;
-+ struct ll_sb_info *sbi = ll_i2sbi(inode);
-+ struct ptlrpc_request *request = NULL;
-+ struct mdc_op_data op_data;
-+ struct lustre_md md;
-+ int ia_valid = attr->ia_valid;
-+ int rc = 0;
-+ ENTRY;
-+
-+ CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu valid %x\n", inode->i_ino,
-+ attr->ia_valid);
-+ ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_SETATTR, 1);
-+
-+ if (ia_valid & ATTR_SIZE) {
-+ if (attr->ia_size > ll_file_maxbytes(inode)) {
-+ CDEBUG(D_INODE, "file too large %llu > "LPU64"\n",
-+ attr->ia_size, ll_file_maxbytes(inode));
-+ RETURN(-EFBIG);
-+ }
-+
-+ attr->ia_valid |= ATTR_MTIME | ATTR_CTIME;
-+ }
-+
-+ /* POSIX: check before ATTR_*TIME_SET set (from inode_change_ok) */
-+ if (ia_valid & (ATTR_MTIME_SET | ATTR_ATIME_SET)) {
-+ if (current->fsuid != inode->i_uid &&
-+ !cfs_capable(CFS_CAP_FOWNER))
-+ RETURN(-EPERM);
-+ }
-+
-+ /* We mark all of the fields "set" so MDS/OST does not re-set them */
-+ if (attr->ia_valid & ATTR_CTIME) {
-+ attr->ia_ctime = CURRENT_TIME;
-+ attr->ia_valid |= ATTR_CTIME_SET;
-+ }
-+ if (!(ia_valid & ATTR_ATIME_SET) && (attr->ia_valid & ATTR_ATIME)) {
-+ attr->ia_atime = CURRENT_TIME;
-+ attr->ia_valid |= ATTR_ATIME_SET;
-+ }
-+ if (!(ia_valid & ATTR_MTIME_SET) && (attr->ia_valid & ATTR_MTIME)) {
-+ attr->ia_mtime = CURRENT_TIME;
-+ attr->ia_valid |= ATTR_MTIME_SET;
-+ }
-+ if ((attr->ia_valid & ATTR_CTIME) && !(attr->ia_valid & ATTR_MTIME)) {
-+ /* To avoid stale mtime on mds, obtain it from ost and send
-+ to mds. */
-+ rc = ll_glimpse_size(inode, 0);
-+ if (rc)
-+ RETURN(rc);
-+
-+ attr->ia_valid |= ATTR_MTIME_SET | ATTR_MTIME;
-+ attr->ia_mtime = inode->i_mtime;
-+ }
-+
-+ if (attr->ia_valid & (ATTR_MTIME | ATTR_CTIME))
-+ CDEBUG(D_INODE, "setting mtime %lu, ctime %lu, now = %lu\n",
-+ LTIME_S(attr->ia_mtime), LTIME_S(attr->ia_ctime),
-+ CURRENT_SECONDS);
-+
-+ /* NB: ATTR_SIZE will only be set after this point if the size
-+ * resides on the MDS, ie, this file has no objects. */
-+ if (lsm)
-+ attr->ia_valid &= ~ATTR_SIZE;
-+
-+ /* We always do an MDS RPC, even if we're only changing the size;
-+ * only the MDS knows whether truncate() should fail with -ETXTBUSY */
-+ ll_prepare_mdc_op_data(&op_data, inode, NULL, NULL, 0, 0, NULL);
-+
-+ rc = mdc_setattr(sbi->ll_mdc_exp, &op_data,
-+ attr, NULL, 0, NULL, 0, &request);
-+
-+ if (rc) {
-+ ptlrpc_req_finished(request);
-+ if (rc == -ENOENT) {
-+ inode->i_nlink = 0;
-+ /* Unlinked special device node? Or just a race?
-+ * Pretend we've done everything. */
-+ if (!S_ISREG(inode->i_mode) &&
-+ !S_ISDIR(inode->i_mode))
-+ rc = inode_setattr(inode, attr);
-+ } else if (rc != -EPERM && rc != -EACCES && rc != -ETXTBSY)
-+ CERROR("mdc_setattr fails: rc = %d\n", rc);
-+ RETURN(rc);
-+ }
-+
-+ rc = mdc_req2lustre_md(request, REPLY_REC_OFF, sbi->ll_osc_exp, &md);
-+ if (rc) {
-+ ptlrpc_req_finished(request);
-+ RETURN(rc);
-+ }
-+
-+ /* We call inode_setattr to adjust timestamps.
-+ * If there is at least some data in file, we cleared ATTR_SIZE above to
-+ * avoid invoking vmtruncate, otherwise it is important to call
-+ * vmtruncate in inode_setattr to update inode->i_size (bug 6196) */
-+ rc = inode_setattr(inode, attr);
-+
-+ ll_update_inode(inode, &md);
-+ ptlrpc_req_finished(request);
-+
-+ if (!lsm || !S_ISREG(inode->i_mode)) {
-+ CDEBUG(D_INODE, "no lsm: not setting attrs on OST\n");
-+ RETURN(rc);
-+ }
-+
-+ /* We really need to get our PW lock before we change inode->i_size.
-+ * If we don't, we can race with other i_size updaters on our node, like
-+ * ll_file_read. We can also race with i_size propagation to other
-+ * nodes through dirtying and writeback of final cached pages. This
-+ * last one is especially bad for racing o_append users on other
-+ * nodes. */
-+ if (ia_valid & ATTR_SIZE) {
-+ rc = ll_setattr_do_truncate(inode, attr->ia_size);
-+ } else if (ia_valid & (ATTR_MTIME | ATTR_MTIME_SET)) {
-+ obd_flag flags;
-+ struct obd_info oinfo = { { { 0 } } };
-+ struct obdo *oa;
-+ OBDO_ALLOC(oa);
-+
-+ CDEBUG(D_INODE, "set mtime on OST inode %lu to %lu\n",
-+ inode->i_ino, LTIME_S(attr->ia_mtime));
-+
-+ if (oa) {
-+ oa->o_id = lsm->lsm_object_id;
-+ oa->o_valid = OBD_MD_FLID;
-+
-+ flags = OBD_MD_FLTYPE | OBD_MD_FLATIME |
-+ OBD_MD_FLMTIME | OBD_MD_FLCTIME |
-+ OBD_MD_FLFID | OBD_MD_FLGENER;
-+
-+ obdo_from_inode(oa, inode, flags);
-+
-+ oinfo.oi_oa = oa;
-+ oinfo.oi_md = lsm;
-+
-+ rc = obd_setattr_rqset(sbi->ll_osc_exp, &oinfo, NULL);
-+ if (rc)
-+ CERROR("obd_setattr_async fails: rc=%d\n", rc);
-+ OBDO_FREE(oa);
-+ } else {
-+ rc = -ENOMEM;
-+ }
-+ }
-+ RETURN(rc);
-+}
-+
-+int ll_setattr(struct dentry *de, struct iattr *attr)
-+{
-+ int mode;
-+
-+ if ((attr->ia_valid & (ATTR_CTIME|ATTR_SIZE|ATTR_MODE)) ==
-+ (ATTR_CTIME|ATTR_SIZE|ATTR_MODE))
-+ attr->ia_valid |= MDS_OPEN_OWNEROVERRIDE;
-+ if ((attr->ia_valid & (ATTR_MODE|ATTR_FORCE|ATTR_SIZE)) ==
-+ (ATTR_SIZE|ATTR_MODE)) {
-+ mode = de->d_inode->i_mode;
-+ if (((mode & S_ISUID) && (!(attr->ia_mode & S_ISUID))) ||
-+ ((mode & S_ISGID) && (mode & S_IXGRP) &&
-+ (!(attr->ia_mode & S_ISGID))))
-+ attr->ia_valid |= ATTR_FORCE;
-+ }
-+
-+ return ll_setattr_raw(de->d_inode, attr);
-+}
-+
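A standalone sketch, not part of the patch, of the setuid/setgid condition ll_setattr() applies above: ATTR_FORCE is added only when a combined size+mode change clears the setuid bit, or the setgid bit on a group-executable file, so that the implicit mode change is accepted. needs_attr_force() is a hypothetical helper introduced here only for illustration.

#include <stdio.h>
#include <sys/stat.h>

/* Same predicate as in ll_setattr() above, lifted into a testable helper. */
static int needs_attr_force(mode_t cur, mode_t new)
{
        return ((cur & S_ISUID) && !(new & S_ISUID)) ||
               ((cur & S_ISGID) && (cur & S_IXGRP) && !(new & S_ISGID));
}

int main(void)
{
        printf("%d\n", needs_attr_force(S_ISUID | 0755, 0755)); /* 1: setuid bit dropped */
        printf("%d\n", needs_attr_force(0755, 0755));           /* 0: nothing to clear */
        return 0;
}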
-+int ll_statfs_internal(struct super_block *sb, struct obd_statfs *osfs,
-+ __u64 max_age, __u32 flags)
-+{
-+ struct ll_sb_info *sbi = ll_s2sbi(sb);
-+ struct obd_statfs obd_osfs;
-+ int rc;
-+ ENTRY;
-+
-+ rc = obd_statfs(class_exp2obd(sbi->ll_mdc_exp), osfs, max_age, flags);
-+ if (rc) {
-+ CERROR("mdc_statfs fails: rc = %d\n", rc);
-+ RETURN(rc);
-+ }
-+
-+ osfs->os_type = sb->s_magic;
-+
-+ CDEBUG(D_SUPER, "MDC blocks "LPU64"/"LPU64" objects "LPU64"/"LPU64"\n",
-+ osfs->os_bavail, osfs->os_blocks, osfs->os_ffree,osfs->os_files);
-+
-+ rc = obd_statfs_rqset(class_exp2obd(sbi->ll_osc_exp),
-+ &obd_osfs, max_age, flags);
-+ if (rc) {
-+ CERROR("obd_statfs fails: rc = %d\n", rc);
-+ RETURN(rc);
-+ }
-+
-+ CDEBUG(D_SUPER, "OSC blocks "LPU64"/"LPU64" objects "LPU64"/"LPU64"\n",
-+ obd_osfs.os_bavail, obd_osfs.os_blocks, obd_osfs.os_ffree,
-+ obd_osfs.os_files);
-+
-+ osfs->os_bsize = obd_osfs.os_bsize;
-+ osfs->os_blocks = obd_osfs.os_blocks;
-+ osfs->os_bfree = obd_osfs.os_bfree;
-+ osfs->os_bavail = obd_osfs.os_bavail;
-+
-+ /* If we don't have as many objects free on the OST as inodes
-+ * on the MDS, we reduce the total number of inodes to
-+ * compensate, so that the "inodes in use" number is correct.
-+ */
-+ if (obd_osfs.os_ffree < osfs->os_ffree) {
-+ osfs->os_files = (osfs->os_files - osfs->os_ffree) +
-+ obd_osfs.os_ffree;
-+ osfs->os_ffree = obd_osfs.os_ffree;
-+ }
-+
-+ RETURN(rc);
-+}
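A minimal standalone sketch, with made-up numbers, of the inode-count compensation done at the end of ll_statfs_internal() above: when the OSTs have fewer free objects than the MDS has free inodes, the reported totals shrink so that "files - ffree" (inodes in use) stays accurate.

#include <stdio.h>

int main(void)
{
        unsigned long long files = 1000, ffree = 400;   /* hypothetical MDS statfs */
        unsigned long long ost_ffree = 100;             /* hypothetical free OST objects */

        if (ost_ffree < ffree) {
                files = (files - ffree) + ost_ffree;    /* 600 in use + 100 creatable */
                ffree = ost_ffree;
        }
        printf("files=%llu ffree=%llu\n", files, ffree); /* files=700 ffree=100 */
        return 0;
}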
-+#ifndef HAVE_STATFS_DENTRY_PARAM
-+int ll_statfs(struct super_block *sb, struct kstatfs *sfs)
-+{
-+#else
-+int ll_statfs(struct dentry *de, struct kstatfs *sfs)
-+{
-+ struct super_block *sb = de->d_sb;
-+#endif
-+ struct obd_statfs osfs;
-+ int rc;
-+
-+ CDEBUG(D_VFSTRACE, "VFS Op: at "LPU64" jiffies\n", get_jiffies_64());
-+ ll_stats_ops_tally(ll_s2sbi(sb), LPROC_LL_STAFS, 1);
-+
-+ /* For now we will always get up-to-date statfs values, but in the
-+ * future we may allow some amount of caching on the client (e.g.
-+ * from QOS or lprocfs updates). */
-+ rc = ll_statfs_internal(sb, &osfs, cfs_time_current_64() - 1, 0);
-+ if (rc)
-+ return rc;
-+
-+ statfs_unpack(sfs, &osfs);
-+
-+ /* We need to downshift for all 32-bit kernels, because we can't
-+ * tell if the kernel is being called via sys_statfs64() or not.
-+ * Stop before overflowing f_bsize - in which case it is better
-+ * to just risk EOVERFLOW if caller is using old sys_statfs(). */
-+ if (sizeof(long) < 8) {
-+ while (osfs.os_blocks > ~0UL && sfs->f_bsize < 0x40000000) {
-+ sfs->f_bsize <<= 1;
-+
-+ osfs.os_blocks >>= 1;
-+ osfs.os_bfree >>= 1;
-+ osfs.os_bavail >>= 1;
-+ }
-+ }
-+
-+ sfs->f_blocks = osfs.os_blocks;
-+ sfs->f_bfree = osfs.os_bfree;
-+ sfs->f_bavail = osfs.os_bavail;
-+
-+ return 0;
-+}
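A standalone sketch, not from the patch, of the 32-bit downshift loop in ll_statfs() above: block counts that do not fit in an unsigned long are halved while the block size is doubled, so the total byte count reported to userspace is preserved. The 32-bit limit is written out explicitly here because this sketch may be compiled on a 64-bit host.

#include <stdio.h>

int main(void)
{
        unsigned long long blocks = 0x300000000ULL; /* hypothetical: ~13 billion 4KB blocks (~48TB) */
        unsigned long bsize = 4096;

        while (blocks > 0xffffffffULL && bsize < 0x40000000) {
                bsize <<= 1;
                blocks >>= 1;
        }
        /* 4096 -> 16384 bytes per block, block count shrunk by the same factor */
        printf("bsize=%lu blocks=%llu\n", bsize, blocks);
        return 0;
}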
-+
-+void ll_inode_size_lock(struct inode *inode, int lock_lsm)
-+{
-+ struct ll_inode_info *lli;
-+ struct lov_stripe_md *lsm;
-+
-+ lli = ll_i2info(inode);
-+ LASSERT(lli->lli_size_sem_owner != current);
-+ down(&lli->lli_size_sem);
-+ LASSERT(lli->lli_size_sem_owner == NULL);
-+ lli->lli_size_sem_owner = current;
-+ lsm = lli->lli_smd;
-+ LASSERTF(lsm != NULL || lock_lsm == 0, "lsm %p, lock_lsm %d\n",
-+ lsm, lock_lsm);
-+ if (lock_lsm)
-+ lov_stripe_lock(lsm);
-+}
-+
-+void ll_inode_size_unlock(struct inode *inode, int unlock_lsm)
-+{
-+ struct ll_inode_info *lli;
-+ struct lov_stripe_md *lsm;
-+
-+ lli = ll_i2info(inode);
-+ lsm = lli->lli_smd;
-+ LASSERTF(lsm != NULL || unlock_lsm == 0, "lsm %p, lock_lsm %d\n",
-+ lsm, unlock_lsm);
-+ if (unlock_lsm)
-+ lov_stripe_unlock(lsm);
-+ LASSERT(lli->lli_size_sem_owner == current);
-+ lli->lli_size_sem_owner = NULL;
-+ up(&lli->lli_size_sem);
-+}
-+
-+static void ll_replace_lsm(struct inode *inode, struct lov_stripe_md *lsm)
-+{
-+ struct ll_inode_info *lli = ll_i2info(inode);
-+
-+ dump_lsm(D_INODE, lsm);
-+ dump_lsm(D_INODE, lli->lli_smd);
-+ LASSERTF(lsm->lsm_magic == LOV_MAGIC_JOIN,
-+ "lsm must be joined lsm %p\n", lsm);
-+ obd_free_memmd(ll_i2obdexp(inode), &lli->lli_smd);
-+ CDEBUG(D_INODE, "replace lsm %p to lli_smd %p for inode %lu%u(%p)\n",
-+ lsm, lli->lli_smd, inode->i_ino, inode->i_generation, inode);
-+ lli->lli_smd = lsm;
-+ lli->lli_maxbytes = lsm->lsm_maxbytes;
-+ if (lli->lli_maxbytes > PAGE_CACHE_MAXBYTES)
-+ lli->lli_maxbytes = PAGE_CACHE_MAXBYTES;
-+}
-+
-+void ll_update_inode(struct inode *inode, struct lustre_md *md)
-+{
-+ struct ll_inode_info *lli = ll_i2info(inode);
-+ struct mds_body *body = md->body;
-+ struct lov_stripe_md *lsm = md->lsm;
-+
-+ LASSERT ((lsm != NULL) == ((body->valid & OBD_MD_FLEASIZE) != 0));
-+ if (lsm != NULL) {
-+ if (lli->lli_smd == NULL) {
-+ if (lsm->lsm_magic != LOV_MAGIC &&
-+ lsm->lsm_magic != LOV_MAGIC_JOIN) {
-+ dump_lsm(D_ERROR, lsm);
-+ LBUG();
-+ }
-+ CDEBUG(D_INODE, "adding lsm %p to inode %lu/%u(%p)\n",
-+ lsm, inode->i_ino, inode->i_generation, inode);
-+ /* ll_inode_size_lock() requires it is only called
-+ * with lli_smd != NULL or lock_lsm == 0 or we can
-+ * race between lock/unlock. bug 9547 */
-+ lli->lli_smd = lsm;
-+ lli->lli_maxbytes = lsm->lsm_maxbytes;
-+ if (lli->lli_maxbytes > PAGE_CACHE_MAXBYTES)
-+ lli->lli_maxbytes = PAGE_CACHE_MAXBYTES;
-+ } else {
-+ if (lli->lli_smd->lsm_magic == lsm->lsm_magic &&
-+ lli->lli_smd->lsm_stripe_count ==
-+ lsm->lsm_stripe_count) {
-+ if (lov_stripe_md_cmp(lli->lli_smd, lsm)) {
-+ CERROR("lsm mismatch for inode %ld\n",
-+ inode->i_ino);
-+ CERROR("lli_smd:\n");
-+ dump_lsm(D_ERROR, lli->lli_smd);
-+ CERROR("lsm:\n");
-+ dump_lsm(D_ERROR, lsm);
-+ LBUG();
-+ }
-+ } else
-+ ll_replace_lsm(inode, lsm);
-+ }
-+ if (lli->lli_smd != lsm)
-+ obd_free_memmd(ll_i2obdexp(inode), &lsm);
-+ }
-+
-+#ifdef CONFIG_FS_POSIX_ACL
-+ LASSERT(!md->posix_acl || (body->valid & OBD_MD_FLACL));
-+ if (body->valid & OBD_MD_FLACL) {
-+ spin_lock(&lli->lli_lock);
-+ if (lli->lli_posix_acl)
-+ posix_acl_release(lli->lli_posix_acl);
-+ lli->lli_posix_acl = md->posix_acl;
-+ spin_unlock(&lli->lli_lock);
-+ }
-+#endif
-+
-+ if (body->valid & OBD_MD_FLID)
-+ inode->i_ino = body->ino;
-+ if (body->valid & OBD_MD_FLATIME &&
-+ body->atime > LTIME_S(inode->i_atime))
-+ LTIME_S(inode->i_atime) = body->atime;
-+
-+ /* mtime is always updated with ctime, but can be set in the past.
-+ Since a write and utime(2) may happen within the same second, and
-+ utime's mtime takes priority over the write's, take the mtime from
-+ the MDS when the ctimes are equal. */
-+ if (body->valid & OBD_MD_FLCTIME &&
-+ body->ctime >= LTIME_S(inode->i_ctime)) {
-+ LTIME_S(inode->i_ctime) = body->ctime;
-+ if (body->valid & OBD_MD_FLMTIME) {
-+ CDEBUG(D_INODE, "setting ino %lu mtime "
-+ "from %lu to "LPU64"\n", inode->i_ino,
-+ LTIME_S(inode->i_mtime), body->mtime);
-+ LTIME_S(inode->i_mtime) = body->mtime;
-+ }
-+ }
-+ if (body->valid & OBD_MD_FLMODE)
-+ inode->i_mode = (inode->i_mode & S_IFMT)|(body->mode & ~S_IFMT);
-+ if (body->valid & OBD_MD_FLTYPE)
-+ inode->i_mode = (inode->i_mode & ~S_IFMT)|(body->mode & S_IFMT);
-+ if (S_ISREG(inode->i_mode)) {
-+ inode->i_blkbits = min(PTLRPC_MAX_BRW_BITS+1, LL_MAX_BLKSIZE_BITS);
-+ } else {
-+ inode->i_blkbits = inode->i_sb->s_blocksize_bits;
-+ }
-+#ifdef HAVE_INODE_BLKSIZE
-+ inode->i_blksize = 1<<inode->i_blkbits;
-+#endif
-+ if (body->valid & OBD_MD_FLUID)
-+ inode->i_uid = body->uid;
-+ if (body->valid & OBD_MD_FLGID)
-+ inode->i_gid = body->gid;
-+ if (body->valid & OBD_MD_FLFLAGS)
-+ inode->i_flags = ll_ext_to_inode_flags(body->flags);
-+
-+ if (body->valid & OBD_MD_FLNLINK)
-+ inode->i_nlink = body->nlink;
-+ if (body->valid & OBD_MD_FLGENER)
-+ inode->i_generation = body->generation;
-+ if (body->valid & OBD_MD_FLRDEV)
-+ inode->i_rdev = old_decode_dev(body->rdev);
-+ if (body->valid & OBD_MD_FLSIZE) {
-+#if 0 /* Can't block ll_test_inode->ll_update_inode, b=14326*/
-+ ll_inode_size_lock(inode, 0);
-+ i_size_write(inode, body->size);
-+ ll_inode_size_unlock(inode, 0);
-+#else
-+ inode->i_size = body->size;
-+#endif
-+ }
-+ if (body->valid & OBD_MD_FLBLOCKS)
-+ inode->i_blocks = body->blocks;
-+
-+ if (body->valid & OBD_MD_FLSIZE)
-+ set_bit(LLI_F_HAVE_MDS_SIZE_LOCK, &lli->lli_flags);
-+}
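A small standalone sketch of the mtime/ctime merge rule ll_update_inode() applies above: the MDS mtime is taken only when the MDS ctime is not older than the locally cached one, so a utime(2) issued elsewhere (same or newer ctime, possibly older mtime) may legitimately move mtime backwards, while a stale MDS reply cannot. merge_times() is a hypothetical helper used only for illustration.

#include <stdio.h>

/* Hypothetical helper mirroring the OBD_MD_FLCTIME/OBD_MD_FLMTIME branch above. */
static void merge_times(long *ct, long *mt, long mds_ct, long mds_mt)
{
        if (mds_ct >= *ct) {    /* MDS view is at least as recent */
                *ct = mds_ct;
                *mt = mds_mt;   /* may legitimately set mtime into the past */
        }
}

int main(void)
{
        long ct = 100, mt = 100;

        merge_times(&ct, &mt, 100, 50);           /* utime(2) done on another client */
        printf("ctime=%ld mtime=%ld\n", ct, mt);  /* ctime=100 mtime=50 */
        return 0;
}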
-+
-+static struct backing_dev_info ll_backing_dev_info = {
-+ .ra_pages = 0, /* No readahead */
-+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,12))
-+ .capabilities = 0, /* Does contribute to dirty memory */
-+#else
-+ .memory_backed = 0, /* Does contribute to dirty memory */
-+#endif
-+};
-+
-+void ll_read_inode2(struct inode *inode, void *opaque)
-+{
-+ struct lustre_md *md = opaque;
-+ struct ll_inode_info *lli = ll_i2info(inode);
-+ ENTRY;
-+
-+ CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
-+ inode->i_generation, inode);
-+
-+ ll_lli_init(lli);
-+
-+ LASSERT(!lli->lli_smd);
-+
-+ /* Core attributes from the MDS first. This is a new inode, and
-+ * the VFS doesn't zero times in the core inode so we have to do
-+ * it ourselves. They will be overwritten by either MDS or OST
-+ * attributes - we just need to make sure they aren't newer. */
-+ LTIME_S(inode->i_mtime) = 0;
-+ LTIME_S(inode->i_atime) = 0;
-+ LTIME_S(inode->i_ctime) = 0;
-+ inode->i_rdev = 0;
-+ ll_update_inode(inode, md);
-+
-+ /* OIDEBUG(inode); */
-+
-+ if (S_ISREG(inode->i_mode)) {
-+ struct ll_sb_info *sbi = ll_i2sbi(inode);
-+ inode->i_op = &ll_file_inode_operations;
-+ inode->i_fop = sbi->ll_fop;
-+ inode->i_mapping->a_ops = &ll_aops;
-+ EXIT;
-+ } else if (S_ISDIR(inode->i_mode)) {
-+ inode->i_op = &ll_dir_inode_operations;
-+ inode->i_fop = &ll_dir_operations;
-+ inode->i_mapping->a_ops = &ll_dir_aops;
-+ EXIT;
-+ } else if (S_ISLNK(inode->i_mode)) {
-+ inode->i_op = &ll_fast_symlink_inode_operations;
-+ EXIT;
-+ } else {
-+ inode->i_op = &ll_special_inode_operations;
-+ init_special_inode(inode, inode->i_mode,
-+ kdev_t_to_nr(inode->i_rdev));
-+ /* initializing backing dev info. */
-+ inode->i_mapping->backing_dev_info = &ll_backing_dev_info;
-+ EXIT;
-+ }
-+}
-+
-+int ll_iocontrol(struct inode *inode, struct file *file,
-+ unsigned int cmd, unsigned long arg)
-+{
-+ struct ll_sb_info *sbi = ll_i2sbi(inode);
-+ struct ptlrpc_request *req = NULL;
-+ int rc, flags = 0;
-+ ENTRY;
-+
-+ switch(cmd) {
-+ case EXT3_IOC_GETFLAGS: {
-+ struct ll_fid fid;
-+ struct mds_body *body;
-+
-+ ll_inode2fid(&fid, inode);
-+ rc = mdc_getattr(sbi->ll_mdc_exp, &fid, OBD_MD_FLFLAGS,0,&req);
-+ if (rc) {
-+ CERROR("failure %d inode %lu\n", rc, inode->i_ino);
-+ RETURN(-abs(rc));
-+ }
-+
-+ body = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF,
-+ sizeof(*body));
-+
-+ /* We want to return EXT3_*_FL flags to the caller via this
-+ * ioctl. An older MDS may be sending S_* flags, fix it up. */
-+ flags = ll_inode_to_ext_flags(body->flags,
-+ body->flags &MDS_BFLAG_EXT_FLAGS);
-+ ptlrpc_req_finished (req);
-+
-+ RETURN(put_user(flags, (int *)arg));
-+ }
-+ case EXT3_IOC_SETFLAGS: {
-+ struct mdc_op_data op_data;
-+ struct ll_iattr_struct attr;
-+ struct obd_info oinfo = { { { 0 } } };
-+ struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
-+
-+ if (get_user(flags, (int *)arg))
-+ RETURN(-EFAULT);
-+
-+ oinfo.oi_md = lsm;
-+ OBDO_ALLOC(oinfo.oi_oa);
-+ if (!oinfo.oi_oa)
-+ RETURN(-ENOMEM);
-+
-+ ll_prepare_mdc_op_data(&op_data, inode, NULL, NULL, 0, 0, NULL);
-+
-+ memset(&attr, 0, sizeof(attr));
-+ attr.ia_attr_flags = flags;
-+ ((struct iattr *)&attr)->ia_valid |= ATTR_ATTR_FLAG;
-+
-+ rc = mdc_setattr(sbi->ll_mdc_exp, &op_data,
-+ (struct iattr *)&attr, NULL, 0, NULL, 0, &req);
-+ ptlrpc_req_finished(req);
-+ if (rc || lsm == NULL) {
-+ OBDO_FREE(oinfo.oi_oa);
-+ RETURN(rc);
-+ }
-+
-+ oinfo.oi_oa->o_id = lsm->lsm_object_id;
-+ oinfo.oi_oa->o_flags = flags;
-+ oinfo.oi_oa->o_valid = OBD_MD_FLID | OBD_MD_FLFLAGS;
-+
-+ obdo_from_inode(oinfo.oi_oa, inode,
-+ OBD_MD_FLFID | OBD_MD_FLGENER);
-+ rc = obd_setattr_rqset(sbi->ll_osc_exp, &oinfo, NULL);
-+ OBDO_FREE(oinfo.oi_oa);
-+ if (rc) {
-+ if (rc != -EPERM && rc != -EACCES)
-+ CERROR("mdc_setattr_async fails: rc = %d\n", rc);
-+ RETURN(rc);
-+ }
-+
-+ inode->i_flags = ll_ext_to_inode_flags(flags |
-+ MDS_BFLAG_EXT_FLAGS);
-+ RETURN(0);
-+ }
-+ default:
-+ RETURN(-ENOSYS);
-+ }
-+
-+ RETURN(0);
-+}
-+
-+/* umount -f client means force down, don't save state */
-+#ifdef HAVE_UMOUNTBEGIN_VFSMOUNT
-+void ll_umount_begin(struct vfsmount *vfsmnt, int flags)
-+{
-+ struct super_block *sb = vfsmnt->mnt_sb;
-+#else
-+void ll_umount_begin(struct super_block *sb)
-+{
-+#endif
-+ struct lustre_sb_info *lsi = s2lsi(sb);
-+ struct ll_sb_info *sbi = ll_s2sbi(sb);
-+ struct obd_device *obd;
-+ struct obd_ioctl_data ioc_data = { 0 };
-+ ENTRY;
-+
-+#ifdef HAVE_UMOUNTBEGIN_VFSMOUNT
-+ if (!(flags & MNT_FORCE)) {
-+ EXIT;
-+ return;
-+ }
-+#endif
-+
-+ /* Tell the MGC we got umount -f */
-+ lsi->lsi_flags |= LSI_UMOUNT_FORCE;
-+
-+ CDEBUG(D_VFSTRACE, "VFS Op: superblock %p count %d active %d\n", sb,
-+ sb->s_count, atomic_read(&sb->s_active));
-+
-+ obd = class_exp2obd(sbi->ll_mdc_exp);
-+ if (obd == NULL) {
-+ CERROR("Invalid MDC connection handle "LPX64"\n",
-+ sbi->ll_mdc_exp->exp_handle.h_cookie);
-+ EXIT;
-+ return;
-+ }
-+ obd->obd_force = 1;
-+ obd_iocontrol(IOC_OSC_SET_ACTIVE, sbi->ll_mdc_exp, sizeof ioc_data,
-+ &ioc_data, NULL);
-+
-+ obd = class_exp2obd(sbi->ll_osc_exp);
-+ if (obd == NULL) {
-+ CERROR("Invalid LOV connection handle "LPX64"\n",
-+ sbi->ll_osc_exp->exp_handle.h_cookie);
-+ EXIT;
-+ return;
-+ }
-+
-+ obd->obd_force = 1;
-+ obd_iocontrol(IOC_OSC_SET_ACTIVE, sbi->ll_osc_exp, sizeof ioc_data,
-+ &ioc_data, NULL);
-+
-+ /* Really, we'd like to wait until there are no requests outstanding,
-+ * and then continue. For now, we just invalidate the requests,
-+ * schedule() and sleep one second if needed, and hope.
-+ */
-+ schedule();
-+#ifdef HAVE_UMOUNTBEGIN_VFSMOUNT
-+ if (atomic_read(&vfsmnt->mnt_count) > 2) {
-+ cfs_schedule_timeout(CFS_TASK_INTERRUPTIBLE,
-+ cfs_time_seconds(1));
-+ if (atomic_read(&vfsmnt->mnt_count) > 2)
-+ LCONSOLE_WARN("Mount still busy with %d refs! You "
-+ "may try to umount it a bit later\n",
-+ atomic_read(&vfsmnt->mnt_count));
-+ }
-+#endif
-+
-+ EXIT;
-+}
-+
-+int ll_remount_fs(struct super_block *sb, int *flags, char *data)
-+{
-+ struct ll_sb_info *sbi = ll_s2sbi(sb);
-+ int err;
-+ __u32 read_only;
-+
-+ if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY)) {
-+ read_only = *flags & MS_RDONLY;
-+ err = obd_set_info_async(sbi->ll_mdc_exp, sizeof(KEY_READONLY),
-+ KEY_READONLY, sizeof(read_only),
-+ &read_only, NULL);
-+
-+ /* MDS might have expected a different ro key value, b=17493 */
-+ if (err == -EINVAL) {
-+ CDEBUG(D_CONFIG, "Retrying remount with 1.6.6 ro key\n");
-+ err = obd_set_info_async(sbi->ll_mdc_exp,
-+ sizeof(KEY_READONLY_166COMPAT),
-+ KEY_READONLY_166COMPAT,
-+ sizeof(read_only),
-+ &read_only, NULL);
-+ }
-+
-+ if (err) {
-+ CERROR("Failed to change the read-only flag during "
-+ "remount: %d\n", err);
-+ return err;
-+ }
-+
-+ if (read_only)
-+ sb->s_flags |= MS_RDONLY;
-+ else
-+ sb->s_flags &= ~MS_RDONLY;
-+ }
-+ return 0;
-+}
-+
-+int ll_prep_inode(struct obd_export *exp, struct inode **inode,
-+ struct ptlrpc_request *req, int offset,struct super_block *sb)
-+{
-+ struct lustre_md md;
-+ struct ll_sb_info *sbi = NULL;
-+ int rc = 0;
-+ ENTRY;
-+
-+ LASSERT(*inode || sb);
-+ sbi = sb ? ll_s2sbi(sb) : ll_i2sbi(*inode);
-+ prune_deathrow(sbi, 1);
-+
-+ rc = mdc_req2lustre_md(req, offset, exp, &md);
-+ if (rc)
-+ RETURN(rc);
-+
-+ if (*inode) {
-+ ll_update_inode(*inode, &md);
-+ } else {
-+ LASSERT(sb);
-+ *inode = ll_iget(sb, md.body->ino, &md);
-+ if (*inode == NULL || is_bad_inode(*inode)) {
-+ mdc_free_lustre_md(exp, &md);
-+ rc = -ENOMEM;
-+ CERROR("new_inode -fatal: rc %d\n", rc);
-+ GOTO(out, rc);
-+ }
-+ }
-+
-+ rc = obd_checkmd(exp, ll_i2mdcexp(*inode),
-+ ll_i2info(*inode)->lli_smd);
-+out:
-+ RETURN(rc);
-+}
-+
-+char *llap_origins[] = {
-+ [LLAP_ORIGIN_UNKNOWN] = "--",
-+ [LLAP_ORIGIN_READPAGE] = "rp",
-+ [LLAP_ORIGIN_READAHEAD] = "ra",
-+ [LLAP_ORIGIN_COMMIT_WRITE] = "cw",
-+ [LLAP_ORIGIN_WRITEPAGE] = "wp",
-+ [LLAP_ORIGIN_LOCKLESS_IO] = "ls"
-+};
-+
-+struct ll_async_page *llite_pglist_next_llap(struct ll_sb_info *sbi,
-+ struct list_head *list)
-+{
-+ struct ll_async_page *llap;
-+ struct list_head *pos;
-+
-+ list_for_each(pos, list) {
-+ if (pos == &sbi->ll_pglist)
-+ return NULL;
-+ llap = list_entry(pos, struct ll_async_page, llap_pglist_item);
-+ if (llap->llap_page == NULL)
-+ continue;
-+ return llap;
-+ }
-+ LBUG();
-+ return NULL;
-+}
-+
-+int ll_obd_statfs(struct inode *inode, void *arg)
-+{
-+ struct ll_sb_info *sbi = NULL;
-+ struct obd_device *client_obd = NULL, *lov_obd = NULL;
-+ struct lov_obd *lov = NULL;
-+ struct obd_statfs stat_buf = {0};
-+ char *buf = NULL;
-+ struct obd_ioctl_data *data = NULL;
-+ __u32 type, index;
-+ int len = 0, rc;
-+
-+ if (!inode || !(sbi = ll_i2sbi(inode)))
-+ GOTO(out_statfs, rc = -EINVAL);
-+
-+ rc = obd_ioctl_getdata(&buf, &len, arg);
-+ if (rc)
-+ GOTO(out_statfs, rc);
-+
-+ data = (void*)buf;
-+ if (!data->ioc_inlbuf1 || !data->ioc_inlbuf2 ||
-+ !data->ioc_pbuf1 || !data->ioc_pbuf2)
-+ GOTO(out_statfs, rc = -EINVAL);
-+
-+ memcpy(&type, data->ioc_inlbuf1, sizeof(__u32));
-+ memcpy(&index, data->ioc_inlbuf2, sizeof(__u32));
-+
-+ if (type == LL_STATFS_MDC) {
-+ if (index > 0)
-+ GOTO(out_statfs, rc = -ENODEV);
-+ client_obd = class_exp2obd(sbi->ll_mdc_exp);
-+ } else if (type == LL_STATFS_LOV) {
-+ lov_obd = class_exp2obd(sbi->ll_osc_exp);
-+ lov = &lov_obd->u.lov;
-+
-+ if (index >= lov->desc.ld_tgt_count)
-+ GOTO(out_statfs, rc = -ENODEV);
-+
-+ if (!lov->lov_tgts[index])
-+ /* Try again with the next index */
-+ GOTO(out_statfs, rc = -EAGAIN);
-+
-+ client_obd = class_exp2obd(lov->lov_tgts[index]->ltd_exp);
-+ if (!lov->lov_tgts[index]->ltd_active)
-+ GOTO(out_uuid, rc = -ENODATA);
-+ }
-+
-+ if (!client_obd)
-+ GOTO(out_statfs, rc = -EINVAL);
-+
-+ rc = obd_statfs(client_obd, &stat_buf, cfs_time_current_64() - HZ, 1);
-+ if (rc)
-+ GOTO(out_statfs, rc);
-+
-+ if (copy_to_user(data->ioc_pbuf1, &stat_buf, data->ioc_plen1))
-+ GOTO(out_statfs, rc = -EFAULT);
-+
-+out_uuid:
-+ if (copy_to_user(data->ioc_pbuf2, obd2cli_tgt(client_obd),
-+ data->ioc_plen2))
-+ rc = -EFAULT;
-+
-+out_statfs:
-+ if (buf)
-+ obd_ioctl_freedata(buf, len);
-+ return rc;
-+}
-+
-+int ll_process_config(struct lustre_cfg *lcfg)
-+{
-+ char *ptr;
-+ void *sb;
-+ struct lprocfs_static_vars lvars;
-+ unsigned long x;
-+ int rc = 0;
-+
-+ lprocfs_llite_init_vars(&lvars);
-+
-+ /* The instance name contains the sb: lustre-client-aacfe000 */
-+ ptr = strrchr(lustre_cfg_string(lcfg, 0), '-');
-+ if (!ptr || !*(++ptr))
-+ return -EINVAL;
-+ if (sscanf(ptr, "%lx", &x) != 1)
-+ return -EINVAL;
-+ sb = (void *)x;
-+ /* This better be a real Lustre superblock! */
-+ LASSERT(s2lsi((struct super_block *)sb)->lsi_lmd->lmd_magic == LMD_MAGIC);
-+
-+ /* Note we have not called client_common_fill_super yet, so
-+ proc fns must be able to handle that! */
-+ rc = class_process_proc_param(PARAM_LLITE, lvars.obd_vars,
-+ lcfg, sb);
-+ return(rc);
-+}
-+
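A userspace sketch, not from the patch, of how ll_process_config() above recovers the superblock pointer from a configuration instance name such as "lustre-client-aacfe000": everything after the last '-' is the pointer value printed in hex.

#include <stdio.h>
#include <string.h>

int main(void)
{
        const char *name = "lustre-client-aacfe000"; /* example name from the comment above */
        const char *ptr = strrchr(name, '-');
        unsigned long x;

        if (!ptr || !*(++ptr) || sscanf(ptr, "%lx", &x) != 1)
                return 1;
        printf("sb = 0x%lx\n", x);                   /* sb = 0xaacfe000 */
        return 0;
}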
-+int ll_show_options(struct seq_file *seq, struct vfsmount *vfs)
-+{
-+ struct ll_sb_info *sbi;
-+
-+ LASSERT((seq != NULL) && (vfs != NULL));
-+ sbi = ll_s2sbi(vfs->mnt_sb);
-+
-+ if (sbi->ll_flags & LL_SBI_NOLCK)
-+ seq_puts(seq, ",nolock");
-+
-+ if (sbi->ll_flags & LL_SBI_FLOCK)
-+ seq_puts(seq, ",flock");
-+
-+ if (sbi->ll_flags & LL_SBI_LOCALFLOCK)
-+ seq_puts(seq, ",localflock");
-+
-+ if (sbi->ll_flags & LL_SBI_USER_XATTR)
-+ seq_puts(seq, ",user_xattr");
-+
-+ if (sbi->ll_flags & LL_SBI_ACL)
-+ seq_puts(seq, ",acl");
-+
-+ RETURN(0);
-+}
-diff -urNad lustre~/lustre/llite/llite_mmap.c lustre/lustre/llite/llite_mmap.c
---- lustre~/lustre/llite/llite_mmap.c 2009-03-12 10:32:27.000000000 +0100
-+++ lustre/lustre/llite/llite_mmap.c 2009-03-13 09:45:03.000000000 +0100
-@@ -81,8 +81,7 @@
- int lt_get_mmap_locks(struct ll_lock_tree *tree,
- unsigned long addr, size_t count);
-
--struct page *ll_nopage(struct vm_area_struct *vma, unsigned long address,
-- int *type);
-+static struct vm_operations_struct ll_file_vm_ops;
-
- struct ll_lock_tree_node * ll_node_from_inode(struct inode *inode, __u64 start,
- __u64 end, ldlm_mode_t mode)
-@@ -285,9 +284,19 @@
- return LCK_PR;
- }
-
-+static void policy_from_vma_pgoff(ldlm_policy_data_t *policy,
-+ struct vm_area_struct *vma,
-+ __u64 pgoff, size_t count)
-+{
-+ policy->l_extent.start = pgoff << CFS_PAGE_SHIFT;
-+ policy->l_extent.end = (policy->l_extent.start + count - 1) |
-+ ~CFS_PAGE_MASK;
-+}
-+
- static void policy_from_vma(ldlm_policy_data_t *policy,
- struct vm_area_struct *vma, unsigned long addr,
- size_t count)
-+
- {
- policy->l_extent.start = ((addr - vma->vm_start) & CFS_PAGE_MASK) +
- ((__u64)vma->vm_pgoff << CFS_PAGE_SHIFT);
-@@ -308,7 +317,7 @@
- spin_lock(&mm->page_table_lock);
- for(vma = find_vma(mm, addr);
- vma != NULL && vma->vm_start < (addr + count); vma = vma->vm_next) {
-- if (vma->vm_ops && vma->vm_ops->nopage == ll_nopage &&
-+ if (vma->vm_ops && vma->vm_ops == &ll_file_vm_ops &&
- vma->vm_flags & VM_SHARED) {
- ret = vma;
- break;
-@@ -360,44 +369,30 @@
- }
- RETURN(0);
- }
--/**
-- * Page fault handler.
-- *
-- * \param vma - is virtiual area struct related to page fault
-- * \param address - address when hit fault
-- * \param type - of fault
-- *
-- * \return allocated and filled page for address
-- * \retval NOPAGE_SIGBUS if page not exist on this address
-- * \retval NOPAGE_OOM not have memory for allocate new page
-- */
--struct page *ll_nopage(struct vm_area_struct *vma, unsigned long address,
-- int *type)
-+
-+static int ll_get_extent_lock(struct vm_area_struct *vma, unsigned long pgoff,
-+ int *save_flags, struct lustre_handle *lockh)
- {
- struct file *filp = vma->vm_file;
- struct ll_file_data *fd = LUSTRE_FPRIVATE(filp);
- struct inode *inode = filp->f_dentry->d_inode;
-- struct lustre_handle lockh = { 0 };
- ldlm_policy_data_t policy;
- ldlm_mode_t mode;
-- struct page *page = NULL;
- struct ll_inode_info *lli = ll_i2info(inode);
-- struct lov_stripe_md *lsm;
- struct ost_lvb lvb;
- __u64 kms, old_mtime;
-- unsigned long pgoff, size, rand_read, seq_read;
-- int rc = 0;
-+ unsigned long size;
- ENTRY;
-
- if (lli->lli_smd == NULL) {
- CERROR("No lsm on fault?\n");
-- RETURN(NOPAGE_SIGBUS);
-+ RETURN(0);
- }
-
- ll_clear_file_contended(inode);
-
- /* start and end the lock on the first and last bytes in the page */
-- policy_from_vma(&policy, vma, address, CFS_PAGE_SIZE);
-+ policy_from_vma_pgoff(&policy, vma, pgoff, CFS_PAGE_SIZE);
-
- CDEBUG(D_MMAP, "nopage vma %p inode %lu, locking ["LPU64", "LPU64"]\n",
- vma, inode->i_ino, policy.l_extent.start, policy.l_extent.end);
-@@ -405,26 +400,28 @@
- mode = mode_from_vma(vma);
- old_mtime = LTIME_S(inode->i_mtime);
-
-- lsm = lli->lli_smd;
-- rc = ll_extent_lock(fd, inode, lsm, mode, &policy,
-- &lockh, LDLM_FL_CBPENDING | LDLM_FL_NO_LRU);
-- if (rc != 0)
-- RETURN(NOPAGE_SIGBUS);
-+ if (ll_extent_lock(fd, inode, lli->lli_smd, mode, &policy,
-+ lockh, LDLM_FL_CBPENDING | LDLM_FL_NO_LRU) != 0)
-+ RETURN(0);
-
- if (vma->vm_flags & VM_EXEC && LTIME_S(inode->i_mtime) != old_mtime)
- CWARN("binary changed. inode %lu\n", inode->i_ino);
-
-- lov_stripe_lock(lsm);
-+ lov_stripe_lock(lli->lli_smd);
- inode_init_lvb(inode, &lvb);
-- obd_merge_lvb(ll_i2obdexp(inode), lsm, &lvb, 1);
-+ if (obd_merge_lvb(ll_i2obdexp(inode), lli->lli_smd, &lvb, 1)) {
-+ lov_stripe_unlock(lli->lli_smd);
-+ RETURN(0);
-+ }
- kms = lvb.lvb_size;
-
-- pgoff = ((address - vma->vm_start) >> CFS_PAGE_SHIFT) + vma->vm_pgoff;
- size = (kms + CFS_PAGE_SIZE - 1) >> CFS_PAGE_SHIFT;
-+ CDEBUG(D_INFO, "Kms %lu - %lu\n", size, pgoff);
-
- if (pgoff >= size) {
-- lov_stripe_unlock(lsm);
-+ lov_stripe_unlock(lli->lli_smd);
- ll_glimpse_size(inode, LDLM_FL_BLOCK_GRANTED);
-+ lov_stripe_lock(lli->lli_smd);
- } else {
- /* XXX change inode size without ll_inode_size_lock() held!
- * there is a race condition with truncate path. (see
-@@ -446,29 +443,69 @@
- CDEBUG(D_INODE, "ino=%lu, updating i_size %llu\n",
- inode->i_ino, i_size_read(inode));
- }
-- lov_stripe_unlock(lsm);
- }
-
- /* If mapping is writeable, adjust kms to cover this page,
- * but do not extend kms beyond actual file size.
- * policy.l_extent.end is set to the end of the page by policy_from_vma
- * bug 10919 */
-- lov_stripe_lock(lsm);
- if (mode == LCK_PW)
-- obd_adjust_kms(ll_i2obdexp(inode), lsm,
-+ obd_adjust_kms(ll_i2obdexp(inode), lli->lli_smd,
- min_t(loff_t, policy.l_extent.end + 1,
- i_size_read(inode)), 0);
-- lov_stripe_unlock(lsm);
-+ lov_stripe_unlock(lli->lli_smd);
-
- /* disable VM_SEQ_READ and use VM_RAND_READ to make sure that
- * the kernel will not read other pages not covered by ldlm in
- * filemap_nopage. we do our readahead in ll_readpage.
- */
-- rand_read = vma->vm_flags & VM_RAND_READ;
-- seq_read = vma->vm_flags & VM_SEQ_READ;
-+ *save_flags = vma->vm_flags & (VM_RAND_READ | VM_SEQ_READ);
- vma->vm_flags &= ~ VM_SEQ_READ;
- vma->vm_flags |= VM_RAND_READ;
-
-+ return 1;
-+}
-+
-+static void ll_put_extent_lock(struct vm_area_struct *vma, int save_flags,
-+ struct lustre_handle *lockh)
-+{
-+ struct file *filp = vma->vm_file;
-+ struct ll_file_data *fd = LUSTRE_FPRIVATE(filp);
-+ struct inode *inode = filp->f_dentry->d_inode;
-+ ldlm_mode_t mode;
-+
-+ mode = mode_from_vma(vma);
-+ vma->vm_flags &= ~(VM_RAND_READ | VM_SEQ_READ);
-+ vma->vm_flags |= save_flags;
-+
-+ ll_extent_unlock(fd, inode, ll_i2info(inode)->lli_smd, mode, lockh);
-+}
-+
-+#ifndef HAVE_VM_OP_FAULT
-+/**
-+ * Page fault handler (old ->nopage interface).
-+ *
-+ * \param vma - virtual memory area in which the fault occurred
-+ * \param address - address at which the fault was hit
-+ * \param type - out parameter for the fault type
-+ *
-+ * \return allocated and filled page for the address
-+ * \retval NOPAGE_SIGBUS if no page exists at this address
-+ * \retval NOPAGE_OOM if there is no memory to allocate a new page
-+ */
-+struct page *ll_nopage(struct vm_area_struct *vma, unsigned long address,
-+ int *type)
-+{
-+ struct lustre_handle lockh = { 0 };
-+ int save_flags = 0;
-+ unsigned long pgoff;
-+ struct page *page;
-+ ENTRY;
-+
-+ pgoff = ((address - vma->vm_start) >> CFS_PAGE_SHIFT) + vma->vm_pgoff;
-+ if (!ll_get_extent_lock(vma, pgoff, &save_flags, &lockh))
-+ RETURN(NOPAGE_SIGBUS);
-+
- page = filemap_nopage(vma, address, type);
- if (page != NOPAGE_SIGBUS && page != NOPAGE_OOM)
- LL_CDEBUG_PAGE(D_PAGE, page, "got addr %lu type %lx\n", address,
-@@ -477,13 +514,48 @@
- CDEBUG(D_PAGE, "got addr %lu type %lx - SIGBUS\n", address,
- (long)type);
-
-- vma->vm_flags &= ~VM_RAND_READ;
-- vma->vm_flags |= (rand_read | seq_read);
-+ ll_put_extent_lock(vma, save_flags, &lockh);
-
-- ll_extent_unlock(fd, inode, ll_i2info(inode)->lli_smd, mode, &lockh);
- RETURN(page);
- }
-
-+#else
-+/* New fault() API */
-+/**
-+ * Page fault handler (new ->fault interface).
-+ *
-+ * \param vma - virtual memory area in which the fault occurred
-+ * \param vmf - fault descriptor; vmf->pgoff gives the faulting page
-+ *              offset and vmf->page is filled in on success
-+ *
-+ * \return VM_FAULT_* status as returned by filemap_fault()
-+ * \retval VM_FAULT_SIGBUS if the extent lock could not be obtained
-+ */
-+int ll_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
-+{
-+ struct lustre_handle lockh = { 0 };
-+ int save_flags = 0;
-+ int rc;
-+ ENTRY;
-+
-+ if (!ll_get_extent_lock(vma, vmf->pgoff, &save_flags, &lockh))
-+ RETURN(VM_FAULT_SIGBUS);
-+
-+ rc = filemap_fault(vma, vmf);
-+ if (vmf->page)
-+ LL_CDEBUG_PAGE(D_PAGE, vmf->page, "got addr %p type NOPAGE\n",
-+ vmf->virtual_address);
-+ else
-+ CDEBUG(D_PAGE, "got addr %p - SIGBUS\n",
-+ vmf->virtual_address);
-+
-+ ll_put_extent_lock(vma, save_flags, &lockh);
-+
-+ RETURN(rc);
-+}
-+#endif
-+
- /* To avoid cancel the locks covering mmapped region for lock cache pressure,
- * we track the mapped vma count by lli_mmap_cnt.
- * ll_vm_open(): when first vma is linked, split locks from lru.
-@@ -548,6 +620,7 @@
- }
- }
-
-+#ifndef HAVE_VM_OP_FAULT
- #ifndef HAVE_FILEMAP_POPULATE
- static int (*filemap_populate)(struct vm_area_struct * area, unsigned long address, unsigned long len, pgprot_t prot, unsigned long pgoff, int nonblock);
- #endif
-@@ -562,6 +635,7 @@
- rc = filemap_populate(area, address, len, prot, pgoff, 1);
- RETURN(rc);
- }
-+#endif
-
- /* return the user space pointer that maps to a file offset via a vma */
- static inline unsigned long file_to_user(struct vm_area_struct *vma, __u64 byte)
-@@ -588,10 +662,14 @@
- }
-
- static struct vm_operations_struct ll_file_vm_ops = {
-- .nopage = ll_nopage,
- .open = ll_vm_open,
- .close = ll_vm_close,
-+#ifdef HAVE_VM_OP_FAULT
-+ .fault = ll_fault,
-+#else
-+ .nopage = ll_nopage,
- .populate = ll_populate,
-+#endif
- };
-
- int ll_file_mmap(struct file * file, struct vm_area_struct * vma)
-@@ -602,7 +680,7 @@
- ll_stats_ops_tally(ll_i2sbi(file->f_dentry->d_inode), LPROC_LL_MAP, 1);
- rc = generic_file_mmap(file, vma);
- if (rc == 0) {
--#ifndef HAVE_FILEMAP_POPULATE
-+#if !defined(HAVE_FILEMAP_POPULATE) && !defined(HAVE_VM_OP_FAULT)
- if (!filemap_populate)
- filemap_populate = vma->vm_ops->populate;
- #endif
-diff -urNad lustre~/lustre/llite/llite_nfs.c lustre/lustre/llite/llite_nfs.c
---- lustre~/lustre/llite/llite_nfs.c 2009-03-12 10:32:27.000000000 +0100
-+++ lustre/lustre/llite/llite_nfs.c 2009-03-13 09:45:03.000000000 +0100
-@@ -68,36 +68,30 @@
- }
-
- static struct inode * search_inode_for_lustre(struct super_block *sb,
-- unsigned long ino,
-- unsigned long generation,
-- int mode)
-+ struct ll_fid *iid)
- {
- struct ptlrpc_request *req = NULL;
- struct ll_sb_info *sbi = ll_s2sbi(sb);
-- struct ll_fid fid;
- unsigned long valid = 0;
- int eadatalen = 0, rc;
- struct inode *inode = NULL;
-- struct ll_fid iid = { .id = ino, .generation = generation };
- ENTRY;
-
-- inode = ILOOKUP(sb, ino, ll_nfs_test_inode, &iid);
-+ inode = ILOOKUP(sb, iid->id, ll_nfs_test_inode, iid);
-
- if (inode)
- RETURN(inode);
-- if (S_ISREG(mode)) {
-- rc = ll_get_max_mdsize(sbi, &eadatalen);
-- if (rc)
-- RETURN(ERR_PTR(rc));
-- valid |= OBD_MD_FLEASIZE;
-- }
-- fid.id = (__u64)ino;
-- fid.generation = generation;
-- fid.f_type = mode;
-
-- rc = mdc_getattr(sbi->ll_mdc_exp, &fid, valid, eadatalen, &req);
-+ rc = ll_get_max_mdsize(sbi, &eadatalen);
-+ if (rc)
-+ RETURN(ERR_PTR(rc));
-+
-+ valid |= OBD_MD_FLEASIZE;
-+
-+ /* mds_fid2dentry ignores f_type */
-+ rc = mdc_getattr(sbi->ll_mdc_exp, iid, valid, eadatalen, &req);
- if (rc) {
-- CERROR("failure %d inode %lu\n", rc, ino);
-+ CERROR("failure %d inode "LPU64"\n", rc, iid->id);
- RETURN(ERR_PTR(rc));
- }
-
-@@ -111,27 +105,27 @@
- RETURN(inode);
- }
-
--static struct dentry *ll_iget_for_nfs(struct super_block *sb, unsigned long ino,
-- __u32 generation, umode_t mode)
-+static struct dentry *ll_iget_for_nfs(struct super_block *sb,
-+ struct ll_fid *iid)
- {
- struct inode *inode;
- struct dentry *result;
- ENTRY;
-
-- if (ino == 0)
-+ if (iid->id == 0)
- RETURN(ERR_PTR(-ESTALE));
-
-- inode = search_inode_for_lustre(sb, ino, generation, mode);
-- if (IS_ERR(inode)) {
-+ inode = search_inode_for_lustre(sb, iid);
-+ if (IS_ERR(inode))
- RETURN(ERR_PTR(PTR_ERR(inode)));
-- }
-+
- if (is_bad_inode(inode) ||
-- (generation && inode->i_generation != generation)){
-+ (iid->generation && inode->i_generation != iid->generation)) {
- /* we didn't find the right inode.. */
- CERROR("Inode %lu, Bad count: %lu %d or version %u %u\n",
- inode->i_ino, (unsigned long)inode->i_nlink,
- atomic_read(&inode->i_count), inode->i_generation,
-- generation);
-+ iid->generation);
- iput(inode);
- RETURN(ERR_PTR(-ESTALE));
- }
-@@ -146,57 +140,102 @@
- RETURN(result);
- }
-
--struct dentry *ll_fh_to_dentry(struct super_block *sb, __u32 *data, int len,
-- int fhtype, int parent)
-+#define LUSTRE_NFS_FID 0x94
-+
-+struct lustre_nfs_fid {
-+ struct ll_fid child;
-+ struct ll_fid parent;
-+ umode_t mode;
-+};
-+
-+/* The return value is the file handle type:
-+ * LUSTRE_NFS_FID -- handle contains the child fid, the parent fid and
-+ * the file mode;
-+ * 255 -- error (handle buffer too small).
-+ */
-+static int ll_encode_fh(struct dentry *de, __u32 *fh, int *plen,
-+ int connectable)
- {
-- switch (fhtype) {
-- case 2:
-- if (len < 5)
-- break;
-- if (parent)
-- return ll_iget_for_nfs(sb, data[3], 0, data[4]);
-- case 1:
-- if (len < 3)
-- break;
-- if (parent)
-- break;
-- return ll_iget_for_nfs(sb, data[0], data[1], data[2]);
-- default: break;
-- }
-- return ERR_PTR(-EINVAL);
-+ struct inode *inode = de->d_inode;
-+ struct inode *parent = de->d_parent->d_inode;
-+ struct lustre_nfs_fid *nfs_fid = (void *)fh;
-+ ENTRY;
-+
-+ CDEBUG(D_INFO, "encoding for (%lu) maxlen=%d minlen=%lu\n",
-+ inode->i_ino, *plen,
-+ sizeof(struct lustre_nfs_fid));
-+
-+ if (*plen < sizeof(struct lustre_nfs_fid))
-+ RETURN(255);
-+
-+ ll_inode2fid(&nfs_fid->child, inode);
-+ ll_inode2fid(&nfs_fid->parent, parent);
-+
-+ nfs_fid->mode = (S_IFMT & inode->i_mode);
-+ *plen = sizeof(struct lustre_nfs_fid);
-+
-+ RETURN(LUSTRE_NFS_FID);
- }
-
--int ll_dentry_to_fh(struct dentry *dentry, __u32 *datap, int *lenp,
-- int need_parent)
-+#ifdef HAVE_FH_TO_DENTRY
-+static struct dentry *ll_fh_to_dentry(struct super_block *sb, struct fid *fid,
-+ int fh_len, int fh_type)
- {
-- if (*lenp < 3)
-- return 255;
-- *datap++ = dentry->d_inode->i_ino;
-- *datap++ = dentry->d_inode->i_generation;
-- *datap++ = (__u32)(S_IFMT & dentry->d_inode->i_mode);
-+ struct lustre_nfs_fid *nfs_fid = (struct lustre_nfs_fid *)fid;
-
-- if (*lenp == 3 || S_ISDIR(dentry->d_inode->i_mode)) {
-- *lenp = 3;
-- return 1;
-- }
-- if (dentry->d_parent) {
-- *datap++ = dentry->d_parent->d_inode->i_ino;
-- *datap++ = (__u32)(S_IFMT & dentry->d_parent->d_inode->i_mode);
-+ if (fh_type != LUSTRE_NFS_FID)
-+ RETURN(ERR_PTR(-EINVAL));
-
-- *lenp = 5;
-- return 2;
-- }
-- *lenp = 3;
-- return 1;
-+ RETURN(ll_iget_for_nfs(sb, &nfs_fid->child));
-+}
-+static struct dentry *ll_fh_to_parent(struct super_block *sb, struct fid *fid,
-+ int fh_len, int fh_type)
-+{
-+ struct lustre_nfs_fid *nfs_fid = (struct lustre_nfs_fid *)fid;
-+
-+ if (fh_type != LUSTRE_NFS_FID)
-+ RETURN(ERR_PTR(-EINVAL));
-+ RETURN(ll_iget_for_nfs(sb, &nfs_fid->parent));
- }
-
--#if THREAD_SIZE >= 8192
-+#else
-+/*
-+ * The handle length is counted in __u32 units; the handle itself
-+ * is composed of the child and parent fids plus a mode.
-+ */
-+static struct dentry *ll_decode_fh(struct super_block *sb, __u32 *fh, int fh_len,
-+ int fh_type,
-+ int (*acceptable)(void *, struct dentry *),
-+ void *context)
-+{
-+ struct lustre_nfs_fid *nfs_fid = (void *)fh;
-+ struct dentry *entry;
-+ ENTRY;
-+
-+ CDEBUG(D_INFO, "decoding for "LPU64" fh_len=%d fh_type=%x\n",
-+ nfs_fid->child.id, fh_len, fh_type);
-+
-+ if (fh_type != LUSTRE_NFS_FID)
-+ RETURN(ERR_PTR(-ESTALE));
-+
-+ entry = sb->s_export_op->find_exported_dentry(sb, &nfs_fid->child,
-+ &nfs_fid->parent,
-+ acceptable, context);
-+ RETURN(entry);
-+}
-+
-+
- struct dentry *ll_get_dentry(struct super_block *sb, void *data)
- {
-- __u32 *inump = (__u32*)data;
-- return ll_iget_for_nfs(sb, inump[0], inump[1], S_IFREG);
-+ struct lustre_nfs_fid *fid = data;
-+ ENTRY;
-+
-+ RETURN(ll_iget_for_nfs(sb, &fid->child));
-+
- }
-
-+#endif
-+
- struct dentry *ll_get_parent(struct dentry *dchild)
- {
- struct ptlrpc_request *req = NULL;
-@@ -208,11 +247,11 @@
- char dotdot[] = "..";
- int rc = 0;
- ENTRY;
--
-+
- LASSERT(dir && S_ISDIR(dir->i_mode));
--
-- sbi = ll_s2sbi(dir->i_sb);
--
-+
-+ sbi = ll_s2sbi(dir->i_sb);
-+
- fid.id = (__u64)dir->i_ino;
- fid.generation = dir->i_generation;
- fid.f_type = S_IFDIR;
-@@ -223,11 +262,12 @@
- CERROR("failure %d inode %lu get parent\n", rc, dir->i_ino);
- return ERR_PTR(rc);
- }
-- body = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF, sizeof (*body));
--
-+ body = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF, sizeof (*body));
-+
- LASSERT((body->valid & OBD_MD_FLGENER) && (body->valid & OBD_MD_FLID));
--
-- result = ll_iget_for_nfs(dir->i_sb, body->ino, body->generation, S_IFDIR);
-+ fid.id = body->ino;
-+ fid.generation = body->generation;
-+ result = ll_iget_for_nfs(dir->i_sb, &fid);
-
- if (IS_ERR(result))
- rc = PTR_ERR(result);
-@@ -236,10 +276,18 @@
- if (rc)
- return ERR_PTR(rc);
- RETURN(result);
--}
-+}
-
-+
-+#if THREAD_SIZE >= 8192
- struct export_operations lustre_export_operations = {
-- .get_parent = ll_get_parent,
-- .get_dentry = ll_get_dentry,
-+ .encode_fh = ll_encode_fh,
-+#ifdef HAVE_FH_TO_DENTRY
-+ .fh_to_dentry = ll_fh_to_dentry,
-+ .fh_to_parent = ll_fh_to_parent,
-+#else
-+ .get_dentry = ll_get_dentry,
-+ .decode_fh = ll_decode_fh,
-+#endif
- };
- #endif
-diff -urNad lustre~/lustre/llite/lloop.c lustre/lustre/llite/lloop.c
---- lustre~/lustre/llite/lloop.c 2009-03-12 10:32:27.000000000 +0100
-+++ lustre/lustre/llite/lloop.c 2009-03-13 09:45:45.000000000 +0100
-@@ -152,7 +152,7 @@
- struct semaphore lo_bh_mutex;
- atomic_t lo_pending;
-
-- request_queue_t *lo_queue;
-+ struct request_queue *lo_queue;
-
- /* data to handle bio for lustre. */
- struct lo_request_data {
-@@ -283,7 +283,7 @@
- return bio;
- }
-
--static int loop_make_request(request_queue_t *q, struct bio *old_bio)
-+static int loop_make_request(struct request_queue *q, struct bio *old_bio)
- {
- struct lloop_device *lo = q->queuedata;
- int rw = bio_rw(old_bio);
-@@ -312,7 +312,7 @@
- if (atomic_dec_and_test(&lo->lo_pending))
- up(&lo->lo_bh_mutex);
- out:
-- bio_io_error(old_bio, old_bio->bi_size);
-+ cfs_bio_io_error(old_bio, old_bio->bi_size);
- return 0;
- inactive:
- spin_unlock_irq(&lo->lo_lock);
-@@ -322,7 +322,7 @@
- /*
- * kick off io on the underlying address space
- */
--static void loop_unplug(request_queue_t *q)
-+static void loop_unplug(struct request_queue *q)
- {
- struct lloop_device *lo = q->queuedata;
-
-@@ -334,7 +334,7 @@
- {
- int ret;
- ret = do_bio_filebacked(lo, bio);
-- bio_endio(bio, bio->bi_size, ret);
-+ cfs_bio_endio(bio, bio->bi_size, ret);
- }
-
- /*
-@@ -736,7 +736,7 @@
-
- out_mem4:
- while (i--)
-- blk_put_queue(loop_dev[i].lo_queue);
-+ blk_cleanup_queue(loop_dev[i].lo_queue);
- i = max_loop;
- out_mem3:
- while (i--)
-@@ -758,7 +758,7 @@
- ll_iocontrol_unregister(ll_iocontrol_magic);
- for (i = 0; i < max_loop; i++) {
- del_gendisk(disks[i]);
-- blk_put_queue(loop_dev[i].lo_queue);
-+ blk_cleanup_queue(loop_dev[i].lo_queue);
- put_disk(disks[i]);
- }
- if (ll_unregister_blkdev(lloop_major, "lloop"))
-diff -urNad lustre~/lustre/llite/lloop.c.orig lustre/lustre/llite/lloop.c.orig
---- lustre~/lustre/llite/lloop.c.orig 1970-01-01 00:00:00.000000000 +0000
-+++ lustre/lustre/llite/lloop.c.orig 2009-03-13 09:45:03.000000000 +0100
-@@ -0,0 +1,777 @@
-+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-+ * vim:expandtab:shiftwidth=8:tabstop=8:
-+ *
-+ * GPL HEADER START
-+ *
-+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
-+ *
-+ * This program is free software; you can redistribute it and/or modify
-+ * it under the terms of the GNU General Public License version 2 only,
-+ * as published by the Free Software Foundation.
-+ *
-+ * This program is distributed in the hope that it will be useful, but
-+ * WITHOUT ANY WARRANTY; without even the implied warranty of
-+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-+ * General Public License version 2 for more details (a copy is included
-+ * in the LICENSE file that accompanied this code).
-+ *
-+ * You should have received a copy of the GNU General Public License
-+ * version 2 along with this program; If not, see
-+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
-+ *
-+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
-+ * CA 95054 USA or visit www.sun.com if you need additional information or
-+ * have any questions.
-+ *
-+ * GPL HEADER END
-+ */
-+/*
-+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved
-+ * Use is subject to license terms.
-+ */
-+/*
-+ * This file is part of Lustre, http://www.lustre.org/
-+ * Lustre is a trademark of Sun Microsystems, Inc.
-+ */
-+
-+/*
-+ * linux/drivers/block/loop.c
-+ *
-+ * Written by Theodore Ts'o, 3/29/93
-+ *
-+ * Copyright 1993 by Theodore Ts'o. Redistribution of this file is
-+ * permitted under the GNU General Public License.
-+ *
-+ * DES encryption plus some minor changes by Werner Almesberger, 30-MAY-1993
-+ * more DES encryption plus IDEA encryption by Nicholas J. Leon, June 20, 1996
-+ *
-+ * Modularized and updated for 1.1.16 kernel - Mitch Dsouza 28th May 1994
-+ * Adapted for 1.3.59 kernel - Andries Brouwer, 1 Feb 1996
-+ *
-+ * Fixed do_loop_request() re-entrancy - Vincent.Renardias at waw.com Mar 20, 1997
-+ *
-+ * Added devfs support - Richard Gooch <rgooch at atnf.csiro.au> 16-Jan-1998
-+ *
-+ * Handle sparse backing files correctly - Kenn Humborg, Jun 28, 1998
-+ *
-+ * Loadable modules and other fixes by AK, 1998
-+ *
-+ * Make real block number available to downstream transfer functions, enables
-+ * CBC (and relatives) mode encryption requiring unique IVs per data block.
-+ * Reed H. Petty, rhp at draper.net
-+ *
-+ * Maximum number of loop devices now dynamic via max_loop module parameter.
-+ * Russell Kroll <rkroll at exploits.org> 19990701
-+ *
-+ * Maximum number of loop devices when compiled-in now selectable by passing
-+ * max_loop=<1-255> to the kernel on boot.
-+ * Erik I. Bolsø, <eriki at himolde.no>, Oct 31, 1999
-+ *
-+ * Completely rewrite request handling to be make_request_fn style and
-+ * non blocking, pushing work to a helper thread. Lots of fixes from
-+ * Al Viro too.
-+ * Jens Axboe <axboe at suse.de>, Nov 2000
-+ *
-+ * Support up to 256 loop devices
-+ * Heinz Mauelshagen <mge at sistina.com>, Feb 2002
-+ *
-+ * Support for falling back on the write file operation when the address space
-+ * operations prepare_write and/or commit_write are not available on the
-+ * backing filesystem.
-+ * Anton Altaparmakov, 16 Feb 2005
-+ *
-+ * Still To Fix:
-+ * - Advisory locking is ignored here.
-+ * - Should use an own CAP_* category instead of CAP_SYS_ADMIN
-+ *
-+ */
-+
-+#ifndef AUTOCONF_INCLUDED
-+#include <linux/config.h>
-+#endif
-+#include <linux/module.h>
-+
-+#include <linux/sched.h>
-+#include <linux/fs.h>
-+#include <linux/file.h>
-+#include <linux/stat.h>
-+#include <linux/errno.h>
-+#include <linux/major.h>
-+#include <linux/wait.h>
-+#include <linux/blkdev.h>
-+#include <linux/blkpg.h>
-+#include <linux/init.h>
-+#include <linux/smp_lock.h>
-+#include <linux/swap.h>
-+#include <linux/slab.h>
-+#include <linux/suspend.h>
-+#include <linux/writeback.h>
-+#include <linux/buffer_head.h> /* for invalidate_bdev() */
-+#include <linux/completion.h>
-+#include <linux/highmem.h>
-+#include <linux/gfp.h>
-+#include <linux/swap.h>
-+#include <linux/pagevec.h>
-+
-+#include <asm/uaccess.h>
-+
-+#include <lustre_lib.h>
-+#include <lustre_lite.h>
-+#include "llite_internal.h"
-+
-+#define LLOOP_MAX_SEGMENTS PTLRPC_MAX_BRW_PAGES
-+
-+/* Possible states of device */
-+enum {
-+ LLOOP_UNBOUND,
-+ LLOOP_BOUND,
-+ LLOOP_RUNDOWN,
-+};
-+
-+struct lloop_device {
-+ int lo_number;
-+ int lo_refcnt;
-+ loff_t lo_offset;
-+ loff_t lo_sizelimit;
-+ int lo_flags;
-+ int (*ioctl)(struct lloop_device *, int cmd,
-+ unsigned long arg);
-+
-+ struct file * lo_backing_file;
-+ struct block_device *lo_device;
-+ unsigned lo_blocksize;
-+
-+ int old_gfp_mask;
-+
-+ spinlock_t lo_lock;
-+ struct bio *lo_bio;
-+ struct bio *lo_biotail;
-+ int lo_state;
-+ struct semaphore lo_sem;
-+ struct semaphore lo_ctl_mutex;
-+ struct semaphore lo_bh_mutex;
-+ atomic_t lo_pending;
-+
-+ request_queue_t *lo_queue;
-+
-+ /* data to handle bio for lustre. */
-+ struct lo_request_data {
-+ struct brw_page lrd_pages[LLOOP_MAX_SEGMENTS];
-+ struct obdo lrd_oa;
-+ } lo_requests[1];
-+
-+};
-+
-+/*
-+ * Loop flags
-+ */
-+enum {
-+ LO_FLAGS_READ_ONLY = 1,
-+};
-+
-+static int lloop_major;
-+static int max_loop = 8;
-+static struct lloop_device *loop_dev;
-+static struct gendisk **disks;
-+static struct semaphore lloop_mutex;
-+static void *ll_iocontrol_magic = NULL;
-+
-+static loff_t get_loop_size(struct lloop_device *lo, struct file *file)
-+{
-+ loff_t size, offset, loopsize;
-+
-+ /* Compute loopsize in bytes */
-+ size = i_size_read(file->f_mapping->host);
-+ offset = lo->lo_offset;
-+ loopsize = size - offset;
-+ if (lo->lo_sizelimit > 0 && lo->lo_sizelimit < loopsize)
-+ loopsize = lo->lo_sizelimit;
-+
-+ /*
-+ * Unfortunately, if we want to do I/O on the device,
-+ * the number of 512-byte sectors has to fit into a sector_t.
-+ */
-+ return loopsize >> 9;
-+}
-+
-+static int do_bio_filebacked(struct lloop_device *lo, struct bio *bio)
-+{
-+ struct inode *inode = lo->lo_backing_file->f_dentry->d_inode;
-+ struct ll_inode_info *lli = ll_i2info(inode);
-+ struct lov_stripe_md *lsm = lli->lli_smd;
-+ struct obd_info oinfo = {{{0}}};
-+ struct brw_page *pg = lo->lo_requests[0].lrd_pages;
-+ struct obdo *oa = &lo->lo_requests[0].lrd_oa;
-+ pgoff_t offset;
-+ int ret, cmd, i;
-+ struct bio_vec *bvec;
-+
-+ BUG_ON(bio->bi_hw_segments > LLOOP_MAX_SEGMENTS);
-+
-+ offset = (pgoff_t)(bio->bi_sector << 9) + lo->lo_offset;
-+ bio_for_each_segment(bvec, bio, i) {
-+ BUG_ON(bvec->bv_offset != 0);
-+ BUG_ON(bvec->bv_len != CFS_PAGE_SIZE);
-+
-+ pg->pg = bvec->bv_page;
-+ pg->off = offset;
-+ pg->count = bvec->bv_len;
-+ pg->flag = OBD_BRW_SRVLOCK;
-+
-+ pg++;
-+ offset += bvec->bv_len;
-+ }
-+
-+ oa->o_mode = inode->i_mode;
-+ oa->o_id = lsm->lsm_object_id;
-+ oa->o_valid = OBD_MD_FLID | OBD_MD_FLMODE | OBD_MD_FLTYPE;
-+ obdo_from_inode(oa, inode, OBD_MD_FLFID | OBD_MD_FLGENER);
-+
-+ cmd = OBD_BRW_READ;
-+ if (bio_rw(bio) == WRITE)
-+ cmd = OBD_BRW_WRITE;
-+
-+ if (cmd == OBD_BRW_WRITE)
-+ ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_BRW_WRITE, bio->bi_size);
-+ else
-+ ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_BRW_READ, bio->bi_size);
-+ oinfo.oi_oa = oa;
-+ oinfo.oi_md = lsm;
-+ ret = obd_brw(cmd, ll_i2obdexp(inode), &oinfo,
-+ (obd_count)(i - bio->bi_idx),
-+ lo->lo_requests[0].lrd_pages, NULL);
-+ if (ret == 0)
-+ obdo_to_inode(inode, oa, OBD_MD_FLBLOCKS);
-+ return ret;
-+}
-+
-+
-+/*
-+ * Add bio to back of pending list
-+ */
-+static void loop_add_bio(struct lloop_device *lo, struct bio *bio)
-+{
-+ unsigned long flags;
-+
-+ spin_lock_irqsave(&lo->lo_lock, flags);
-+ if (lo->lo_biotail) {
-+ lo->lo_biotail->bi_next = bio;
-+ lo->lo_biotail = bio;
-+ } else
-+ lo->lo_bio = lo->lo_biotail = bio;
-+ spin_unlock_irqrestore(&lo->lo_lock, flags);
-+
-+ up(&lo->lo_bh_mutex);
-+}
-+
-+/*
-+ * Grab first pending buffer
-+ */
-+static struct bio *loop_get_bio(struct lloop_device *lo)
-+{
-+ struct bio *bio;
-+
-+ spin_lock_irq(&lo->lo_lock);
-+ if ((bio = lo->lo_bio)) {
-+ if (bio == lo->lo_biotail)
-+ lo->lo_biotail = NULL;
-+ lo->lo_bio = bio->bi_next;
-+ bio->bi_next = NULL;
-+ }
-+ spin_unlock_irq(&lo->lo_lock);
-+
-+ return bio;
-+}
-+
-+static int loop_make_request(request_queue_t *q, struct bio *old_bio)
-+{
-+ struct lloop_device *lo = q->queuedata;
-+ int rw = bio_rw(old_bio);
-+
-+ if (!lo)
-+ goto out;
-+
-+ spin_lock_irq(&lo->lo_lock);
-+ if (lo->lo_state != LLOOP_BOUND)
-+ goto inactive;
-+ atomic_inc(&lo->lo_pending);
-+ spin_unlock_irq(&lo->lo_lock);
-+
-+ if (rw == WRITE) {
-+ if (lo->lo_flags & LO_FLAGS_READ_ONLY)
-+ goto err;
-+ } else if (rw == READA) {
-+ rw = READ;
-+ } else if (rw != READ) {
-+ CERROR("lloop: unknown command (%x)\n", rw);
-+ goto err;
-+ }
-+ loop_add_bio(lo, old_bio);
-+ return 0;
-+err:
-+ if (atomic_dec_and_test(&lo->lo_pending))
-+ up(&lo->lo_bh_mutex);
-+out:
-+ bio_io_error(old_bio, old_bio->bi_size);
-+ return 0;
-+inactive:
-+ spin_unlock_irq(&lo->lo_lock);
-+ goto out;
-+}
-+
-+/*
-+ * kick off io on the underlying address space
-+ */
-+static void loop_unplug(request_queue_t *q)
-+{
-+ struct lloop_device *lo = q->queuedata;
-+
-+ clear_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags);
-+ blk_run_address_space(lo->lo_backing_file->f_mapping);
-+}
-+
-+static inline void loop_handle_bio(struct lloop_device *lo, struct bio *bio)
-+{
-+ int ret;
-+ ret = do_bio_filebacked(lo, bio);
-+ bio_endio(bio, bio->bi_size, ret);
-+}
-+
-+/*
-+ * worker thread that handles reads/writes to file backed loop devices,
-+ * to avoid blocking in our make_request_fn. it also does loop decrypting
-+ * on reads for block backed loop, as that is too heavy to do from
-+ * b_end_io context where irqs may be disabled.
-+ */
-+static int loop_thread(void *data)
-+{
-+ struct lloop_device *lo = data;
-+ struct bio *bio;
-+
-+ daemonize("lloop%d", lo->lo_number);
-+
-+ set_user_nice(current, -20);
-+
-+ lo->lo_state = LLOOP_BOUND;
-+ atomic_inc(&lo->lo_pending);
-+
-+ /*
-+ * up sem, we are running
-+ */
-+ up(&lo->lo_sem);
-+
-+ for (;;) {
-+ down_interruptible(&lo->lo_bh_mutex);
-+ /*
-+ * could be upped because of tear-down, not because of
-+ * pending work
-+ */
-+ if (!atomic_read(&lo->lo_pending))
-+ break;
-+
-+ bio = loop_get_bio(lo);
-+ if (!bio) {
-+ CWARN("lloop(minor: %d): missing bio\n", lo->lo_number);
-+ continue;
-+ }
-+ loop_handle_bio(lo, bio);
-+
-+ /*
-+ * upped both for pending work and tear-down, lo_pending
-+ * will hit zero then
-+ */
-+ if (atomic_dec_and_test(&lo->lo_pending))
-+ break;
-+ }
-+
-+ up(&lo->lo_sem);
-+ return 0;
-+}
-+
-+static int loop_set_fd(struct lloop_device *lo, struct file *unused,
-+ struct block_device *bdev, struct file *file)
-+{
-+ struct inode *inode;
-+ struct address_space *mapping;
-+ int lo_flags = 0;
-+ int error;
-+ loff_t size;
-+
-+ if (!try_module_get(THIS_MODULE))
-+ return -ENODEV;
-+
-+ error = -EBUSY;
-+ if (lo->lo_state != LLOOP_UNBOUND)
-+ goto out;
-+
-+ mapping = file->f_mapping;
-+ inode = mapping->host;
-+
-+ error = -EINVAL;
-+ if (!S_ISREG(inode->i_mode) || inode->i_sb->s_magic != LL_SUPER_MAGIC)
-+ goto out;
-+
-+ if (!(file->f_mode & FMODE_WRITE))
-+ lo_flags |= LO_FLAGS_READ_ONLY;
-+
-+ size = get_loop_size(lo, file);
-+
-+ if ((loff_t)(sector_t)size != size) {
-+ error = -EFBIG;
-+ goto out;
-+ }
-+
-+ /* remove all pages from the cache so that no dirty pages are left behind. */
-+ truncate_inode_pages(mapping, 0);
-+
-+ set_device_ro(bdev, (lo_flags & LO_FLAGS_READ_ONLY) != 0);
-+
-+ lo->lo_blocksize = CFS_PAGE_SIZE;
-+ lo->lo_device = bdev;
-+ lo->lo_flags = lo_flags;
-+ lo->lo_backing_file = file;
-+ lo->ioctl = NULL;
-+ lo->lo_sizelimit = 0;
-+ lo->old_gfp_mask = mapping_gfp_mask(mapping);
-+ mapping_set_gfp_mask(mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS));
-+
-+ lo->lo_bio = lo->lo_biotail = NULL;
-+
-+ /*
-+ * set queue make_request_fn, and add limits based on lower level
-+ * device
-+ */
-+ blk_queue_make_request(lo->lo_queue, loop_make_request);
-+ lo->lo_queue->queuedata = lo;
-+ lo->lo_queue->unplug_fn = loop_unplug;
-+
-+ /* queue parameters */
-+ blk_queue_hardsect_size(lo->lo_queue, CFS_PAGE_SIZE);
-+ blk_queue_max_sectors(lo->lo_queue, LLOOP_MAX_SEGMENTS);
-+ blk_queue_max_phys_segments(lo->lo_queue, LLOOP_MAX_SEGMENTS);
-+
-+ set_capacity(disks[lo->lo_number], size);
-+ bd_set_size(bdev, size << 9);
-+
-+ set_blocksize(bdev, lo->lo_blocksize);
-+
-+ kernel_thread(loop_thread, lo, CLONE_KERNEL);
-+ down(&lo->lo_sem);
-+ return 0;
-+
-+ out:
-+ /* This is safe: open() is still holding a reference. */
-+ module_put(THIS_MODULE);
-+ return error;
-+}
-+
-+static int loop_clr_fd(struct lloop_device *lo, struct block_device *bdev,
-+ int count)
-+{
-+ struct file *filp = lo->lo_backing_file;
-+ int gfp = lo->old_gfp_mask;
-+
-+ if (lo->lo_state != LLOOP_BOUND)
-+ return -ENXIO;
-+
-+ if (lo->lo_refcnt > count) /* we needed one fd for the ioctl */
-+ return -EBUSY;
-+
-+ if (filp == NULL)
-+ return -EINVAL;
-+
-+ spin_lock_irq(&lo->lo_lock);
-+ lo->lo_state = LLOOP_RUNDOWN;
-+ if (atomic_dec_and_test(&lo->lo_pending))
-+ up(&lo->lo_bh_mutex);
-+ spin_unlock_irq(&lo->lo_lock);
-+
-+ down(&lo->lo_sem);
-+ lo->lo_backing_file = NULL;
-+ lo->ioctl = NULL;
-+ lo->lo_device = NULL;
-+ lo->lo_offset = 0;
-+ lo->lo_sizelimit = 0;
-+ lo->lo_flags = 0;
-+ ll_invalidate_bdev(bdev, 0);
-+ set_capacity(disks[lo->lo_number], 0);
-+ bd_set_size(bdev, 0);
-+ mapping_set_gfp_mask(filp->f_mapping, gfp);
-+ lo->lo_state = LLOOP_UNBOUND;
-+ fput(filp);
-+ /* This is safe: open() is still holding a reference. */
-+ module_put(THIS_MODULE);
-+ return 0;
-+}
-+
-+static int lo_open(struct inode *inode, struct file *file)
-+{
-+ struct lloop_device *lo = inode->i_bdev->bd_disk->private_data;
-+
-+ down(&lo->lo_ctl_mutex);
-+ lo->lo_refcnt++;
-+ up(&lo->lo_ctl_mutex);
-+
-+ return 0;
-+}
-+
-+static int lo_release(struct inode *inode, struct file *file)
-+{
-+ struct lloop_device *lo = inode->i_bdev->bd_disk->private_data;
-+
-+ down(&lo->lo_ctl_mutex);
-+ --lo->lo_refcnt;
-+ up(&lo->lo_ctl_mutex);
-+
-+ return 0;
-+}
-+
-+/* lloop device node's ioctl function. */
-+static int lo_ioctl(struct inode *inode, struct file *unused,
-+ unsigned int cmd, unsigned long arg)
-+{
-+ struct lloop_device *lo = inode->i_bdev->bd_disk->private_data;
-+ struct block_device *bdev = inode->i_bdev;
-+ int err = 0;
-+
-+ down(&lloop_mutex);
-+ switch (cmd) {
-+ case LL_IOC_LLOOP_DETACH: {
-+ err = loop_clr_fd(lo, bdev, 2);
-+ if (err == 0)
-+ blkdev_put(bdev); /* grabbed in LLOOP_ATTACH */
-+ break;
-+ }
-+
-+ case LL_IOC_LLOOP_INFO: {
-+ __u64 ino = 0;
-+
-+ if (lo->lo_state == LLOOP_BOUND)
-+ ino = lo->lo_backing_file->f_dentry->d_inode->i_ino;
-+
-+ if (put_user(ino, (__u64 *)arg))
-+ err = -EFAULT;
-+ break;
-+ }
-+
-+ default:
-+ err = -EINVAL;
-+ break;
-+ }
-+ up(&lloop_mutex);
-+
-+ return err;
-+}
-+
-+static struct block_device_operations lo_fops = {
-+ .owner = THIS_MODULE,
-+ .open = lo_open,
-+ .release = lo_release,
-+ .ioctl = lo_ioctl,
-+};
-+
-+/* dynamic iocontrol callback.
-+ * This callback is registered in lloop_init and will be called by
-+ * ll_iocontrol_call.
-+ * It is a regular-file ioctl function for llite: it is responsible for
-+ * attaching a file to, and detaching a file from, an lloop device by
-+ * device number.
-+ */
-+static enum llioc_iter lloop_ioctl(struct inode *unused, struct file *file,
-+ unsigned int cmd, unsigned long arg,
-+ void *magic, int *rcp)
-+{
-+ struct lloop_device *lo = NULL;
-+ struct block_device *bdev = NULL;
-+ int err = 0;
-+ dev_t dev;
-+
-+ if (magic != ll_iocontrol_magic)
-+ return LLIOC_CONT;
-+
-+ if (disks == NULL)
-+ GOTO(out1, err = -ENODEV);
-+
-+ down(&lloop_mutex);
-+ switch (cmd) {
-+ case LL_IOC_LLOOP_ATTACH: {
-+ struct lloop_device *lo_free = NULL;
-+ int i;
-+
-+ for (i = 0; i < max_loop; i++, lo = NULL) {
-+ lo = &loop_dev[i];
-+ if (lo->lo_state == LLOOP_UNBOUND) {
-+ if (!lo_free)
-+ lo_free = lo;
-+ continue;
-+ }
-+ if (lo->lo_backing_file->f_dentry->d_inode ==
-+ file->f_dentry->d_inode)
-+ break;
-+ }
-+ if (lo || !lo_free)
-+ GOTO(out, err = -EBUSY);
-+
-+ lo = lo_free;
-+ dev = MKDEV(lloop_major, lo->lo_number);
-+
-+ /* bail out if the user-supplied pointer is not writable */
-+ if (put_user((long)old_encode_dev(dev), (long*)arg))
-+ GOTO(out, err = -EFAULT);
-+
-+ bdev = open_by_devnum(dev, file->f_mode);
-+ if (IS_ERR(bdev))
-+ GOTO(out, err = PTR_ERR(bdev));
-+
-+ get_file(file);
-+ err = loop_set_fd(lo, NULL, bdev, file);
-+ if (err) {
-+ fput(file);
-+ blkdev_put(bdev);
-+ }
-+
-+ break;
-+ }
-+
-+ case LL_IOC_LLOOP_DETACH_BYDEV: {
-+ int minor;
-+
-+ dev = old_decode_dev(arg);
-+ if (MAJOR(dev) != lloop_major)
-+ GOTO(out, err = -EINVAL);
-+
-+ minor = MINOR(dev);
-+ if (minor > max_loop - 1)
-+ GOTO(out, err = -EINVAL);
-+
-+ lo = &loop_dev[minor];
-+ if (lo->lo_state != LLOOP_BOUND)
-+ GOTO(out, err = -EINVAL);
-+
-+ bdev = lo->lo_device;
-+ err = loop_clr_fd(lo, bdev, 1);
-+ if (err == 0)
-+ blkdev_put(bdev); /* grabbed in LLOOP_ATTACH */
-+
-+ break;
-+ }
-+
-+ default:
-+ err = -EINVAL;
-+ break;
-+ }
-+
-+out:
-+ up(&lloop_mutex);
-+out1:
-+ if (rcp)
-+ *rcp = err;
-+ return LLIOC_STOP;
-+}
-+
-+static int __init lloop_init(void)
-+{
-+ int i;
-+ unsigned int cmdlist[] = {
-+ LL_IOC_LLOOP_ATTACH,
-+ LL_IOC_LLOOP_DETACH_BYDEV,
-+ };
-+
-+ if (max_loop < 1 || max_loop > 256) {
-+ CWARN("lloop: invalid max_loop (must be between"
-+ " 1 and 256), using default (8)\n");
-+ max_loop = 8;
-+ }
-+
-+ lloop_major = register_blkdev(0, "lloop");
-+ if (lloop_major < 0)
-+ return -EIO;
-+
-+ ll_iocontrol_magic = ll_iocontrol_register(lloop_ioctl, 2, cmdlist);
-+ if (ll_iocontrol_magic == NULL)
-+ goto out_mem1;
-+
-+ loop_dev = kmalloc(max_loop * sizeof(struct lloop_device), GFP_KERNEL);
-+ if (!loop_dev)
-+ goto out_mem1;
-+ memset(loop_dev, 0, max_loop * sizeof(struct lloop_device));
-+
-+ disks = kmalloc(max_loop * sizeof(struct gendisk *), GFP_KERNEL);
-+ if (!disks)
-+ goto out_mem2;
-+
-+ for (i = 0; i < max_loop; i++) {
-+ disks[i] = alloc_disk(1);
-+ if (!disks[i])
-+ goto out_mem3;
-+ }
-+
-+ init_MUTEX(&lloop_mutex);
-+
-+ for (i = 0; i < max_loop; i++) {
-+ struct lloop_device *lo = &loop_dev[i];
-+ struct gendisk *disk = disks[i];
-+
-+ memset(lo, 0, sizeof(*lo));
-+ lo->lo_queue = blk_alloc_queue(GFP_KERNEL);
-+ if (!lo->lo_queue)
-+ goto out_mem4;
-+
-+ init_MUTEX(&lo->lo_ctl_mutex);
-+ init_MUTEX_LOCKED(&lo->lo_sem);
-+ init_MUTEX_LOCKED(&lo->lo_bh_mutex);
-+ lo->lo_number = i;
-+ spin_lock_init(&lo->lo_lock);
-+ disk->major = lloop_major;
-+ disk->first_minor = i;
-+ disk->fops = &lo_fops;
-+ sprintf(disk->disk_name, "lloop%d", i);
-+ disk->private_data = lo;
-+ disk->queue = lo->lo_queue;
-+ }
-+
-+ /* We cannot fail after we call this, so another loop! */
-+ for (i = 0; i < max_loop; i++)
-+ add_disk(disks[i]);
-+ return 0;
-+
-+out_mem4:
-+ while (i--)
-+ blk_put_queue(loop_dev[i].lo_queue);
-+ i = max_loop;
-+out_mem3:
-+ while (i--)
-+ put_disk(disks[i]);
-+ kfree(disks);
-+out_mem2:
-+ kfree(loop_dev);
-+out_mem1:
-+ unregister_blkdev(lloop_major, "lloop");
-+ ll_iocontrol_unregister(ll_iocontrol_magic);
-+ CERROR("lloop: ran out of memory\n");
-+ return -ENOMEM;
-+}
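
lloop_init() unwinds its partial setup with the usual goto ladder: each successfully acquired resource gets a matching label further down, so a failure at step N falls through the labels that release steps N-1 down to 1 in reverse order. A compact user-space sketch of the same pattern, assuming made-up resources a, b and c and a simulated allocation failure:

/* unwind.c - sketch of the goto error-unwinding ladder used in lloop_init().
 * Build: cc -std=c11 unwind.c -o unwind */
#include <stdio.h>
#include <stdlib.h>

static int allocs_left = 2;     /* simulate failure of the third allocation */

static void *xmalloc(size_t sz)
{
        if (allocs_left-- <= 0)
                return NULL;
        return malloc(sz);
}

static int setup(void)
{
        void *a, *b, *c;

        a = xmalloc(64);
        if (!a)
                goto out;
        b = xmalloc(64);
        if (!b)
                goto out_free_a;
        c = xmalloc(64);
        if (!c)
                goto out_free_b;        /* taken in this demo */

        /* success: real code would hand a, b and c to the caller;
         * the demo just releases them again */
        free(c);
        free(b);
        free(a);
        return 0;

out_free_b:
        free(b);
out_free_a:
        free(a);
out:
        fprintf(stderr, "setup: ran out of memory, unwound partial state\n");
        return -1;
}

int main(void)
{
        return setup() ? EXIT_FAILURE : EXIT_SUCCESS;
}
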
-+
-+static void lloop_exit(void)
-+{
-+ int i;
-+
-+ ll_iocontrol_unregister(ll_iocontrol_magic);
-+ for (i = 0; i < max_loop; i++) {
-+ del_gendisk(disks[i]);
-+ blk_put_queue(loop_dev[i].lo_queue);
-+ put_disk(disks[i]);
-+ }
-+ if (ll_unregister_blkdev(lloop_major, "lloop"))
-+ CWARN("lloop: cannot unregister blkdev\n");
-+
-+ kfree(disks);
-+ kfree(loop_dev);
-+}
-+
-+module_init(lloop_init);
-+module_exit(lloop_exit);
-+
-+CFS_MODULE_PARM(max_loop, "i", int, 0444, "maximum number of lloop devices");
-+MODULE_AUTHOR("Sun Microsystems, Inc. <http://www.lustre.org/>");
-+MODULE_DESCRIPTION("Lustre virtual block device");
-+MODULE_LICENSE("GPL");
-diff -urNad lustre~/lustre/llite/rw.c lustre/lustre/llite/rw.c
---- lustre~/lustre/llite/rw.c 2009-03-12 10:32:27.000000000 +0100
-+++ lustre/lustre/llite/rw.c 2009-03-13 09:45:03.000000000 +0100
-@@ -61,6 +61,8 @@
-
- #define DEBUG_SUBSYSTEM S_LLITE
-
-+#include <linux/page-flags.h>
-+
- #include <lustre_lite.h>
- #include "llite_internal.h"
- #include <linux/lustre_compat25.h>
-@@ -186,7 +188,7 @@
- GOTO(out_unlock, 0);
- }
-
-- LASSERT(atomic_read(&lli->lli_size_sem.count) <= 0);
-+ LASSERT(SEM_COUNT(&lli->lli_size_sem) <= 0);
-
- if (!srvlock) {
- struct ost_lvb lvb;
-@@ -2122,7 +2124,7 @@
- rc = generic_write_checks(file, ppos, &count, 0);
- if (rc)
- GOTO(out, rc);
-- rc = ll_remove_suid(file->f_dentry, file->f_vfsmnt);
-+ rc = ll_remove_suid(file, file->f_vfsmnt);
- if (rc)
- GOTO(out, rc);
- }
-diff -urNad lustre~/lustre/llite/rw.c.orig lustre/lustre/llite/rw.c.orig
---- lustre~/lustre/llite/rw.c.orig 1970-01-01 00:00:00.000000000 +0000
-+++ lustre/lustre/llite/rw.c.orig 2009-03-13 09:45:03.000000000 +0100
-@@ -0,0 +1,2215 @@
-+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-+ * vim:expandtab:shiftwidth=8:tabstop=8:
-+ *
-+ * GPL HEADER START
-+ *
-+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
-+ *
-+ * This program is free software; you can redistribute it and/or modify
-+ * it under the terms of the GNU General Public License version 2 only,
-+ * as published by the Free Software Foundation.
-+ *
-+ * This program is distributed in the hope that it will be useful, but
-+ * WITHOUT ANY WARRANTY; without even the implied warranty of
-+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-+ * General Public License version 2 for more details (a copy is included
-+ * in the LICENSE file that accompanied this code).
-+ *
-+ * You should have received a copy of the GNU General Public License
-+ * version 2 along with this program; If not, see
-+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
-+ *
-+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
-+ * CA 95054 USA or visit www.sun.com if you need additional information or
-+ * have any questions.
-+ *
-+ * GPL HEADER END
-+ */
-+/*
-+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved
-+ * Use is subject to license terms.
-+ */
-+/*
-+ * This file is part of Lustre, http://www.lustre.org/
-+ * Lustre is a trademark of Sun Microsystems, Inc.
-+ *
-+ * lustre/llite/rw.c
-+ *
-+ * Lustre Lite I/O page cache routines shared by different kernel revs
-+ */
-+
-+#ifndef AUTOCONF_INCLUDED
-+#include <linux/config.h>
-+#endif
-+#include <linux/kernel.h>
-+#include <linux/mm.h>
-+#include <linux/string.h>
-+#include <linux/stat.h>
-+#include <linux/errno.h>
-+#include <linux/smp_lock.h>
-+#include <linux/unistd.h>
-+#include <linux/version.h>
-+#include <asm/system.h>
-+#include <asm/uaccess.h>
-+
-+#include <linux/fs.h>
-+#include <linux/stat.h>
-+#include <asm/uaccess.h>
-+#include <linux/mm.h>
-+#include <linux/pagemap.h>
-+#include <linux/smp_lock.h>
-+
-+#define DEBUG_SUBSYSTEM S_LLITE
-+
-+#include <lustre_lite.h>
-+#include "llite_internal.h"
-+#include <linux/lustre_compat25.h>
-+
-+#ifndef list_for_each_prev_safe
-+#define list_for_each_prev_safe(pos, n, head) \
-+ for (pos = (head)->prev, n = pos->prev; pos != (head); \
-+ pos = n, n = pos->prev )
-+#endif
-+
-+cfs_mem_cache_t *ll_async_page_slab = NULL;
-+size_t ll_async_page_slab_size = 0;
-+
-+/* SYNCHRONOUS I/O to object storage for an inode */
-+static int ll_brw(int cmd, struct inode *inode, struct obdo *oa,
-+ struct page *page, int flags)
-+{
-+ struct ll_inode_info *lli = ll_i2info(inode);
-+ struct lov_stripe_md *lsm = lli->lli_smd;
-+ struct obd_info oinfo = { { { 0 } } };
-+ struct brw_page pg;
-+ int rc;
-+ ENTRY;
-+
-+ pg.pg = page;
-+ pg.off = ((obd_off)page->index) << CFS_PAGE_SHIFT;
-+
-+ if ((cmd & OBD_BRW_WRITE) && (pg.off+CFS_PAGE_SIZE>i_size_read(inode)))
-+ pg.count = i_size_read(inode) % CFS_PAGE_SIZE;
-+ else
-+ pg.count = CFS_PAGE_SIZE;
-+
-+ LL_CDEBUG_PAGE(D_PAGE, page, "%s %d bytes ino %lu at "LPU64"/"LPX64"\n",
-+ cmd & OBD_BRW_WRITE ? "write" : "read", pg.count,
-+ inode->i_ino, pg.off, pg.off);
-+ if (pg.count == 0) {
-+ CERROR("ZERO COUNT: ino %lu: size %p:%Lu(%p:%Lu) idx %lu off "
-+ LPU64"\n", inode->i_ino, inode, i_size_read(inode),
-+ page->mapping->host, i_size_read(page->mapping->host),
-+ page->index, pg.off);
-+ }
-+
-+ pg.flag = flags;
-+
-+ if (cmd & OBD_BRW_WRITE)
-+ ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_BRW_WRITE,
-+ pg.count);
-+ else
-+ ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_BRW_READ,
-+ pg.count);
-+ oinfo.oi_oa = oa;
-+ oinfo.oi_md = lsm;
-+ rc = obd_brw(cmd, ll_i2obdexp(inode), &oinfo, 1, &pg, NULL);
-+ if (rc == 0)
-+ obdo_to_inode(inode, oa, OBD_MD_FLBLOCKS);
-+ else if (rc != -EIO)
-+ CERROR("error from obd_brw: rc = %d\n", rc);
-+ RETURN(rc);
-+}
-+
-+int ll_file_punch(struct inode * inode, loff_t new_size, int srvlock)
-+{
-+ struct ll_inode_info *lli = ll_i2info(inode);
-+ struct obd_info oinfo = { { { 0 } } };
-+ struct obdo oa;
-+ int rc;
-+
-+ ENTRY;
-+ CDEBUG(D_INFO, "calling punch for "LPX64" (new size %Lu=%#Lx)\n",
-+ lli->lli_smd->lsm_object_id, new_size, new_size);
-+
-+ oinfo.oi_md = lli->lli_smd;
-+ oinfo.oi_policy.l_extent.start = new_size;
-+ oinfo.oi_policy.l_extent.end = OBD_OBJECT_EOF;
-+ oinfo.oi_oa = &oa;
-+ oa.o_id = lli->lli_smd->lsm_object_id;
-+ oa.o_valid = OBD_MD_FLID;
-+ if (srvlock) {
-+ /* set OBD_MD_FLFLAGS in o_valid, only if we
-+ * set OBD_FL_TRUNCLOCK, otherwise ost_punch
-+ * and filter_setattr get confused, see the comment
-+ * in ost_punch */
-+ oa.o_flags = OBD_FL_TRUNCLOCK;
-+ oa.o_valid |= OBD_MD_FLFLAGS;
-+ }
-+ obdo_from_inode(&oa, inode, OBD_MD_FLTYPE | OBD_MD_FLMODE |OBD_MD_FLFID|
-+ OBD_MD_FLATIME | OBD_MD_FLMTIME | OBD_MD_FLCTIME |
-+ OBD_MD_FLUID | OBD_MD_FLGID | OBD_MD_FLGENER |
-+ OBD_MD_FLBLOCKS);
-+ rc = obd_punch_rqset(ll_i2obdexp(inode), &oinfo, NULL);
-+ if (rc) {
-+ CERROR("obd_truncate fails (%d) ino %lu\n", rc, inode->i_ino);
-+ RETURN(rc);
-+ }
-+ obdo_to_inode(inode, &oa, OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
-+ OBD_MD_FLATIME | OBD_MD_FLMTIME | OBD_MD_FLCTIME);
-+ RETURN(0);
-+}
-+/* this isn't where truncate starts. roughly:
-+ * sys_truncate->ll_setattr_raw->vmtruncate->ll_truncate. setattr_raw grabs
-+ * DLM lock on [size, EOF], i_mutex, ->lli_size_sem, and WRITE_I_ALLOC_SEM to
-+ * avoid races.
-+ *
-+ * must be called under ->lli_size_sem */
-+void ll_truncate(struct inode *inode)
-+{
-+ struct ll_inode_info *lli = ll_i2info(inode);
-+ int srvlock = test_bit(LLI_F_SRVLOCK, &lli->lli_flags);
-+ loff_t new_size;
-+ ENTRY;
-+ CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p) to %Lu=%#Lx\n",inode->i_ino,
-+ inode->i_generation, inode, i_size_read(inode), i_size_read(inode));
-+
-+ ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_TRUNC, 1);
-+ if (lli->lli_size_sem_owner != current) {
-+ EXIT;
-+ return;
-+ }
-+
-+ if (!lli->lli_smd) {
-+ CDEBUG(D_INODE, "truncate on inode %lu with no objects\n",
-+ inode->i_ino);
-+ GOTO(out_unlock, 0);
-+ }
-+
-+ LASSERT(atomic_read(&lli->lli_size_sem.count) <= 0);
-+
-+ if (!srvlock) {
-+ struct ost_lvb lvb;
-+ int rc;
-+
-+ /* XXX I'm pretty sure this is a hack to paper over a more fundamental
-+ * race condition. */
-+ lov_stripe_lock(lli->lli_smd);
-+ inode_init_lvb(inode, &lvb);
-+ rc = obd_merge_lvb(ll_i2obdexp(inode), lli->lli_smd, &lvb, 0);
-+ inode->i_blocks = lvb.lvb_blocks;
-+ if (lvb.lvb_size == i_size_read(inode) && rc == 0) {
-+ CDEBUG(D_VFSTRACE, "skipping punch for obj "LPX64", %Lu=%#Lx\n",
-+ lli->lli_smd->lsm_object_id, i_size_read(inode),
-+ i_size_read(inode));
-+ lov_stripe_unlock(lli->lli_smd);
-+ GOTO(out_unlock, 0);
-+ }
-+
-+ obd_adjust_kms(ll_i2obdexp(inode), lli->lli_smd,
-+ i_size_read(inode), 1);
-+ lov_stripe_unlock(lli->lli_smd);
-+ }
-+
-+ if (unlikely((ll_i2sbi(inode)->ll_flags & LL_SBI_LLITE_CHECKSUM) &&
-+ (i_size_read(inode) & ~CFS_PAGE_MASK))) {
-+ /* If the truncate leaves a partial page, update its checksum */
-+ struct page *page = find_get_page(inode->i_mapping,
-+ i_size_read(inode) >>
-+ CFS_PAGE_SHIFT);
-+ if (page != NULL) {
-+ struct ll_async_page *llap = llap_cast_private(page);
-+ if (llap != NULL) {
-+ char *kaddr = kmap_atomic(page, KM_USER0);
-+ llap->llap_checksum =
-+ init_checksum(OSC_DEFAULT_CKSUM);
-+ llap->llap_checksum =
-+ compute_checksum(llap->llap_checksum,
-+ kaddr, CFS_PAGE_SIZE,
-+ OSC_DEFAULT_CKSUM);
-+ kunmap_atomic(kaddr, KM_USER0);
-+ }
-+ page_cache_release(page);
-+ }
-+ }
-+
-+ new_size = i_size_read(inode);
-+ ll_inode_size_unlock(inode, 0);
-+ if (!srvlock)
-+ ll_file_punch(inode, new_size, 0);
-+ else
-+ ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_LOCKLESS_TRUNC, 1);
-+
-+ EXIT;
-+ return;
-+
-+ out_unlock:
-+ ll_inode_size_unlock(inode, 0);
-+} /* ll_truncate */
-+
-+int ll_prepare_write(struct file *file, struct page *page, unsigned from,
-+ unsigned to)
-+{
-+ struct inode *inode = page->mapping->host;
-+ struct ll_inode_info *lli = ll_i2info(inode);
-+ struct lov_stripe_md *lsm = lli->lli_smd;
-+ obd_off offset = ((obd_off)page->index) << CFS_PAGE_SHIFT;
-+ struct obd_info oinfo = { { { 0 } } };
-+ struct brw_page pga;
-+ struct obdo oa;
-+ struct ost_lvb lvb;
-+ int rc = 0;
-+ ENTRY;
-+
-+ LASSERT(PageLocked(page));
-+ (void)llap_cast_private(page); /* assertion */
-+
-+ /* Check to see if we should return -EIO right away */
-+ pga.pg = page;
-+ pga.off = offset;
-+ pga.count = CFS_PAGE_SIZE;
-+ pga.flag = 0;
-+
-+ oa.o_mode = inode->i_mode;
-+ oa.o_id = lsm->lsm_object_id;
-+ oa.o_valid = OBD_MD_FLID | OBD_MD_FLMODE | OBD_MD_FLTYPE;
-+ obdo_from_inode(&oa, inode, OBD_MD_FLFID | OBD_MD_FLGENER);
-+
-+ oinfo.oi_oa = &oa;
-+ oinfo.oi_md = lsm;
-+ rc = obd_brw(OBD_BRW_CHECK, ll_i2obdexp(inode), &oinfo, 1, &pga, NULL);
-+ if (rc)
-+ RETURN(rc);
-+
-+ if (PageUptodate(page)) {
-+ LL_CDEBUG_PAGE(D_PAGE, page, "uptodate\n");
-+ RETURN(0);
-+ }
-+
-+ /* We're completely overwriting an existing page, so _don't_ set it up
-+ * to date until commit_write */
-+ if (from == 0 && to == CFS_PAGE_SIZE) {
-+ LL_CDEBUG_PAGE(D_PAGE, page, "full page write\n");
-+ POISON_PAGE(page, 0x11);
-+ RETURN(0);
-+ }
-+
-+ /* If we are writing to a new page, there is no need to read old data.
-+ * The extent locking will have updated the KMS, and for our purposes
-+ * here we can treat it like i_size. */
-+ lov_stripe_lock(lsm);
-+ inode_init_lvb(inode, &lvb);
-+ obd_merge_lvb(ll_i2obdexp(inode), lsm, &lvb, 1);
-+ lov_stripe_unlock(lsm);
-+ if (lvb.lvb_size <= offset) {
-+ char *kaddr = kmap_atomic(page, KM_USER0);
-+ LL_CDEBUG_PAGE(D_PAGE, page, "kms "LPU64" <= offset "LPU64"\n",
-+ lvb.lvb_size, offset);
-+ memset(kaddr, 0, CFS_PAGE_SIZE);
-+ kunmap_atomic(kaddr, KM_USER0);
-+ GOTO(prepare_done, rc = 0);
-+ }
-+
-+ /* XXX could be an async ocp read.. read-ahead? */
-+ rc = ll_brw(OBD_BRW_READ, inode, &oa, page, 0);
-+ if (rc == 0) {
-+ /* bug 1598: don't clobber blksize */
-+ oa.o_valid &= ~(OBD_MD_FLSIZE | OBD_MD_FLBLKSZ);
-+ obdo_refresh_inode(inode, &oa, oa.o_valid);
-+ }
-+
-+ EXIT;
-+ prepare_done:
-+ if (rc == 0)
-+ SetPageUptodate(page);
-+
-+ return rc;
-+}
-+
-+/**
-+ * make page ready for ASYNC write
-+ * \param data - pointer to llap cookie
-+ * \param cmd - one of the OBD_BRW_* macros
-+ *
-+ * \retval 0 the page was successfully prepared for sending
-+ * \retval -EAGAIN the page does not need to be sent
-+ */
-+static int ll_ap_make_ready(void *data, int cmd)
-+{
-+ struct ll_async_page *llap;
-+ struct page *page;
-+ ENTRY;
-+
-+ llap = LLAP_FROM_COOKIE(data);
-+ page = llap->llap_page;
-+
-+ /* we're trying to write, but the page is locked.. come back later */
-+ if (TryLockPage(page))
-+ RETURN(-EAGAIN);
-+
-+ LASSERTF(!(cmd & OBD_BRW_READ) || !PageWriteback(page),
-+ "cmd %x page %p ino %lu index %lu fl %lx\n", cmd, page,
-+ page->mapping->host->i_ino, page->index, page->flags);
-+
-+ /* if we left PageDirty we might get another writepage call
-+ * in the future. list walkers are bright enough
-+ * to check page dirty so we can leave it on whatever list
-+ * it's on. XXX also, we're called with the cli list so if
-+ * we got the page cache list we'd create a lock inversion
-+ * with the removepage path which gets the page lock then the
-+ * cli lock */
-+ if(!clear_page_dirty_for_io(page)) {
-+ unlock_page(page);
-+ RETURN(-EAGAIN);
-+ }
-+
-+ /* This actually clears the dirty bit in the radix tree.*/
-+ set_page_writeback(page);
-+
-+ LL_CDEBUG_PAGE(D_PAGE, page, "made ready\n");
-+ page_cache_get(page);
-+
-+ RETURN(0);
-+}
-+
-+/* We have two reasons for giving llite the opportunity to change the
-+ * write length of a given queued page as it builds the RPC containing
-+ * the page:
-+ *
-+ * 1) Further extending writes may have landed in the page cache
-+ * since a partial write first queued this page requiring us
-+ * to write more from the page cache. (No further races are possible, since
-+ * by the time this is called, the page is locked.)
-+ * 2) We might have raced with truncate and want to avoid performing
-+ * write RPCs that are just going to be thrown away by the
-+ * truncate's punch on the storage targets.
-+ *
-+ * The kms serves these purposes as it is set at both truncate and extending
-+ * writes.
-+ */
-+static int ll_ap_refresh_count(void *data, int cmd)
-+{
-+ struct ll_inode_info *lli;
-+ struct ll_async_page *llap;
-+ struct lov_stripe_md *lsm;
-+ struct page *page;
-+ struct inode *inode;
-+ struct ost_lvb lvb;
-+ __u64 kms;
-+ ENTRY;
-+
-+ /* readpage queues with _COUNT_STABLE, shouldn't get here. */
-+ LASSERT(cmd != OBD_BRW_READ);
-+
-+ llap = LLAP_FROM_COOKIE(data);
-+ page = llap->llap_page;
-+ inode = page->mapping->host;
-+ lli = ll_i2info(inode);
-+ lsm = lli->lli_smd;
-+
-+ lov_stripe_lock(lsm);
-+ inode_init_lvb(inode, &lvb);
-+ obd_merge_lvb(ll_i2obdexp(inode), lsm, &lvb, 1);
-+ kms = lvb.lvb_size;
-+ lov_stripe_unlock(lsm);
-+
-+ /* catch race with truncate */
-+ if (((__u64)page->index << CFS_PAGE_SHIFT) >= kms)
-+ return 0;
-+
-+ /* catch sub-page write at end of file */
-+ if (((__u64)page->index << CFS_PAGE_SHIFT) + CFS_PAGE_SIZE > kms)
-+ return kms % CFS_PAGE_SIZE;
-+
-+ return CFS_PAGE_SIZE;
-+}
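
ll_ap_refresh_count() amounts to clamping the write length of a queued page against the known minimum size (kms): a page wholly past kms is dropped, the page containing kms gets a sub-page count, and everything else gets a full page. A small stand-alone sketch of that arithmetic; the page size, helper name and sample values are illustrative:

/* kms_clamp.c - sketch of the kms clamp done by ll_ap_refresh_count().
 * Build: cc -std=c11 kms_clamp.c -o kms_clamp */
#include <stdio.h>

#define PAGE_SZ 4096ULL

/* Return how many bytes of page 'index' should be written when the file's
 * known minimum size is 'kms'. */
static unsigned long write_count(unsigned long index, unsigned long long kms)
{
        unsigned long long page_start = (unsigned long long)index * PAGE_SZ;

        if (page_start >= kms)
                return 0;                 /* raced with truncate: skip the write */
        if (page_start + PAGE_SZ > kms)
                return kms % PAGE_SZ;     /* sub-page write at end of file */
        return PAGE_SZ;                   /* full-page write */
}

int main(void)
{
        printf("%lu\n", write_count(1, 10000)); /* full page: 4096    */
        printf("%lu\n", write_count(2, 10000)); /* partial page: 1808 */
        printf("%lu\n", write_count(3, 10000)); /* past kms: 0        */
        return 0;
}
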
-+
-+void ll_inode_fill_obdo(struct inode *inode, int cmd, struct obdo *oa)
-+{
-+ struct lov_stripe_md *lsm;
-+ obd_flag valid_flags;
-+
-+ lsm = ll_i2info(inode)->lli_smd;
-+
-+ oa->o_id = lsm->lsm_object_id;
-+ oa->o_valid = OBD_MD_FLID;
-+ valid_flags = OBD_MD_FLTYPE | OBD_MD_FLATIME;
-+ if (cmd & OBD_BRW_WRITE) {
-+ oa->o_valid |= OBD_MD_FLEPOCH;
-+ oa->o_easize = ll_i2info(inode)->lli_io_epoch;
-+
-+ valid_flags |= OBD_MD_FLMTIME | OBD_MD_FLCTIME |
-+ OBD_MD_FLUID | OBD_MD_FLGID |
-+ OBD_MD_FLFID | OBD_MD_FLGENER;
-+ }
-+
-+ obdo_from_inode(oa, inode, valid_flags);
-+}
-+
-+static void ll_ap_fill_obdo(void *data, int cmd, struct obdo *oa)
-+{
-+ struct ll_async_page *llap;
-+ ENTRY;
-+
-+ llap = LLAP_FROM_COOKIE(data);
-+ ll_inode_fill_obdo(llap->llap_page->mapping->host, cmd, oa);
-+
-+ EXIT;
-+}
-+
-+static void ll_ap_update_obdo(void *data, int cmd, struct obdo *oa,
-+ obd_valid valid)
-+{
-+ struct ll_async_page *llap;
-+ ENTRY;
-+
-+ llap = LLAP_FROM_COOKIE(data);
-+ obdo_from_inode(oa, llap->llap_page->mapping->host, valid);
-+
-+ EXIT;
-+}
-+
-+static struct obd_async_page_ops ll_async_page_ops = {
-+ .ap_make_ready = ll_ap_make_ready,
-+ .ap_refresh_count = ll_ap_refresh_count,
-+ .ap_fill_obdo = ll_ap_fill_obdo,
-+ .ap_update_obdo = ll_ap_update_obdo,
-+ .ap_completion = ll_ap_completion,
-+};
-+
-+struct ll_async_page *llap_cast_private(struct page *page)
-+{
-+ struct ll_async_page *llap = (struct ll_async_page *)page_private(page);
-+
-+ LASSERTF(llap == NULL || llap->llap_magic == LLAP_MAGIC,
-+ "page %p private %lu gave magic %d which != %d\n",
-+ page, page_private(page), llap->llap_magic, LLAP_MAGIC);
-+
-+ return llap;
-+}
-+
-+/* Try to shrink the page cache for the @sbi filesystem by 1/@shrink_fraction.
-+ *
-+ * There is an llap attached onto every page in lustre, linked off @sbi.
-+ * We add an llap to the list so we don't lose our place during list walking.
-+ * If llaps in the list are being moved they will only move to the end
-+ * of the LRU, and we aren't terribly interested in those pages here (we
-+ * start at the beginning of the list where the least-used llaps are).
-+ */
-+int llap_shrink_cache(struct ll_sb_info *sbi, int shrink_fraction)
-+{
-+ struct ll_async_page *llap, dummy_llap = { .llap_magic = 0xd11ad11a };
-+ unsigned long total, want, count = 0;
-+
-+ total = sbi->ll_async_page_count;
-+
-+ /* There can be a large number of llaps (600k or more in a large
-+ * memory machine) so the VM 1/6 shrink ratio is likely too much.
-+ * Since we are freeing pages also, we don't necessarily want to
-+ * shrink so much. Limit to 40MB of pages + llaps per call. */
-+ if (shrink_fraction == 0)
-+ want = sbi->ll_async_page_count - sbi->ll_async_page_max + 32;
-+ else
-+ want = (total + shrink_fraction - 1) / shrink_fraction;
-+
-+ if (want > 40 << (20 - CFS_PAGE_SHIFT))
-+ want = 40 << (20 - CFS_PAGE_SHIFT);
-+
-+ CDEBUG(D_CACHE, "shrinking %lu of %lu pages (1/%d)\n",
-+ want, total, shrink_fraction);
-+
-+ spin_lock(&sbi->ll_lock);
-+ list_add(&dummy_llap.llap_pglist_item, &sbi->ll_pglist);
-+
-+ while (--total >= 0 && count < want) {
-+ struct page *page;
-+ int keep;
-+
-+ if (unlikely(need_resched())) {
-+ spin_unlock(&sbi->ll_lock);
-+ cond_resched();
-+ spin_lock(&sbi->ll_lock);
-+ }
-+
-+ llap = llite_pglist_next_llap(sbi,&dummy_llap.llap_pglist_item);
-+ list_del_init(&dummy_llap.llap_pglist_item);
-+ if (llap == NULL)
-+ break;
-+
-+ page = llap->llap_page;
-+ LASSERT(page != NULL);
-+
-+ list_add(&dummy_llap.llap_pglist_item, &llap->llap_pglist_item);
-+
-+ /* Page needs/undergoing IO */
-+ if (TryLockPage(page)) {
-+ LL_CDEBUG_PAGE(D_PAGE, page, "can't lock\n");
-+ continue;
-+ }
-+
-+ keep = (llap->llap_write_queued || PageDirty(page) ||
-+ PageWriteback(page) || (!PageUptodate(page) &&
-+ llap->llap_origin != LLAP_ORIGIN_READAHEAD));
-+
-+ LL_CDEBUG_PAGE(D_PAGE, page,"%s LRU page: %s%s%s%s%s origin %s\n",
-+ keep ? "keep" : "drop",
-+ llap->llap_write_queued ? "wq " : "",
-+ PageDirty(page) ? "pd " : "",
-+ PageUptodate(page) ? "" : "!pu ",
-+ PageWriteback(page) ? "wb" : "",
-+ llap->llap_defer_uptodate ? "" : "!du",
-+ llap_origins[llap->llap_origin]);
-+
-+ /* If page is dirty or undergoing IO don't discard it */
-+ if (keep) {
-+ unlock_page(page);
-+ continue;
-+ }
-+
-+ page_cache_get(page);
-+ spin_unlock(&sbi->ll_lock);
-+
-+ if (page->mapping != NULL) {
-+ ll_teardown_mmaps(page->mapping,
-+ (__u64)page->index << CFS_PAGE_SHIFT,
-+ ((__u64)page->index << CFS_PAGE_SHIFT)|
-+ ~CFS_PAGE_MASK);
-+ if (!PageDirty(page) && !page_mapped(page)) {
-+ ll_ra_accounting(llap, page->mapping);
-+ ll_truncate_complete_page(page);
-+ ++count;
-+ } else {
-+ LL_CDEBUG_PAGE(D_PAGE, page, "Not dropping page"
-+ " because it is "
-+ "%s\n",
-+ PageDirty(page)?
-+ "dirty":"mapped");
-+ }
-+ }
-+ unlock_page(page);
-+ page_cache_release(page);
-+
-+ spin_lock(&sbi->ll_lock);
-+ }
-+ list_del(&dummy_llap.llap_pglist_item);
-+ spin_unlock(&sbi->ll_lock);
-+
-+ CDEBUG(D_CACHE, "shrank %lu/%lu and left %lu unscanned\n",
-+ count, want, total);
-+
-+ return count;
-+}
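
llap_shrink_cache() keeps its place in the LRU by inserting a dummy llap right after the entry it is examining before dropping ll_lock, then resuming the walk from that dummy once the lock is re-taken. The same cursor trick in user space, on a tiny circular doubly-linked list guarded by a pthread mutex; the list type, names and per-item work are illustrative:

/* cursor_walk.c - sketch of walking a locked list with a movable cursor node,
 * as llap_shrink_cache() does with its dummy llap.
 * Build: cc -std=c11 -pthread cursor_walk.c -o cursor_walk */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

struct node {
        struct node *prev, *next;
        int value;                  /* -1 marks the cursor (the "dummy llap") */
};

static void list_insert_after(struct node *pos, struct node *n)
{
        n->prev = pos;
        n->next = pos->next;
        pos->next->prev = n;
        pos->next = n;
}

static void list_del(struct node *n)
{
        n->prev->next = n->next;
        n->next->prev = n->prev;
        n->prev = n->next = n;
}

int main(void)
{
        struct node head = { &head, &head, 0 };
        struct node items[4], cursor = { &cursor, &cursor, -1 };
        int i;

        for (i = 0; i < 4; i++) {
                items[i].value = i + 1;
                list_insert_after(head.prev, &items[i]);   /* append */
        }

        pthread_mutex_lock(&lock);
        list_insert_after(&head, &cursor);      /* start scanning at the head */
        while (cursor.next != &head) {
                struct node *cur = cursor.next;

                /* move the cursor past the entry we are about to process,
                 * so it keeps our place while the lock is dropped */
                list_del(&cursor);
                list_insert_after(cur, &cursor);

                pthread_mutex_unlock(&lock);
                printf("processing item %d without holding the lock\n",
                       cur->value);             /* expensive work goes here */
                pthread_mutex_lock(&lock);
        }
        list_del(&cursor);
        pthread_mutex_unlock(&lock);
        return 0;
}
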
-+
-+static struct ll_async_page *llap_from_page_with_lockh(struct page *page,
-+ unsigned origin,
-+ struct lustre_handle *lockh)
-+{
-+ struct ll_async_page *llap;
-+ struct obd_export *exp;
-+ struct inode *inode = page->mapping->host;
-+ struct ll_sb_info *sbi;
-+ int rc;
-+ ENTRY;
-+
-+ if (!inode) {
-+ static int triggered;
-+
-+ if (!triggered) {
-+ LL_CDEBUG_PAGE(D_ERROR, page, "Bug 10047. Wrong anon "
-+ "page received\n");
-+ libcfs_debug_dumpstack(NULL);
-+ triggered = 1;
-+ }
-+ RETURN(ERR_PTR(-EINVAL));
-+ }
-+ sbi = ll_i2sbi(inode);
-+ LASSERT(ll_async_page_slab);
-+ LASSERTF(origin < LLAP__ORIGIN_MAX, "%u\n", origin);
-+
-+ llap = llap_cast_private(page);
-+ if (llap != NULL) {
-+ /* move to end of LRU list, except when page is just about to
-+ * die */
-+ if (origin != LLAP_ORIGIN_REMOVEPAGE) {
-+ spin_lock(&sbi->ll_lock);
-+ sbi->ll_pglist_gen++;
-+ list_del_init(&llap->llap_pglist_item);
-+ list_add_tail(&llap->llap_pglist_item, &sbi->ll_pglist);
-+ spin_unlock(&sbi->ll_lock);
-+ }
-+ GOTO(out, llap);
-+ }
-+
-+ exp = ll_i2obdexp(page->mapping->host);
-+ if (exp == NULL)
-+ RETURN(ERR_PTR(-EINVAL));
-+
-+ /* limit the number of lustre-cached pages */
-+ if (sbi->ll_async_page_count >= sbi->ll_async_page_max)
-+ llap_shrink_cache(sbi, 0);
-+
-+ OBD_SLAB_ALLOC(llap, ll_async_page_slab, CFS_ALLOC_STD,
-+ ll_async_page_slab_size);
-+ if (llap == NULL)
-+ RETURN(ERR_PTR(-ENOMEM));
-+ llap->llap_magic = LLAP_MAGIC;
-+ llap->llap_cookie = (void *)llap + size_round(sizeof(*llap));
-+
-+ /* XXX: for bug 11270 - check for lockless origin here! */
-+ if (origin == LLAP_ORIGIN_LOCKLESS_IO)
-+ llap->llap_nocache = 1;
-+
-+ rc = obd_prep_async_page(exp, ll_i2info(inode)->lli_smd, NULL, page,
-+ (obd_off)page->index << CFS_PAGE_SHIFT,
-+ &ll_async_page_ops, llap, &llap->llap_cookie,
-+ llap->llap_nocache, lockh);
-+ if (rc) {
-+ OBD_SLAB_FREE(llap, ll_async_page_slab,
-+ ll_async_page_slab_size);
-+ RETURN(ERR_PTR(rc));
-+ }
-+
-+ CDEBUG(D_CACHE, "llap %p page %p cookie %p obj off "LPU64"\n", llap,
-+ page, llap->llap_cookie, (obd_off)page->index << CFS_PAGE_SHIFT);
-+ /* also zeroing the PRIVBITS low order bitflags */
-+ __set_page_ll_data(page, llap);
-+ llap->llap_page = page;
-+
-+ spin_lock(&sbi->ll_lock);
-+ sbi->ll_pglist_gen++;
-+ sbi->ll_async_page_count++;
-+ list_add_tail(&llap->llap_pglist_item, &sbi->ll_pglist);
-+ spin_unlock(&sbi->ll_lock);
-+
-+ out:
-+ if (unlikely(sbi->ll_flags & LL_SBI_LLITE_CHECKSUM)) {
-+ __u32 csum;
-+ char *kaddr = kmap_atomic(page, KM_USER0);
-+ csum = init_checksum(OSC_DEFAULT_CKSUM);
-+ csum = compute_checksum(csum, kaddr, CFS_PAGE_SIZE,
-+ OSC_DEFAULT_CKSUM);
-+ kunmap_atomic(kaddr, KM_USER0);
-+ if (origin == LLAP_ORIGIN_READAHEAD ||
-+ origin == LLAP_ORIGIN_READPAGE ||
-+ origin == LLAP_ORIGIN_LOCKLESS_IO) {
-+ llap->llap_checksum = 0;
-+ } else if (origin == LLAP_ORIGIN_COMMIT_WRITE ||
-+ llap->llap_checksum == 0) {
-+ llap->llap_checksum = csum;
-+ CDEBUG(D_PAGE, "page %p cksum %x\n", page, csum);
-+ } else if (llap->llap_checksum == csum) {
-+ /* origin == LLAP_ORIGIN_WRITEPAGE */
-+ CDEBUG(D_PAGE, "page %p cksum %x confirmed\n",
-+ page, csum);
-+ } else {
-+ /* origin == LLAP_ORIGIN_WRITEPAGE */
-+ LL_CDEBUG_PAGE(D_ERROR, page, "old cksum %x != new "
-+ "%x!\n", llap->llap_checksum, csum);
-+ }
-+ }
-+
-+ llap->llap_origin = origin;
-+ RETURN(llap);
-+}
-+
-+static inline struct ll_async_page *llap_from_page(struct page *page,
-+ unsigned origin)
-+{
-+ return llap_from_page_with_lockh(page, origin, NULL);
-+}
-+
-+static int queue_or_sync_write(struct obd_export *exp, struct inode *inode,
-+ struct ll_async_page *llap,
-+ unsigned to, obd_flag async_flags)
-+{
-+ unsigned long size_index = i_size_read(inode) >> CFS_PAGE_SHIFT;
-+ struct obd_io_group *oig;
-+ struct ll_sb_info *sbi = ll_i2sbi(inode);
-+ int rc, noquot = llap->llap_ignore_quota ? OBD_BRW_NOQUOTA : 0;
-+ ENTRY;
-+
-+ /* _make_ready only sees llap once we've unlocked the page */
-+ llap->llap_write_queued = 1;
-+ rc = obd_queue_async_io(exp, ll_i2info(inode)->lli_smd, NULL,
-+ llap->llap_cookie, OBD_BRW_WRITE | noquot,
-+ 0, 0, 0, async_flags);
-+ if (rc == 0) {
-+ LL_CDEBUG_PAGE(D_PAGE, llap->llap_page, "write queued\n");
-+ llap_write_pending(inode, llap);
-+ GOTO(out, 0);
-+ }
-+
-+ llap->llap_write_queued = 0;
-+
-+ rc = oig_init(&oig);
-+ if (rc)
-+ GOTO(out, rc);
-+
-+ /* make full-page requests if we are not at EOF (bug 4410) */
-+ if (to != CFS_PAGE_SIZE && llap->llap_page->index < size_index) {
-+ LL_CDEBUG_PAGE(D_PAGE, llap->llap_page,
-+ "sync write before EOF: size_index %lu, to %d\n",
-+ size_index, to);
-+ to = CFS_PAGE_SIZE;
-+ } else if (to != CFS_PAGE_SIZE && llap->llap_page->index == size_index){
-+ int size_to = i_size_read(inode) & ~CFS_PAGE_MASK;
-+ LL_CDEBUG_PAGE(D_PAGE, llap->llap_page,
-+ "sync write at EOF: size_index %lu, to %d/%d\n",
-+ size_index, to, size_to);
-+ if (to < size_to)
-+ to = size_to;
-+ }
-+
-+ /* compare the checksum once before the page leaves llite */
-+ if (unlikely((sbi->ll_flags & LL_SBI_LLITE_CHECKSUM) &&
-+ llap->llap_checksum != 0)) {
-+ __u32 csum;
-+ struct page *page = llap->llap_page;
-+ char *kaddr = kmap_atomic(page, KM_USER0);
-+ csum = init_checksum(OSC_DEFAULT_CKSUM);
-+ csum = compute_checksum(csum, kaddr, CFS_PAGE_SIZE,
-+ OSC_DEFAULT_CKSUM);
-+ kunmap_atomic(kaddr, KM_USER0);
-+ if (llap->llap_checksum == csum) {
-+ CDEBUG(D_PAGE, "page %p cksum %x confirmed\n",
-+ page, csum);
-+ } else {
-+ CERROR("page %p old cksum %x != new cksum %x!\n",
-+ page, llap->llap_checksum, csum);
-+ }
-+ }
-+
-+ rc = obd_queue_group_io(exp, ll_i2info(inode)->lli_smd, NULL, oig,
-+ llap->llap_cookie, OBD_BRW_WRITE | noquot,
-+ 0, to, 0, ASYNC_READY | ASYNC_URGENT |
-+ ASYNC_COUNT_STABLE | ASYNC_GROUP_SYNC);
-+ if (rc)
-+ GOTO(free_oig, rc);
-+
-+ rc = obd_trigger_group_io(exp, ll_i2info(inode)->lli_smd, NULL, oig);
-+ if (rc)
-+ GOTO(free_oig, rc);
-+
-+ rc = oig_wait(oig);
-+
-+ if (!rc && async_flags & ASYNC_READY) {
-+ unlock_page(llap->llap_page);
-+ if (PageWriteback(llap->llap_page))
-+ end_page_writeback(llap->llap_page);
-+ }
-+
-+ LL_CDEBUG_PAGE(D_PAGE, llap->llap_page, "sync write returned %d\n", rc);
-+
-+free_oig:
-+ oig_release(oig);
-+out:
-+ RETURN(rc);
-+}
-+
-+/* update our write count to account for i_size increases that may have
-+ * happened since we've queued the page for io. */
-+
-+/* be careful not to return success without setting the page Uptodate or
-+ * the next pass through prepare_write will read in stale data from disk. */
-+int ll_commit_write(struct file *file, struct page *page, unsigned from,
-+ unsigned to)
-+{
-+ struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
-+ struct inode *inode = page->mapping->host;
-+ struct ll_inode_info *lli = ll_i2info(inode);
-+ struct lov_stripe_md *lsm = lli->lli_smd;
-+ struct obd_export *exp;
-+ struct ll_async_page *llap;
-+ loff_t size;
-+ struct lustre_handle *lockh = NULL;
-+ int rc = 0;
-+ ENTRY;
-+
-+ SIGNAL_MASK_ASSERT(); /* XXX BUG 1511 */
-+ LASSERT(inode == file->f_dentry->d_inode);
-+ LASSERT(PageLocked(page));
-+
-+ CDEBUG(D_INODE, "inode %p is writing page %p from %d to %d at %lu\n",
-+ inode, page, from, to, page->index);
-+
-+ if (fd->fd_flags & LL_FILE_GROUP_LOCKED)
-+ lockh = &fd->fd_cwlockh;
-+
-+ llap = llap_from_page_with_lockh(page, LLAP_ORIGIN_COMMIT_WRITE, lockh);
-+ if (IS_ERR(llap))
-+ RETURN(PTR_ERR(llap));
-+
-+ exp = ll_i2obdexp(inode);
-+ if (exp == NULL)
-+ RETURN(-EINVAL);
-+
-+ llap->llap_ignore_quota = cfs_capable(CFS_CAP_SYS_RESOURCE);
-+
-+ /* queue a write for some time in the future the first time we
-+ * dirty the page */
-+ if (!PageDirty(page)) {
-+ ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_DIRTY_MISSES, 1);
-+
-+ rc = queue_or_sync_write(exp, inode, llap, to, 0);
-+ if (rc)
-+ GOTO(out, rc);
-+ } else {
-+ ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_DIRTY_HITS, 1);
-+ }
-+
-+ /* put the page in the page cache, from now on ll_removepage is
-+ * responsible for cleaning up the llap.
-+ * only set the page dirty when it's queued to be written out */
-+ if (llap->llap_write_queued)
-+ set_page_dirty(page);
-+
-+out:
-+ size = (((obd_off)page->index) << CFS_PAGE_SHIFT) + to;
-+ ll_inode_size_lock(inode, 0);
-+ if (rc == 0) {
-+ lov_stripe_lock(lsm);
-+ obd_adjust_kms(exp, lsm, size, 0);
-+ lov_stripe_unlock(lsm);
-+ if (size > i_size_read(inode))
-+ i_size_write(inode, size);
-+ SetPageUptodate(page);
-+ } else if (size > i_size_read(inode)) {
-+ /* this page is beyond i_size, so it can't be
-+ * truncated in ll_p_r_e during lock revocation. we must
-+ * tear down our book-keeping here. */
-+ ll_removepage(page);
-+ }
-+ ll_inode_size_unlock(inode, 0);
-+ RETURN(rc);
-+}
-+
-+static unsigned long ll_ra_count_get(struct ll_sb_info *sbi, unsigned long len)
-+{
-+ struct ll_ra_info *ra = &sbi->ll_ra_info;
-+ unsigned long ret;
-+ ENTRY;
-+
-+ spin_lock(&sbi->ll_lock);
-+ ret = min(ra->ra_max_pages - ra->ra_cur_pages, len);
-+ ra->ra_cur_pages += ret;
-+ spin_unlock(&sbi->ll_lock);
-+
-+ RETURN(ret);
-+}
-+
-+static void ll_ra_count_put(struct ll_sb_info *sbi, unsigned long len)
-+{
-+ struct ll_ra_info *ra = &sbi->ll_ra_info;
-+ spin_lock(&sbi->ll_lock);
-+ LASSERTF(ra->ra_cur_pages >= len, "r_c_p %lu len %lu\n",
-+ ra->ra_cur_pages, len);
-+ ra->ra_cur_pages -= len;
-+ spin_unlock(&sbi->ll_lock);
-+}
-+
-+/* called for each page in a completed rpc.*/
-+int ll_ap_completion(void *data, int cmd, struct obdo *oa, int rc)
-+{
-+ struct ll_async_page *llap;
-+ struct page *page;
-+ int ret = 0;
-+ ENTRY;
-+
-+ llap = LLAP_FROM_COOKIE(data);
-+ page = llap->llap_page;
-+ LASSERT(PageLocked(page));
-+ LASSERT(CheckWriteback(page,cmd));
-+
-+ LL_CDEBUG_PAGE(D_PAGE, page, "completing cmd %d with %d\n", cmd, rc);
-+
-+ if (cmd & OBD_BRW_READ && llap->llap_defer_uptodate)
-+ ll_ra_count_put(ll_i2sbi(page->mapping->host), 1);
-+
-+ if (rc == 0) {
-+ if (cmd & OBD_BRW_READ) {
-+ if (!llap->llap_defer_uptodate)
-+ SetPageUptodate(page);
-+ } else {
-+ llap->llap_write_queued = 0;
-+ }
-+ ClearPageError(page);
-+ } else {
-+ if (cmd & OBD_BRW_READ) {
-+ llap->llap_defer_uptodate = 0;
-+ }
-+ SetPageError(page);
-+ if (rc == -ENOSPC)
-+ set_bit(AS_ENOSPC, &page->mapping->flags);
-+ else
-+ set_bit(AS_EIO, &page->mapping->flags);
-+ }
-+
-+ /* be careful about clearing writeback.
-+ * if writeback is cleared after the page lock is released, parallel IO
-+ * can start before ap_make_ready has finished, so we could end up with
-+ * a page that has PG_Writeback set from ->writepage() and a completed
-+ * READ that clears this flag */
-+ if ((cmd & OBD_BRW_WRITE) && PageWriteback(page))
-+ end_page_writeback(page);
-+
-+ unlock_page(page);
-+
-+ if (cmd & OBD_BRW_WRITE) {
-+ llap_write_complete(page->mapping->host, llap);
-+ ll_try_done_writing(page->mapping->host);
-+ }
-+
-+ page_cache_release(page);
-+
-+ RETURN(ret);
-+}
-+
-+static void __ll_put_llap(struct page *page)
-+{
-+ struct inode *inode = page->mapping->host;
-+ struct obd_export *exp;
-+ struct ll_async_page *llap;
-+ struct ll_sb_info *sbi = ll_i2sbi(inode);
-+ int rc;
-+ ENTRY;
-+
-+ exp = ll_i2obdexp(inode);
-+ if (exp == NULL) {
-+ CERROR("page %p ind %lu gave null export\n", page, page->index);
-+ EXIT;
-+ return;
-+ }
-+
-+ llap = llap_from_page(page, LLAP_ORIGIN_REMOVEPAGE);
-+ if (IS_ERR(llap)) {
-+ CERROR("page %p ind %lu couldn't find llap: %ld\n", page,
-+ page->index, PTR_ERR(llap));
-+ EXIT;
-+ return;
-+ }
-+
-+ //llap_write_complete(inode, llap);
-+ rc = obd_teardown_async_page(exp, ll_i2info(inode)->lli_smd, NULL,
-+ llap->llap_cookie);
-+ if (rc != 0)
-+ CERROR("page %p ind %lu failed: %d\n", page, page->index, rc);
-+
-+ /* this unconditional free is only safe because the page lock
-+ * is providing exclusivity to memory pressure/truncate/writeback..*/
-+ __clear_page_ll_data(page);
-+
-+ spin_lock(&sbi->ll_lock);
-+ if (!list_empty(&llap->llap_pglist_item))
-+ list_del_init(&llap->llap_pglist_item);
-+ sbi->ll_pglist_gen++;
-+ sbi->ll_async_page_count--;
-+ spin_unlock(&sbi->ll_lock);
-+ OBD_SLAB_FREE(llap, ll_async_page_slab, ll_async_page_slab_size);
-+
-+ EXIT;
-+}
-+
-+/* the kernel calls us here when a page is unhashed from the page cache.
-+ * the page will be locked and the kernel is holding a spinlock, so
-+ * we need to be careful. we're just tearing down our book-keeping
-+ * here. */
-+void ll_removepage(struct page *page)
-+{
-+ struct ll_async_page *llap = llap_cast_private(page);
-+ ENTRY;
-+
-+ LASSERT(!in_interrupt());
-+
-+ /* sync pages or failed read pages can leave pages in the page
-+ * cache that don't have our data associated with them anymore */
-+ if (page_private(page) == 0) {
-+ EXIT;
-+ return;
-+ }
-+
-+ LASSERT(!llap->llap_lockless_io_page);
-+ LASSERT(!llap->llap_nocache);
-+
-+ LL_CDEBUG_PAGE(D_PAGE, page, "being evicted\n");
-+ __ll_put_llap(page);
-+
-+ EXIT;
-+}
-+
-+static int ll_issue_page_read(struct obd_export *exp,
-+ struct ll_async_page *llap,
-+ struct obd_io_group *oig, int defer)
-+{
-+ struct page *page = llap->llap_page;
-+ int rc;
-+
-+ page_cache_get(page);
-+ llap->llap_defer_uptodate = defer;
-+ llap->llap_ra_used = 0;
-+ rc = obd_queue_group_io(exp, ll_i2info(page->mapping->host)->lli_smd,
-+ NULL, oig, llap->llap_cookie, OBD_BRW_READ, 0,
-+ CFS_PAGE_SIZE, 0, ASYNC_COUNT_STABLE | ASYNC_READY |
-+ ASYNC_URGENT);
-+ if (rc) {
-+ LL_CDEBUG_PAGE(D_ERROR, page, "read queue failed: rc %d\n", rc);
-+ page_cache_release(page);
-+ }
-+ RETURN(rc);
-+}
-+
-+static void ll_ra_stats_inc_unlocked(struct ll_ra_info *ra, enum ra_stat which)
-+{
-+ LASSERTF(which >= 0 && which < _NR_RA_STAT, "which: %u\n", which);
-+ ra->ra_stats[which]++;
-+}
-+
-+static void ll_ra_stats_inc(struct address_space *mapping, enum ra_stat which)
-+{
-+ struct ll_sb_info *sbi = ll_i2sbi(mapping->host);
-+ struct ll_ra_info *ra = &ll_i2sbi(mapping->host)->ll_ra_info;
-+
-+ spin_lock(&sbi->ll_lock);
-+ ll_ra_stats_inc_unlocked(ra, which);
-+ spin_unlock(&sbi->ll_lock);
-+}
-+
-+void ll_ra_accounting(struct ll_async_page *llap, struct address_space *mapping)
-+{
-+ if (!llap->llap_defer_uptodate || llap->llap_ra_used)
-+ return;
-+
-+ ll_ra_stats_inc(mapping, RA_STAT_DISCARDED);
-+}
-+
-+#define RAS_CDEBUG(ras) \
-+ CDEBUG(D_READA, \
-+ "lrp %lu cr %lu cp %lu ws %lu wl %lu nra %lu r %lu ri %lu" \
-+ "csr %lu sf %lu sp %lu sl %lu \n", \
-+ ras->ras_last_readpage, ras->ras_consecutive_requests, \
-+ ras->ras_consecutive_pages, ras->ras_window_start, \
-+ ras->ras_window_len, ras->ras_next_readahead, \
-+ ras->ras_requests, ras->ras_request_index, \
-+ ras->ras_consecutive_stride_requests, ras->ras_stride_offset, \
-+ ras->ras_stride_pages, ras->ras_stride_length)
-+
-+static int index_in_window(unsigned long index, unsigned long point,
-+ unsigned long before, unsigned long after)
-+{
-+ unsigned long start = point - before, end = point + after;
-+
-+ if (start > point)
-+ start = 0;
-+ if (end < point)
-+ end = ~0;
-+
-+ return start <= index && index <= end;
-+}
-+
-+static struct ll_readahead_state *ll_ras_get(struct file *f)
-+{
-+ struct ll_file_data *fd;
-+
-+ fd = LUSTRE_FPRIVATE(f);
-+ return &fd->fd_ras;
-+}
-+
-+void ll_ra_read_in(struct file *f, struct ll_ra_read *rar)
-+{
-+ struct ll_readahead_state *ras;
-+
-+ ras = ll_ras_get(f);
-+
-+ spin_lock(&ras->ras_lock);
-+ ras->ras_requests++;
-+ ras->ras_request_index = 0;
-+ ras->ras_consecutive_requests++;
-+ rar->lrr_reader = current;
-+
-+ list_add(&rar->lrr_linkage, &ras->ras_read_beads);
-+ spin_unlock(&ras->ras_lock);
-+}
-+
-+void ll_ra_read_ex(struct file *f, struct ll_ra_read *rar)
-+{
-+ struct ll_readahead_state *ras;
-+
-+ ras = ll_ras_get(f);
-+
-+ spin_lock(&ras->ras_lock);
-+ list_del_init(&rar->lrr_linkage);
-+ spin_unlock(&ras->ras_lock);
-+}
-+
-+static struct ll_ra_read *ll_ra_read_get_locked(struct ll_readahead_state *ras)
-+{
-+ struct ll_ra_read *scan;
-+
-+ list_for_each_entry(scan, &ras->ras_read_beads, lrr_linkage) {
-+ if (scan->lrr_reader == current)
-+ return scan;
-+ }
-+ return NULL;
-+}
-+
-+struct ll_ra_read *ll_ra_read_get(struct file *f)
-+{
-+ struct ll_readahead_state *ras;
-+ struct ll_ra_read *bead;
-+
-+ ras = ll_ras_get(f);
-+
-+ spin_lock(&ras->ras_lock);
-+ bead = ll_ra_read_get_locked(ras);
-+ spin_unlock(&ras->ras_lock);
-+ return bead;
-+}
-+
-+static int ll_read_ahead_page(struct obd_export *exp, struct obd_io_group *oig,
-+ int index, struct address_space *mapping)
-+{
-+ struct ll_async_page *llap;
-+ struct page *page;
-+ unsigned int gfp_mask = 0;
-+ int rc = 0;
-+
-+ gfp_mask = GFP_HIGHUSER & ~__GFP_WAIT;
-+#ifdef __GFP_NOWARN
-+ gfp_mask |= __GFP_NOWARN;
-+#endif
-+ page = grab_cache_page_nowait_gfp(mapping, index, gfp_mask);
-+ if (page == NULL) {
-+ ll_ra_stats_inc(mapping, RA_STAT_FAILED_GRAB_PAGE);
-+ CDEBUG(D_READA, "g_c_p_n failed\n");
-+ return 0;
-+ }
-+
-+ /* Check if page was truncated or reclaimed */
-+ if (page->mapping != mapping) {
-+ ll_ra_stats_inc(mapping, RA_STAT_WRONG_GRAB_PAGE);
-+ CDEBUG(D_READA, "g_c_p_n returned invalid page\n");
-+ GOTO(unlock_page, rc = 0);
-+ }
-+
-+ /* we do this first so that we can see the page in the /proc
-+ * accounting */
-+ llap = llap_from_page(page, LLAP_ORIGIN_READAHEAD);
-+ if (IS_ERR(llap) || llap->llap_defer_uptodate) {
-+ if (PTR_ERR(llap) == -ENOLCK) {
-+ ll_ra_stats_inc(mapping, RA_STAT_FAILED_MATCH);
-+ CDEBUG(D_READA | D_PAGE,
-+ "Adding page to cache failed index "
-+ "%d\n", index);
-+ CDEBUG(D_READA, "nolock page\n");
-+ GOTO(unlock_page, rc = -ENOLCK);
-+ }
-+ CDEBUG(D_READA, "read-ahead page\n");
-+ GOTO(unlock_page, rc = 0);
-+ }
-+
-+ /* skip completed pages */
-+ if (Page_Uptodate(page))
-+ GOTO(unlock_page, rc = 0);
-+
-+ /* bail out when we hit the end of the lock. */
-+ rc = ll_issue_page_read(exp, llap, oig, 1);
-+ if (rc == 0) {
-+ LL_CDEBUG_PAGE(D_READA | D_PAGE, page, "started read-ahead\n");
-+ rc = 1;
-+ } else {
-+unlock_page:
-+ unlock_page(page);
-+ LL_CDEBUG_PAGE(D_READA | D_PAGE, page, "skipping read-ahead\n");
-+ }
-+ page_cache_release(page);
-+ return rc;
-+}
-+
-+/* ra_io_arg is filled in at the beginning of ll_readahead while holding
-+ * ras_lock; ll_read_ahead_pages then reads RA pages according to this
-+ * argument. All items in this structure are expressed as page indices.
-+ */
-+struct ra_io_arg {
-+ unsigned long ria_start; /* start offset of read-ahead*/
-+ unsigned long ria_end; /* end offset of read-ahead*/
-+ /* If a stride read pattern is detected, ria_stoff is the offset at
-+ * which the stride read starts. Note: for normal read-ahead this
-+ * value is meaningless and will not be accessed. */
-+ pgoff_t ria_stoff;
-+ /* ria_length and ria_pages are the stride length and the number of
-+ * data pages per stride in stride I/O mode. They are also used to
-+ * decide whether a page belongs to a stride I/O read-ahead. */
-+ unsigned long ria_length;
-+ unsigned long ria_pages;
-+};
-+
-+#define RIA_DEBUG(ria) \
-+ CDEBUG(D_READA, "rs %lu re %lu ro %lu rl %lu rp %lu\n", \
-+ ria->ria_start, ria->ria_end, ria->ria_stoff, ria->ria_length,\
-+ ria->ria_pages)
-+
-+#define RAS_INCREASE_STEP (1024 * 1024 >> CFS_PAGE_SHIFT)
-+
-+static inline int stride_io_mode(struct ll_readahead_state *ras)
-+{
-+ return ras->ras_consecutive_stride_requests > 1;
-+}
-+
-+/* The function calculates how many pages will be read in
-+ * [off, off + length] under stride I/O mode, with
-+ * stride_offset = st_off, stride_length = st_len,
-+ * stride_pages = st_pgs
-+ */
-+static unsigned long
-+stride_pg_count(pgoff_t st_off, unsigned long st_len, unsigned long st_pgs,
-+ unsigned long off, unsigned length)
-+{
-+ unsigned long cont_len = st_off > off ? st_off - off : 0;
-+ __u64 stride_len = length + off > st_off ?
-+ length + off + 1 - st_off : 0;
-+ unsigned long left, pg_count;
-+
-+ if (st_len == 0 || length == 0)
-+ return length;
-+
-+ left = do_div(stride_len, st_len);
-+ left = min(left, st_pgs);
-+
-+ pg_count = left + stride_len * st_pgs + cont_len;
-+
-+ LASSERT(pg_count >= left);
-+
-+ CDEBUG(D_READA, "st_off %lu, st_len %lu st_pgs %lu off %lu length %u"
-+ "pgcount %lu\n", st_off, st_len, st_pgs, off, length, pg_count);
-+
-+ return pg_count;
-+}
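
As the comment above says, stride_pg_count() computes in closed form how many pages of [off, off + length] land in the data part of the stride pattern (st_off, st_len, st_pgs). The brute-force re-derivation below counts the same kind of quantity page by page (over a half-open range, for simplicity); the helper name and the sample pattern are illustrative, not taken from the patch:

/* stride_count.c - brute-force check of the stride page-count computation.
 * Build: cc -std=c11 stride_count.c -o stride_count */
#include <stdio.h>

/* Count the pages in [off, off + len) that a stride reader would touch:
 * pages before st_off count as contiguous data, and within the pattern
 * only the first st_pgs pages of every st_len-page period are data. */
static unsigned long stride_pages_in_range(unsigned long st_off,
                                           unsigned long st_len,
                                           unsigned long st_pgs,
                                           unsigned long off,
                                           unsigned long len)
{
        unsigned long count = 0, idx;

        if (st_len == 0 || st_pgs == 0)
                return len;                     /* no stride pattern: contiguous */

        for (idx = off; idx < off + len; idx++) {
                if (idx < st_off)
                        count++;                /* before the pattern starts */
                else if ((idx - st_off) % st_len < st_pgs)
                        count++;                /* inside a data chunk */
        }
        return count;
}

int main(void)
{
        /* pattern: 4 data pages every 16 pages, starting at page 32 */
        printf("%lu\n", stride_pages_in_range(32, 16, 4, 32, 64));  /* prints 16 */
        return 0;
}
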
-+
-+static int ria_page_count(struct ra_io_arg *ria)
-+{
-+ __u64 length = ria->ria_end >= ria->ria_start ?
-+ ria->ria_end - ria->ria_start + 1 : 0;
-+
-+ return stride_pg_count(ria->ria_stoff, ria->ria_length,
-+ ria->ria_pages, ria->ria_start,
-+ length);
-+}
-+
-+/* Check whether the index is in the defined read-ahead window */
-+static int ras_inside_ra_window(unsigned long idx, struct ra_io_arg *ria)
-+{
-+ /* If ria_length == ria_pages, this is non-stride I/O mode and
-+ * idx is always inside the read-ahead window in that case.
-+ * For stride I/O mode, just check whether idx falls within the
-+ * first ria_pages pages of its stride. */
-+ return ria->ria_length == 0 || ria->ria_length == ria->ria_pages ||
-+ (idx - ria->ria_stoff) % ria->ria_length < ria->ria_pages;
-+}
-+
-+static int ll_read_ahead_pages(struct obd_export *exp,
-+ struct obd_io_group *oig,
-+ struct ra_io_arg *ria,
-+ unsigned long *reserved_pages,
-+ struct address_space *mapping,
-+ unsigned long *ra_end)
-+{
-+ int rc, count = 0, stride_ria;
-+ unsigned long page_idx;
-+
-+ LASSERT(ria != NULL);
-+ RIA_DEBUG(ria);
-+
-+ stride_ria = ria->ria_length > ria->ria_pages && ria->ria_pages > 0;
-+ for (page_idx = ria->ria_start; page_idx <= ria->ria_end &&
-+ *reserved_pages > 0; page_idx++) {
-+ if (ras_inside_ra_window(page_idx, ria)) {
-+ /* If the page is inside the read-ahead window*/
-+ rc = ll_read_ahead_page(exp, oig, page_idx, mapping);
-+ if (rc == 1) {
-+ (*reserved_pages)--;
-+ count ++;
-+ } else if (rc == -ENOLCK)
-+ break;
-+ } else if (stride_ria) {
-+ /* If it is not in the read-ahead window and stride
-+ * read-ahead is active, check whether the stride gap
-+ * should be skipped */
-+ pgoff_t offset;
-+ /* FIXME: This assertion is only valid for forward
-+ * read-ahead; it will be fixed when backward
-+ * read-ahead is implemented */
-+ LASSERTF(page_idx > ria->ria_stoff, "since %lu is in the"
-+ " gap of the ra window, it should be bigger than the"
-+ " stride offset %lu\n", page_idx, ria->ria_stoff);
-+
-+ offset = page_idx - ria->ria_stoff;
-+ offset = offset % (ria->ria_length);
-+ if (offset > ria->ria_pages) {
-+ page_idx += ria->ria_length - offset;
-+ CDEBUG(D_READA, "i %lu skip %lu \n", page_idx,
-+ ria->ria_length - offset);
-+ continue;
-+ }
-+ }
-+ }
-+ *ra_end = page_idx;
-+ return count;
-+}
-+
-+static int ll_readahead(struct ll_readahead_state *ras,
-+ struct obd_export *exp, struct address_space *mapping,
-+ struct obd_io_group *oig, int flags)
-+{
-+ unsigned long start = 0, end = 0, reserved;
-+ unsigned long ra_end, len;
-+ struct inode *inode;
-+ struct lov_stripe_md *lsm;
-+ struct ll_ra_read *bead;
-+ struct ost_lvb lvb;
-+ struct ra_io_arg ria = { 0 };
-+ int ret = 0;
-+ __u64 kms;
-+ ENTRY;
-+
-+ inode = mapping->host;
-+ lsm = ll_i2info(inode)->lli_smd;
-+
-+ lov_stripe_lock(lsm);
-+ inode_init_lvb(inode, &lvb);
-+ obd_merge_lvb(ll_i2obdexp(inode), lsm, &lvb, 1);
-+ kms = lvb.lvb_size;
-+ lov_stripe_unlock(lsm);
-+ if (kms == 0) {
-+ ll_ra_stats_inc(mapping, RA_STAT_ZERO_LEN);
-+ RETURN(0);
-+ }
-+
-+ spin_lock(&ras->ras_lock);
-+ bead = ll_ra_read_get_locked(ras);
-+ /* Enlarge the RA window to encompass the full read */
-+ if (bead != NULL && ras->ras_window_start + ras->ras_window_len <
-+ bead->lrr_start + bead->lrr_count) {
-+ ras->ras_window_len = bead->lrr_start + bead->lrr_count -
-+ ras->ras_window_start;
-+ }
-+ /* Reserve a part of the read-ahead window that we'll be issuing */
-+ if (ras->ras_window_len) {
-+ start = ras->ras_next_readahead;
-+ end = ras->ras_window_start + ras->ras_window_len - 1;
-+ }
-+ if (end != 0) {
-+ /* Truncate RA window to end of file */
-+ end = min(end, (unsigned long)((kms - 1) >> CFS_PAGE_SHIFT));
-+ ras->ras_next_readahead = max(end, end + 1);
-+ RAS_CDEBUG(ras);
-+ }
-+ ria.ria_start = start;
-+ ria.ria_end = end;
-+ /* If stride I/O mode is detected, get stride window*/
-+ if (stride_io_mode(ras)) {
-+ ria.ria_stoff = ras->ras_stride_offset;
-+ ria.ria_length = ras->ras_stride_length;
-+ ria.ria_pages = ras->ras_stride_pages;
-+ }
-+ spin_unlock(&ras->ras_lock);
-+
-+ if (end == 0) {
-+ ll_ra_stats_inc(mapping, RA_STAT_ZERO_WINDOW);
-+ RETURN(0);
-+ }
-+
-+ len = ria_page_count(&ria);
-+ if (len == 0)
-+ RETURN(0);
-+
-+ reserved = ll_ra_count_get(ll_i2sbi(inode), len);
-+ if (reserved < len)
-+ ll_ra_stats_inc(mapping, RA_STAT_MAX_IN_FLIGHT);
-+
-+ CDEBUG(D_READA, "reserved page %lu \n", reserved);
-+
-+ ret = ll_read_ahead_pages(exp, oig, &ria, &reserved, mapping, &ra_end);
-+
-+ LASSERTF(reserved >= 0, "reserved %lu\n", reserved);
-+ if (reserved != 0)
-+ ll_ra_count_put(ll_i2sbi(inode), reserved);
-+
-+ if (ra_end == end + 1 && ra_end == (kms >> CFS_PAGE_SHIFT))
-+ ll_ra_stats_inc(mapping, RA_STAT_EOF);
-+
-+ /* if we didn't get to the end of the region we reserved from
-+ * the ras we need to go back and update the ras so that the
-+ * next read-ahead tries from where we left off. we only do so
-+ * if the region we failed to issue read-ahead on is still ahead
-+ * of the app and behind the next index to start read-ahead from */
-+ CDEBUG(D_READA, "ra_end %lu end %lu stride end %lu \n",
-+ ra_end, end, ria.ria_end);
-+
-+ if (ra_end != (end + 1)) {
-+ spin_lock(&ras->ras_lock);
-+ if (ra_end < ras->ras_next_readahead &&
-+ index_in_window(ra_end, ras->ras_window_start, 0,
-+ ras->ras_window_len)) {
-+ ras->ras_next_readahead = ra_end;
-+ RAS_CDEBUG(ras);
-+ }
-+ spin_unlock(&ras->ras_lock);
-+ }
-+
-+ RETURN(ret);
-+}
-+
-+static void ras_set_start(struct ll_readahead_state *ras, unsigned long index)
-+{
-+ ras->ras_window_start = index & (~(RAS_INCREASE_STEP - 1));
-+}
-+
-+/* called with the ras_lock held or from places where it doesn't matter */
-+static void ras_reset(struct ll_readahead_state *ras, unsigned long index)
-+{
-+ ras->ras_last_readpage = index;
-+ ras->ras_consecutive_requests = 0;
-+ ras->ras_consecutive_pages = 0;
-+ ras->ras_window_len = 0;
-+ ras_set_start(ras, index);
-+ ras->ras_next_readahead = max(ras->ras_window_start, index);
-+
-+ RAS_CDEBUG(ras);
-+}
-+
-+/* called with the ras_lock held or from places where it doesn't matter */
-+static void ras_stride_reset(struct ll_readahead_state *ras)
-+{
-+ ras->ras_consecutive_stride_requests = 0;
-+ ras->ras_stride_length = 0;
-+ ras->ras_stride_pages = 0;
-+ RAS_CDEBUG(ras);
-+}
-+
-+void ll_readahead_init(struct inode *inode, struct ll_readahead_state *ras)
-+{
-+ spin_lock_init(&ras->ras_lock);
-+ ras_reset(ras, 0);
-+ ras->ras_requests = 0;
-+ INIT_LIST_HEAD(&ras->ras_read_beads);
-+}
-+
-+/*
-+ * Check whether the read request is in the stride window.
-+ * If it is in the stride window, return 1, otherwise return 0.
-+ */
-+static int index_in_stride_window(unsigned long index,
-+ struct ll_readahead_state *ras,
-+ struct inode *inode)
-+{
-+ unsigned long stride_gap = index - ras->ras_last_readpage - 1;
-+
-+ if (ras->ras_stride_length == 0 || ras->ras_stride_pages == 0)
-+ return 0;
-+
-+ /* If it is contiguous read */
-+ if (stride_gap == 0)
-+ return ras->ras_consecutive_pages + 1 <= ras->ras_stride_pages;
-+
-+ /*Otherwise check the stride by itself */
-+ return (ras->ras_stride_length - ras->ras_stride_pages) == stride_gap &&
-+ ras->ras_consecutive_pages == ras->ras_stride_pages;
-+}
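
index_in_stride_window() decides whether a new read continues the detected stride pattern: either it is contiguous and still fits inside the current data chunk, or it jumps exactly one gap (stride_length - stride_pages) after a full chunk has been read. A small stand-alone sketch of that decision; the names and sample values are illustrative:

/* stride_check.c - sketch of the stride-pattern check in index_in_stride_window().
 * Build: cc -std=c11 stride_check.c -o stride_check */
#include <stdio.h>

/* A reader that touches st_pgs consecutive pages, then skips ahead so that
 * successive chunks start st_len pages apart, matches the stride pattern. */
static int continues_stride(unsigned long index,   /* page being read now      */
                            unsigned long last,    /* previous page read       */
                            unsigned long consec,  /* consecutive pages so far */
                            unsigned long st_len,  /* pages per full period    */
                            unsigned long st_pgs)  /* data pages per period    */
{
        unsigned long gap = index - last - 1;

        if (st_len == 0 || st_pgs == 0)
                return 0;
        if (gap == 0)                           /* contiguous read */
                return consec + 1 <= st_pgs;    /* still inside the data chunk */
        /* non-contiguous: must jump exactly one gap after a complete chunk */
        return gap == st_len - st_pgs && consec == st_pgs;
}

int main(void)
{
        /* pattern: 4 data pages every 16 pages */
        printf("%d\n", continues_stride(101, 100, 2, 16, 4)); /* 1: contiguous */
        printf("%d\n", continues_stride(116, 103, 4, 16, 4)); /* 1: exact jump */
        printf("%d\n", continues_stride(110, 103, 4, 16, 4)); /* 0: wrong gap  */
        return 0;
}
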
-+
-+static void ras_update_stride_detector(struct ll_readahead_state *ras,
-+ unsigned long index)
-+{
-+ unsigned long stride_gap = index - ras->ras_last_readpage - 1;
-+
-+ if (!stride_io_mode(ras) && (stride_gap != 0 ||
-+ ras->ras_consecutive_stride_requests == 0)) {
-+ ras->ras_stride_pages = ras->ras_consecutive_pages;
-+ ras->ras_stride_length = stride_gap +ras->ras_consecutive_pages;
-+ }
-+ RAS_CDEBUG(ras);
-+}
-+
-+static unsigned long
-+stride_page_count(struct ll_readahead_state *ras, unsigned long len)
-+{
-+ return stride_pg_count(ras->ras_stride_offset, ras->ras_stride_length,
-+ ras->ras_stride_pages, ras->ras_stride_offset,
-+ len);
-+}
-+
-+/* The stride read-ahead window will be increased by inc_len according to
-+ * the stride I/O pattern */
-+static void ras_stride_increase_window(struct ll_readahead_state *ras,
-+ struct ll_ra_info *ra,
-+ unsigned long inc_len)
-+{
-+ unsigned long left, step, window_len;
-+ unsigned long stride_len;
-+
-+ LASSERT(ras->ras_stride_length > 0);
-+
-+ stride_len = ras->ras_window_start + ras->ras_window_len -
-+ ras->ras_stride_offset;
-+
-+ LASSERTF(stride_len >= 0, "window_start %lu, window_len %lu"
-+ " stride_offset %lu\n", ras->ras_window_start,
-+ ras->ras_window_len, ras->ras_stride_offset);
-+
-+ left = stride_len % ras->ras_stride_length;
-+
-+ window_len = ras->ras_window_len - left;
-+
-+ if (left < ras->ras_stride_pages)
-+ left += inc_len;
-+ else
-+ left = ras->ras_stride_pages + inc_len;
-+
-+ LASSERT(ras->ras_stride_pages != 0);
-+
-+ step = left / ras->ras_stride_pages;
-+ left %= ras->ras_stride_pages;
-+
-+ window_len += step * ras->ras_stride_length + left;
-+
-+ if (stride_page_count(ras, window_len) <= ra->ra_max_pages)
-+ ras->ras_window_len = window_len;
-+
-+ RAS_CDEBUG(ras);
-+}
-+
-+/* Set stride I/O read-ahead window start offset */
-+static void ras_set_stride_offset(struct ll_readahead_state *ras)
-+{
-+ unsigned long window_len = ras->ras_next_readahead -
-+ ras->ras_window_start;
-+ unsigned long left;
-+
-+ LASSERT(ras->ras_stride_length != 0);
-+
-+ left = window_len % ras->ras_stride_length;
-+
-+ ras->ras_stride_offset = ras->ras_next_readahead - left;
-+
-+ RAS_CDEBUG(ras);
-+}
-+
-+static void ras_update(struct ll_sb_info *sbi, struct inode *inode,
-+ struct ll_readahead_state *ras, unsigned long index,
-+ unsigned hit)
-+{
-+ struct ll_ra_info *ra = &sbi->ll_ra_info;
-+ int zero = 0, stride_detect = 0, ra_miss = 0;
-+ ENTRY;
-+
-+ spin_lock(&sbi->ll_lock);
-+ spin_lock(&ras->ras_lock);
-+
-+ ll_ra_stats_inc_unlocked(ra, hit ? RA_STAT_HIT : RA_STAT_MISS);
-+
-+ /* reset the read-ahead window in two cases. First when the app seeks
-+ * or reads to some other part of the file. Secondly if we get a
-+ * read-ahead miss that we think we've previously issued. This can
-+ * be a symptom of there being so many read-ahead pages that the VM is
-+ * reclaiming it before we get to it. */
-+ if (!index_in_window(index, ras->ras_last_readpage, 8, 8)) {
-+ zero = 1;
-+ ll_ra_stats_inc_unlocked(ra, RA_STAT_DISTANT_READPAGE);
-+ } else if (!hit && ras->ras_window_len &&
-+ index < ras->ras_next_readahead &&
-+ index_in_window(index, ras->ras_window_start, 0,
-+ ras->ras_window_len)) {
-+ ra_miss = 1;
-+ ll_ra_stats_inc_unlocked(ra, RA_STAT_MISS_IN_WINDOW);
-+ }
-+
-+ /* On the second access to a file smaller than the tunable
-+ * ra_max_read_ahead_whole_pages trigger RA on all pages in the
-+ * file up to ra_max_pages. This is simply a best effort and
-+ * only occurs once per open file. Normal RA behavior is reverted
-+ * to for subsequent IO. The mmap case does not increment
-+ * ras_requests and thus can never trigger this behavior. */
-+ if (ras->ras_requests == 2 && !ras->ras_request_index) {
-+ __u64 kms_pages;
-+
-+ kms_pages = (i_size_read(inode) + CFS_PAGE_SIZE - 1) >>
-+ CFS_PAGE_SHIFT;
-+
-+ CDEBUG(D_READA, "kmsp "LPU64" mwp %lu mp %lu\n", kms_pages,
-+ ra->ra_max_read_ahead_whole_pages, ra->ra_max_pages);
-+
-+ if (kms_pages &&
-+ kms_pages <= ra->ra_max_read_ahead_whole_pages) {
-+ ras->ras_window_start = 0;
-+ ras->ras_last_readpage = 0;
-+ ras->ras_next_readahead = 0;
-+ ras->ras_window_len = min(ra->ra_max_pages,
-+ ra->ra_max_read_ahead_whole_pages);
-+ GOTO(out_unlock, 0);
-+ }
-+ }
-+ if (zero) {
-+ /* check whether it is in stride I/O mode*/
-+ if (!index_in_stride_window(index, ras, inode)) {
-+ ras_reset(ras, index);
-+ ras->ras_consecutive_pages++;
-+ ras_stride_reset(ras);
-+ GOTO(out_unlock, 0);
-+ } else {
-+ ras->ras_consecutive_requests = 0;
-+ if (++ras->ras_consecutive_stride_requests > 1)
-+ stride_detect = 1;
-+ RAS_CDEBUG(ras);
-+ }
-+ } else {
-+ if (ra_miss) {
-+ if (index_in_stride_window(index, ras, inode) &&
-+ stride_io_mode(ras)) {
-+ /* If stride read-ahead hits a cache miss, the stride
-+  * detector is not reset, to avoid the overhead of
-+  * re-detecting the read-ahead mode. */
-+ if (index != ras->ras_last_readpage + 1)
-+ ras->ras_consecutive_pages = 0;
-+ RAS_CDEBUG(ras);
-+ } else {
-+ /*Reset both stride window and normal RA window*/
-+ ras_reset(ras, index);
-+ ras->ras_consecutive_pages++;
-+ ras_stride_reset(ras);
-+ GOTO(out_unlock, 0);
-+ }
-+ } else if (stride_io_mode(ras)) {
-+ /* If this is a contiguous read while currently in stride
-+  * I/O mode, check whether the stride step is still valid;
-+  * if not, reset the stride read-ahead window. */
-+ if (!index_in_stride_window(index, ras, inode)) {
-+ /*Shrink stride read-ahead window to be zero*/
-+ ras_stride_reset(ras);
-+ ras->ras_window_len = 0;
-+ ras->ras_next_readahead = index;
-+ }
-+ }
-+ }
-+ ras->ras_consecutive_pages++;
-+ ras_update_stride_detector(ras, index);
-+ ras->ras_last_readpage = index;
-+ ras_set_start(ras, index);
-+ ras->ras_next_readahead = max(ras->ras_window_start,
-+ ras->ras_next_readahead);
-+ RAS_CDEBUG(ras);
-+
-+ /* Trigger RA in the mmap case where ras_consecutive_requests
-+ * is not incremented and thus can't be used to trigger RA */
-+ if (!ras->ras_window_len && ras->ras_consecutive_pages == 4) {
-+ ras->ras_window_len = RAS_INCREASE_STEP;
-+ GOTO(out_unlock, 0);
-+ }
-+
-+ /* Initially reset the stride window offset to next_readahead*/
-+ if (ras->ras_consecutive_stride_requests == 2 && stride_detect)
-+ ras_set_stride_offset(ras);
-+
-+ /* The initial ras_window_len is set to the request size. To avoid
-+ * uselessly reading and discarding pages for random IO the window is
-+ * only increased once per consecutive request received. */
-+ if ((ras->ras_consecutive_requests > 1 &&
-+ !ras->ras_request_index) || stride_detect) {
-+ if (stride_io_mode(ras))
-+ ras_stride_increase_window(ras, ra, RAS_INCREASE_STEP);
-+ else
-+ ras->ras_window_len = min(ras->ras_window_len +
-+ RAS_INCREASE_STEP,
-+ ra->ra_max_pages);
-+ }
-+ EXIT;
-+out_unlock:
-+ RAS_CDEBUG(ras);
-+ ras->ras_request_index++;
-+ spin_unlock(&ras->ras_lock);
-+ spin_unlock(&sbi->ll_lock);
-+ return;
-+}
-+
-+int ll_writepage(struct page *page)
-+{
-+ struct inode *inode = page->mapping->host;
-+ struct ll_inode_info *lli = ll_i2info(inode);
-+ struct obd_export *exp;
-+ struct ll_async_page *llap;
-+ int rc = 0;
-+ ENTRY;
-+
-+ LASSERT(PageLocked(page));
-+
-+ exp = ll_i2obdexp(inode);
-+ if (exp == NULL)
-+ GOTO(out, rc = -EINVAL);
-+
-+ llap = llap_from_page(page, LLAP_ORIGIN_WRITEPAGE);
-+ if (IS_ERR(llap))
-+ GOTO(out, rc = PTR_ERR(llap));
-+
-+ LASSERT(!llap->llap_nocache);
-+ LASSERT(!PageWriteback(page));
-+ set_page_writeback(page);
-+
-+ page_cache_get(page);
-+ if (llap->llap_write_queued) {
-+ LL_CDEBUG_PAGE(D_PAGE, page, "marking urgent\n");
-+ rc = obd_set_async_flags(exp, lli->lli_smd, NULL,
-+ llap->llap_cookie,
-+ ASYNC_READY | ASYNC_URGENT);
-+ } else {
-+ rc = queue_or_sync_write(exp, inode, llap, CFS_PAGE_SIZE,
-+ ASYNC_READY | ASYNC_URGENT);
-+ }
-+ if (rc) {
-+ /* re-dirty page on error so it retries write */
-+ if (PageWriteback(page))
-+ end_page_writeback(page);
-+
-+ /* only resend the page if its IO has not started */
-+ if (!PageError(page))
-+ ll_redirty_page(page);
-+
-+ page_cache_release(page);
-+ }
-+out:
-+ if (rc) {
-+ if (!lli->lli_async_rc)
-+ lli->lli_async_rc = rc;
-+ /* resend page only for not started IO*/
-+ unlock_page(page);
-+ }
-+ RETURN(rc);
-+}
-+
-+/*
-+ * for now we do our readpage the same on both 2.4 and 2.5. The kernel's
-+ * read-ahead assumes it is valid to issue readpage all the way up to
-+ * i_size, but our dlm locks make that not the case. We disable the
-+ * kernel's read-ahead and do our own by walking ahead in the page cache
-+ * checking for dlm lock coverage. the main difference between 2.4 and
-+ * 2.6 is how read-ahead gets batched and issued, but we're using our own,
-+ * so they look the same.
-+ */
-+int ll_readpage(struct file *filp, struct page *page)
-+{
-+ struct ll_file_data *fd = LUSTRE_FPRIVATE(filp);
-+ struct inode *inode = page->mapping->host;
-+ struct obd_export *exp;
-+ struct ll_async_page *llap;
-+ struct obd_io_group *oig = NULL;
-+ struct lustre_handle *lockh = NULL;
-+ int rc;
-+ ENTRY;
-+
-+ LASSERT(PageLocked(page));
-+ LASSERT(!PageUptodate(page));
-+ CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),offset=%Lu=%#Lx\n",
-+ inode->i_ino, inode->i_generation, inode,
-+ (((loff_t)page->index) << CFS_PAGE_SHIFT),
-+ (((loff_t)page->index) << CFS_PAGE_SHIFT));
-+ LASSERT(atomic_read(&filp->f_dentry->d_inode->i_count) > 0);
-+
-+ if (!ll_i2info(inode)->lli_smd) {
-+ /* File with no objects - one big hole */
-+ /* We use this only because remove_from_page_cache() is not
-+  * exported; we bring the page back up to date ourselves. */
-+ ll_truncate_complete_page(page);
-+ clear_page(kmap(page));
-+ kunmap(page);
-+ SetPageUptodate(page);
-+ unlock_page(page);
-+ RETURN(0);
-+ }
-+
-+ rc = oig_init(&oig);
-+ if (rc < 0)
-+ GOTO(out, rc);
-+
-+ exp = ll_i2obdexp(inode);
-+ if (exp == NULL)
-+ GOTO(out, rc = -EINVAL);
-+
-+ if (fd->fd_flags & LL_FILE_GROUP_LOCKED)
-+ lockh = &fd->fd_cwlockh;
-+
-+ llap = llap_from_page_with_lockh(page, LLAP_ORIGIN_READPAGE, lockh);
-+ if (IS_ERR(llap)) {
-+ if (PTR_ERR(llap) == -ENOLCK) {
-+ CWARN("ino %lu page %lu (%llu) not covered by "
-+ "a lock (mmap?). check debug logs.\n",
-+ inode->i_ino, page->index,
-+ (long long)page->index << PAGE_CACHE_SHIFT);
-+ }
-+ GOTO(out, rc = PTR_ERR(llap));
-+ }
-+
-+ if (ll_i2sbi(inode)->ll_ra_info.ra_max_pages)
-+ ras_update(ll_i2sbi(inode), inode, &fd->fd_ras, page->index,
-+ llap->llap_defer_uptodate);
-+
-+
-+ if (llap->llap_defer_uptodate) {
-+ /* This is the callpath if we got the page from a readahead */
-+ llap->llap_ra_used = 1;
-+ rc = ll_readahead(&fd->fd_ras, exp, page->mapping, oig,
-+ fd->fd_flags);
-+ if (rc > 0)
-+ obd_trigger_group_io(exp, ll_i2info(inode)->lli_smd,
-+ NULL, oig);
-+ LL_CDEBUG_PAGE(D_PAGE, page, "marking uptodate from defer\n");
-+ SetPageUptodate(page);
-+ unlock_page(page);
-+ GOTO(out_oig, rc = 0);
-+ }
-+
-+ rc = ll_issue_page_read(exp, llap, oig, 0);
-+ if (rc)
-+ GOTO(out, rc);
-+
-+ LL_CDEBUG_PAGE(D_PAGE, page, "queued readpage\n");
-+ /* We have just requested the actual page we want, see if we can tack
-+ * on some readahead to that page's RPC before it is sent. */
-+ if (ll_i2sbi(inode)->ll_ra_info.ra_max_pages)
-+ ll_readahead(&fd->fd_ras, exp, page->mapping, oig,
-+ fd->fd_flags);
-+
-+ rc = obd_trigger_group_io(exp, ll_i2info(inode)->lli_smd, NULL, oig);
-+
-+out:
-+ if (rc)
-+ unlock_page(page);
-+out_oig:
-+ if (oig != NULL)
-+ oig_release(oig);
-+ RETURN(rc);
-+}
-+
-+static void ll_file_put_pages(struct page **pages, int numpages)
-+{
-+ int i;
-+ struct page **pp;
-+ ENTRY;
-+
-+ for (i = 0, pp = pages; i < numpages; i++, pp++) {
-+ if (*pp) {
-+ LL_CDEBUG_PAGE(D_PAGE, (*pp), "free\n");
-+ __ll_put_llap(*pp);
-+ if (page_private(*pp))
-+ CERROR("the llap wasn't freed\n");
-+ (*pp)->mapping = NULL;
-+ if (page_count(*pp) != 1)
-+ CERROR("page %p, flags %#lx, count %i, private %p\n",
-+ (*pp), (unsigned long)(*pp)->flags, page_count(*pp),
-+ (void*)page_private(*pp));
-+ __free_pages(*pp, 0);
-+ }
-+ }
-+ OBD_FREE(pages, numpages * sizeof(struct page*));
-+ EXIT;
-+}
-+
-+static struct page **ll_file_prepare_pages(int numpages, struct inode *inode,
-+ unsigned long first)
-+{
-+ struct page **pages;
-+ int i;
-+ int rc = 0;
-+ ENTRY;
-+
-+ OBD_ALLOC(pages, sizeof(struct page *) * numpages);
-+ if (pages == NULL)
-+ RETURN(ERR_PTR(-ENOMEM));
-+ for (i = 0; i < numpages; i++) {
-+ struct page *page;
-+ struct ll_async_page *llap;
-+
-+ page = alloc_pages(GFP_HIGHUSER, 0);
-+ if (page == NULL)
-+ GOTO(err, rc = -ENOMEM);
-+ pages[i] = page;
-+ /* llap_from_page needs page index and mapping to be set */
-+ page->index = first++;
-+ page->mapping = inode->i_mapping;
-+ llap = llap_from_page(page, LLAP_ORIGIN_LOCKLESS_IO);
-+ if (IS_ERR(llap))
-+ GOTO(err, rc = PTR_ERR(llap));
-+ llap->llap_lockless_io_page = 1;
-+ }
-+ RETURN(pages);
-+err:
-+ ll_file_put_pages(pages, numpages);
-+ RETURN(ERR_PTR(rc));
-+ }
-+
-+static ssize_t ll_file_copy_pages(struct page **pages, int numpages,
-+ const struct iovec *iov, unsigned long nsegs,
-+ ssize_t iov_offset, loff_t pos, size_t count,
-+ int rw)
-+{
-+ ssize_t amount = 0;
-+ int i;
-+ int updatechecksum = ll_i2sbi(pages[0]->mapping->host)->ll_flags &
-+ LL_SBI_LLITE_CHECKSUM;
-+ ENTRY;
-+
-+ for (i = 0; i < numpages; i++) {
-+ unsigned offset, bytes, left = 0;
-+ char *vaddr;
-+
-+ vaddr = kmap(pages[i]);
-+ offset = pos & (CFS_PAGE_SIZE - 1);
-+ bytes = min_t(unsigned, CFS_PAGE_SIZE - offset, count);
-+ LL_CDEBUG_PAGE(D_PAGE, pages[i], "op = %s, addr = %p, "
-+ "bytes = %u\n",
-+ (rw == WRITE) ? "CFU" : "CTU",
-+ vaddr + offset, bytes);
-+ while (bytes > 0 && !left && nsegs) {
-+ unsigned copy = min_t(ssize_t, bytes,
-+ iov->iov_len - iov_offset);
-+ if (rw == WRITE) {
-+ left = copy_from_user(vaddr + offset,
-+ iov->iov_base +iov_offset,
-+ copy);
-+ if (updatechecksum) {
-+ struct ll_async_page *llap;
-+
-+ llap = llap_cast_private(pages[i]);
-+ llap->llap_checksum =
-+ init_checksum(OSC_DEFAULT_CKSUM);
-+ llap->llap_checksum =
-+ compute_checksum(llap->llap_checksum,
-+ vaddr,CFS_PAGE_SIZE,
-+ OSC_DEFAULT_CKSUM);
-+ }
-+ } else {
-+ left = copy_to_user(iov->iov_base + iov_offset,
-+ vaddr + offset, copy);
-+ }
-+
-+ amount += copy;
-+ count -= copy;
-+ pos += copy;
-+ iov_offset += copy;
-+ bytes -= copy;
-+ if (iov_offset == iov->iov_len) {
-+ iov_offset = 0;
-+ iov++;
-+ nsegs--;
-+ }
-+ }
-+ kunmap(pages[i]);
-+ if (left) {
-+ amount -= left;
-+ break;
-+ }
-+ }
-+ if (amount == 0)
-+ RETURN(-EFAULT);
-+ RETURN(amount);
-+}
-+
-+static int ll_file_oig_pages(struct inode * inode, struct page **pages,
-+ int numpages, loff_t pos, size_t count, int rw)
-+{
-+ struct obd_io_group *oig;
-+ struct ll_inode_info *lli = ll_i2info(inode);
-+ struct obd_export *exp;
-+ loff_t org_pos = pos;
-+ obd_flag brw_flags;
-+ int rc;
-+ int i;
-+ ENTRY;
-+
-+ exp = ll_i2obdexp(inode);
-+ if (exp == NULL)
-+ RETURN(-EINVAL);
-+ rc = oig_init(&oig);
-+ if (rc)
-+ RETURN(rc);
-+ brw_flags = OBD_BRW_SRVLOCK;
-+ if (cfs_capable(CFS_CAP_SYS_RESOURCE))
-+ brw_flags |= OBD_BRW_NOQUOTA;
-+
-+ for (i = 0; i < numpages; i++) {
-+ struct ll_async_page *llap;
-+ unsigned from, bytes;
-+
-+ from = pos & (CFS_PAGE_SIZE - 1);
-+ bytes = min_t(unsigned, CFS_PAGE_SIZE - from,
-+ count - pos + org_pos);
-+ llap = llap_cast_private(pages[i]);
-+ LASSERT(llap);
-+
-+ lock_page(pages[i]);
-+
-+ LL_CDEBUG_PAGE(D_PAGE, pages[i], "offset "LPU64","
-+ " from %u, bytes = %u\n",
-+ pos, from, bytes);
-+ LASSERTF(pos >> CFS_PAGE_SHIFT == pages[i]->index,
-+ "wrong page index %lu (%lu)\n",
-+ pages[i]->index,
-+ (unsigned long)(pos >> CFS_PAGE_SHIFT));
-+ rc = obd_queue_group_io(exp, lli->lli_smd, NULL, oig,
-+ llap->llap_cookie,
-+ (rw == WRITE) ?
-+ OBD_BRW_WRITE:OBD_BRW_READ,
-+ from, bytes, brw_flags,
-+ ASYNC_READY | ASYNC_URGENT |
-+ ASYNC_COUNT_STABLE | ASYNC_GROUP_SYNC);
-+ if (rc) {
-+ i++;
-+ GOTO(out, rc);
-+ }
-+ pos += bytes;
-+ }
-+ rc = obd_trigger_group_io(exp, lli->lli_smd, NULL, oig);
-+ if (rc)
-+ GOTO(out, rc);
-+ rc = oig_wait(oig);
-+out:
-+ while(--i >= 0)
-+ unlock_page(pages[i]);
-+ oig_release(oig);
-+ RETURN(rc);
-+}
-+
-+/* Advance through the passed iov, adjusting the iov pointer as necessary,
-+ * and return the starting offset within the entry we end up pointing at.
-+ * Also reduce nr_segs as needed. */
-+static ssize_t ll_iov_advance(const struct iovec **iov, unsigned long *nr_segs,
-+ ssize_t offset)
-+{
-+ while (*nr_segs > 0) {
-+ if ((*iov)->iov_len > offset)
-+ return ((*iov)->iov_len - offset);
-+ offset -= (*iov)->iov_len;
-+ (*iov)++;
-+ (*nr_segs)--;
-+ }
-+ return 0;
-+}
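
A minimal userspace sketch of the advance-through-an-iovec idea is below. It follows the contract described in the comment above (return the starting offset within the entry we land on, and shrink nr_segs) rather than copying the patch code verbatim; names and test values are made up.

#include <stdio.h>
#include <sys/uio.h>

/* Consume `offset` bytes from the front of an iovec array, drop exhausted
 * segments, and return the offset into the segment the cursor lands on. */
static size_t iov_advance(const struct iovec **iov, unsigned long *nr_segs,
                          size_t offset)
{
        while (*nr_segs > 0) {
                if ((*iov)->iov_len > offset)
                        return offset;          /* start here in this segment */
                offset -= (*iov)->iov_len;      /* segment fully consumed */
                (*iov)++;
                (*nr_segs)--;
        }
        return 0;
}

int main(void)
{
        char a[100], b[50], c[200];
        struct iovec vec[3] = {
                { .iov_base = a, .iov_len = sizeof(a) },
                { .iov_base = b, .iov_len = sizeof(b) },
                { .iov_base = c, .iov_len = sizeof(c) },
        };
        const struct iovec *cur = vec;
        unsigned long nsegs = 3;
        size_t off = iov_advance(&cur, &nsegs, 120);   /* skip 120 bytes */

        printf("segment %ld, offset %zu, %lu segments left\n",
               (long)(cur - vec), off, nsegs);
        return 0;
}
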
-+
-+ssize_t ll_file_lockless_io(struct file *file, const struct iovec *iov,
-+ unsigned long nr_segs,
-+ loff_t *ppos, int rw, ssize_t count)
-+{
-+ loff_t pos;
-+ struct inode *inode = file->f_dentry->d_inode;
-+ ssize_t rc = 0;
-+ int max_pages;
-+ size_t amount = 0;
-+ unsigned long first, last;
-+ const struct iovec *iv = &iov[0];
-+ unsigned long nsegs = nr_segs;
-+ unsigned long offset = 0;
-+ ENTRY;
-+
-+ if (rw == READ) {
-+ loff_t isize;
-+
-+ ll_inode_size_lock(inode, 0);
-+ isize = i_size_read(inode);
-+ ll_inode_size_unlock(inode, 0);
-+ if (*ppos >= isize)
-+ GOTO(out, rc = 0);
-+ if (*ppos + count >= isize)
-+ count -= *ppos + count - isize;
-+ if (count == 0)
-+ GOTO(out, rc);
-+ } else {
-+ rc = generic_write_checks(file, ppos, &count, 0);
-+ if (rc)
-+ GOTO(out, rc);
-+ rc = ll_remove_suid(file->f_dentry, file->f_vfsmnt);
-+ if (rc)
-+ GOTO(out, rc);
-+ }
-+
-+ pos = *ppos;
-+ first = pos >> CFS_PAGE_SHIFT;
-+ last = (pos + count - 1) >> CFS_PAGE_SHIFT;
-+ max_pages = PTLRPC_MAX_BRW_PAGES *
-+ ll_i2info(inode)->lli_smd->lsm_stripe_count;
-+ CDEBUG(D_INFO, "%u, stripe_count = %u\n",
-+ PTLRPC_MAX_BRW_PAGES /* max_pages_per_rpc */,
-+ ll_i2info(inode)->lli_smd->lsm_stripe_count);
-+
-+ while (first <= last && rc >= 0) {
-+ int pages_for_io;
-+ struct page **pages;
-+ size_t bytes = count - amount;
-+
-+ pages_for_io = min_t(int, last - first + 1, max_pages);
-+ pages = ll_file_prepare_pages(pages_for_io, inode, first);
-+ if (IS_ERR(pages)) {
-+ rc = PTR_ERR(pages);
-+ break;
-+ }
-+ if (rw == WRITE) {
-+ rc = ll_file_copy_pages(pages, pages_for_io, iv, nsegs,
-+ offset, pos + amount, bytes,
-+ rw);
-+ if (rc < 0)
-+ GOTO(put_pages, rc);
-+ offset = ll_iov_advance(&iv, &nsegs, offset + rc);
-+ bytes = rc;
-+ }
-+ rc = ll_file_oig_pages(inode, pages, pages_for_io,
-+ pos + amount, bytes, rw);
-+ if (rc)
-+ GOTO(put_pages, rc);
-+ if (rw == READ) {
-+ rc = ll_file_copy_pages(pages, pages_for_io, iv, nsegs,
-+ offset, pos + amount, bytes, rw);
-+ if (rc < 0)
-+ GOTO(put_pages, rc);
-+ offset = ll_iov_advance(&iv, &nsegs, offset + rc);
-+ bytes = rc;
-+ }
-+ amount += bytes;
-+put_pages:
-+ ll_file_put_pages(pages, pages_for_io);
-+ first += pages_for_io;
-+ /* a short read/write check */
-+ if (pos + amount < ((loff_t)first << CFS_PAGE_SHIFT))
-+ break;
-+ /* Check whether we ran out of userspace buffers (how could
-+ that happen?) */
-+ if (nsegs == 0)
-+ break;
-+ }
-+ /* NOTE: don't update i_size and KMS in absence of LDLM locks even
-+ * write makes the file large */
-+ file_accessed(file);
-+ if (rw == READ && amount < count && rc == 0) {
-+ unsigned long not_cleared;
-+
-+ while (nsegs > 0) {
-+ ssize_t to_clear = min_t(ssize_t, count - amount,
-+ iv->iov_len - offset);
-+ not_cleared = clear_user(iv->iov_base + offset,
-+ to_clear);
-+ amount += to_clear - not_cleared;
-+ if (not_cleared) {
-+ rc = -EFAULT;
-+ break;
-+ }
-+ offset = 0;
-+ iv++;
-+ nsegs--;
-+ }
-+ }
-+ if (amount > 0) {
-+ lprocfs_counter_add(ll_i2sbi(inode)->ll_stats,
-+ (rw == WRITE) ?
-+ LPROC_LL_LOCKLESS_WRITE :
-+ LPROC_LL_LOCKLESS_READ,
-+ (long)amount);
-+ *ppos += amount;
-+ RETURN(amount);
-+ }
-+out:
-+ RETURN(rc);
-+}
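
One detail worth keeping in mind when reading the loop above is how a byte range maps onto the inclusive page range [first, last]. The trivial userspace sketch below shows that arithmetic; PAGE_SHIFT_SKETCH merely stands in for CFS_PAGE_SHIFT (12 for 4 KiB pages) and the numbers are made up.

#include <stdio.h>

#define PAGE_SHIFT_SKETCH 12

int main(void)
{
        long long pos = 5000, count = 10000;
        unsigned long first = pos >> PAGE_SHIFT_SKETCH;
        unsigned long last = (pos + count - 1) >> PAGE_SHIFT_SKETCH;

        /* A byte range [pos, pos + count) touches pages first..last. */
        printf("bytes [%lld, %lld) -> pages %lu..%lu (%lu pages)\n",
               pos, pos + count, first, last, last - first + 1);
        return 0;
}
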
-diff -urNad lustre~/lustre/llite/symlink.c lustre/lustre/llite/symlink.c
---- lustre~/lustre/llite/symlink.c 2009-03-12 10:32:27.000000000 +0100
-+++ lustre/lustre/llite/symlink.c 2009-03-13 09:45:03.000000000 +0100
-@@ -177,8 +177,12 @@
- up(&lli->lli_size_sem);
- }
- if (rc) {
-+#ifdef HAVE_PATH_RELEASE
- path_release(nd); /* Kernel assumes that ->follow_link()
- releases nameidata on error */
-+#else
-+ path_put(&nd->path);
-+#endif
- GOTO(out, rc);
- }
-
-diff -urNad lustre~/lustre/lvfs/lvfs_linux.c lustre/lustre/lvfs/lvfs_linux.c
---- lustre~/lustre/lvfs/lvfs_linux.c 2009-03-12 10:32:27.000000000 +0100
-+++ lustre/lustre/lvfs/lvfs_linux.c 2009-03-13 09:45:03.000000000 +0100
-@@ -148,10 +148,10 @@
- */
-
- save->fs = get_fs();
-- LASSERT(atomic_read(&current->fs->pwd->d_count));
-+ LASSERT(atomic_read(&cfs_fs_pwd(current->fs)->d_count));
- LASSERT(atomic_read(&new_ctx->pwd->d_count));
-- save->pwd = dget(current->fs->pwd);
-- save->pwdmnt = mntget(current->fs->pwdmnt);
-+ save->pwd = dget(cfs_fs_pwd(current->fs));
-+ save->pwdmnt = mntget(cfs_fs_mnt(current->fs));
- save->luc.luc_umask = current->fs->umask;
-
- LASSERT(save->pwd);
-@@ -205,10 +205,10 @@
- atomic_read(&current->fs->pwdmnt->mnt_count));
- */
-
-- LASSERTF(current->fs->pwd == new_ctx->pwd, "%p != %p\n",
-- current->fs->pwd, new_ctx->pwd);
-- LASSERTF(current->fs->pwdmnt == new_ctx->pwdmnt, "%p != %p\n",
-- current->fs->pwdmnt, new_ctx->pwdmnt);
-+ LASSERTF(cfs_fs_pwd(current->fs) == new_ctx->pwd, "%p != %p\n",
-+ cfs_fs_pwd(current->fs), new_ctx->pwd);
-+ LASSERTF(cfs_fs_mnt(current->fs) == new_ctx->pwdmnt, "%p != %p\n",
-+ cfs_fs_mnt(current->fs), new_ctx->pwdmnt);
-
- set_fs(saved->fs);
- ll_set_fs_pwd(current->fs, saved->pwdmnt, saved->pwd);
-diff -urNad lustre~/lustre/mgc/mgc_request.c lustre/lustre/mgc/mgc_request.c
---- lustre~/lustre/mgc/mgc_request.c 2009-03-12 10:32:27.000000000 +0100
-+++ lustre/lustre/mgc/mgc_request.c 2009-03-13 09:45:03.000000000 +0100
-@@ -415,7 +415,7 @@
- obd->obd_lvfs_ctxt.fs = get_ds();
-
- push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
-- dentry = lookup_one_len(MOUNT_CONFIGS_DIR, current->fs->pwd,
-+ dentry = lookup_one_len(MOUNT_CONFIGS_DIR, cfs_fs_pwd(current->fs),
- strlen(MOUNT_CONFIGS_DIR));
- pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
- if (IS_ERR(dentry)) {
-diff -urNad lustre~/lustre/obdclass/linux/linux-module.c lustre/lustre/obdclass/linux/linux-module.c
---- lustre~/lustre/obdclass/linux/linux-module.c 2009-03-12 10:32:27.000000000 +0100
-+++ lustre/lustre/obdclass/linux/linux-module.c 2009-03-13 09:45:03.000000000 +0100
-@@ -419,13 +419,14 @@
- ENTRY;
-
- obd_sysctl_init();
-- proc_lustre_root = proc_mkdir("lustre", proc_root_fs);
-+ proc_lustre_root = lprocfs_register("fs/lustre", NULL,
-+ lprocfs_base, NULL);
- if (!proc_lustre_root) {
- printk(KERN_ERR
- "LustreError: error registering /proc/fs/lustre\n");
- RETURN(-ENOMEM);
- }
-- proc_version = lprocfs_add_vars(proc_lustre_root, lprocfs_base, NULL);
-+
- entry = create_proc_entry("devices", 0444, proc_lustre_root);
- if (entry == NULL) {
- CERROR("error registering /proc/fs/lustre/devices\n");
-diff -urNad lustre~/lustre/obdclass/linux/linux-sysctl.c lustre/lustre/obdclass/linux/linux-sysctl.c
---- lustre~/lustre/obdclass/linux/linux-sysctl.c 2009-03-12 10:32:27.000000000 +0100
-+++ lustre/lustre/obdclass/linux/linux-sysctl.c 2009-03-13 09:45:03.000000000 +0100
-@@ -56,7 +56,9 @@
-
- cfs_sysctl_table_header_t *obd_table_header = NULL;
-
--#define OBD_SYSCTL 300
-+#ifndef HAVE_SYSCTL_UNNUMBERED
-+
-+#define CTL_LUSTRE 300
-
- enum {
- OBD_FAIL_LOC = 1, /* control test failures instrumentation */
-@@ -74,6 +76,23 @@
- OBD_ALLOC_FAIL_RATE, /* memory allocation random failure rate */
- OBD_MAX_DIRTY_PAGES, /* maximum dirty pages */
- };
-+#else
-+#define CTL_LUSTRE CTL_UNNUMBERED
-+#define OBD_FAIL_LOC CTL_UNNUMBERED
-+#define OBD_FAIL_VAL CTL_UNNUMBERED
-+#define OBD_TIMEOUT CTL_UNNUMBERED
-+#define OBD_DUMP_ON_TIMEOUT CTL_UNNUMBERED
-+#define OBD_MEMUSED CTL_UNNUMBERED
-+#define OBD_PAGESUSED CTL_UNNUMBERED
-+#define OBD_MAXMEMUSED CTL_UNNUMBERED
-+#define OBD_MAXPAGESUSED CTL_UNNUMBERED
-+#define OBD_SYNCFILTER CTL_UNNUMBERED
-+#define OBD_LDLM_TIMEOUT CTL_UNNUMBERED
-+#define OBD_DUMP_ON_EVICTION CTL_UNNUMBERED
-+#define OBD_DEBUG_PEER_ON_TIMEOUT CTL_UNNUMBERED
-+#define OBD_ALLOC_FAIL_RATE CTL_UNNUMBERED
-+#define OBD_MAX_DIRTY_PAGES CTL_UNNUMBERED
-+#endif
-
- int LL_PROC_PROTO(proc_fail_loc)
- {
-@@ -120,7 +139,8 @@
- obd_max_dirty_pages = 4 << (20 - CFS_PAGE_SHIFT);
- }
- } else {
-- char buf[21];
-+ char buf[22];
-+ struct ctl_table dummy;
- int len;
-
- len = lprocfs_read_frac_helper(buf, sizeof(buf),
-@@ -129,7 +149,13 @@
- if (len > *lenp)
- len = *lenp;
- buf[len] = '\0';
-- if (copy_to_user(buffer, buf, len))
-+
-+ dummy = *table;
-+ dummy.data = buf;
-+ dummy.maxlen = sizeof(buf);
-+
-+ rc = ll_proc_dostring(&dummy,write,filp,buffer,lenp, ppos);
-+ if (rc)
- return -EFAULT;
- *lenp = len;
- }
-@@ -152,7 +178,8 @@
- (unsigned int*)table->data,
- OBD_ALLOC_FAIL_MULT);
- } else {
-- char buf[21];
-+ char buf[22];
-+ struct ctl_table dummy;
- int len;
-
- len = lprocfs_read_frac_helper(buf, sizeof(buf),
-@@ -161,7 +188,12 @@
- if (len > *lenp)
- len = *lenp;
- buf[len] = '\0';
-- if (copy_to_user(buffer, buf, len))
-+ dummy = *table;
-+ dummy.data = buf;
-+ dummy.maxlen = sizeof(buf);
-+
-+ rc = ll_proc_dostring(&dummy,write,filp,buffer,lenp, ppos);
-+ if(rc)
- return -EFAULT;
- *lenp = len;
- }
-@@ -172,6 +204,7 @@
-
- int LL_PROC_PROTO(proc_memory_alloc)
- {
-+ struct ctl_table dummy;
- char buf[22];
- int len;
- DECLARE_LL_PROC_PPOS_DECL;
-@@ -187,15 +220,17 @@
- if (len > *lenp)
- len = *lenp;
- buf[len] = '\0';
-- if (copy_to_user(buffer, buf, len))
-- return -EFAULT;
-- *lenp = len;
-- *ppos += *lenp;
-- return 0;
-+
-+ dummy = *table;
-+ dummy.data = buf;
-+ dummy.maxlen = sizeof(buf);
-+
-+ return ll_proc_dostring(&dummy,write,filp,buffer,lenp, ppos);
- }
-
- int LL_PROC_PROTO(proc_pages_alloc)
- {
-+ struct ctl_table dummy;
- char buf[22];
- int len;
- DECLARE_LL_PROC_PPOS_DECL;
-@@ -211,15 +246,17 @@
- if (len > *lenp)
- len = *lenp;
- buf[len] = '\0';
-- if (copy_to_user(buffer, buf, len))
-- return -EFAULT;
-- *lenp = len;
-- *ppos += *lenp;
-- return 0;
-+
-+ dummy = *table;
-+ dummy.data = buf;
-+ dummy.maxlen = sizeof(buf);
-+
-+ return ll_proc_dostring(&dummy,write,filp,buffer,lenp, ppos);
- }
-
- int LL_PROC_PROTO(proc_mem_max)
- {
-+ struct ctl_table dummy;
- char buf[22];
- int len;
- DECLARE_LL_PROC_PPOS_DECL;
-@@ -235,17 +272,19 @@
- if (len > *lenp)
- len = *lenp;
- buf[len] = '\0';
-- if (copy_to_user(buffer, buf, len))
-- return -EFAULT;
-- *lenp = len;
-- *ppos += *lenp;
-- return 0;
-+
-+ dummy = *table;
-+ dummy.data = buf;
-+ dummy.maxlen = sizeof(buf);
-+
-+ return ll_proc_dostring(&dummy,write,filp,buffer,lenp, ppos);
- }
-
- int LL_PROC_PROTO(proc_pages_max)
- {
- char buf[22];
- int len;
-+ struct ctl_table dummy;
- DECLARE_LL_PROC_PPOS_DECL;
-
- if (!*lenp || (*ppos && !write)) {
-@@ -254,16 +293,17 @@
- }
- if (write)
- return -EINVAL;
-+ dummy = *table;
-+ dummy.data = buf;
-+ dummy.maxlen = sizeof(buf);
-+ len = snprintf(buf, sizeof(buf), LPU64,
-+ obd_pages_max());
-
-- len = snprintf(buf, sizeof(buf), LPU64"\n", obd_pages_max());
-- if (len > *lenp)
-- len = *lenp;
-- buf[len] = '\0';
-- if (copy_to_user(buffer, buf, len))
-- return -EFAULT;
-- *lenp = len;
-- *ppos += *lenp;
-- return 0;
-+ if (len > *lenp)
-+ len = *lenp;
-+ buf[len] = '\0';
-+
-+ return ll_proc_dostring(&dummy,write,filp,buffer,lenp, ppos);
- }
-
- static cfs_sysctl_table_t obd_table[] = {
-@@ -281,7 +321,8 @@
- .data = &obd_fail_val,
- .maxlen = sizeof(int),
- .mode = 0644,
-- .proc_handler = &proc_dointvec
-+ .proc_handler = &proc_dointvec,
-+ .strategy = &sysctl_intvec,
- },
- {
- .ctl_name = OBD_TIMEOUT,
-@@ -297,7 +338,7 @@
- .data = &obd_debug_peer_on_timeout,
- .maxlen = sizeof(int),
- .mode = 0644,
-- .proc_handler = &proc_dointvec
-+ .proc_handler = &proc_dointvec,
- },
- {
- .ctl_name = OBD_DUMP_ON_TIMEOUT,
-@@ -305,7 +346,7 @@
- .data = &obd_dump_on_timeout,
- .maxlen = sizeof(int),
- .mode = 0644,
-- .proc_handler = &proc_dointvec
-+ .proc_handler = &proc_dointvec,
- },
- {
- .ctl_name = OBD_DUMP_ON_EVICTION,
-@@ -313,7 +354,7 @@
- .data = &obd_dump_on_eviction,
- .maxlen = sizeof(int),
- .mode = 0644,
-- .proc_handler = &proc_dointvec
-+ .proc_handler = &proc_dointvec,
- },
- {
- .ctl_name = OBD_MEMUSED,
-@@ -321,7 +362,7 @@
- .data = NULL,
- .maxlen = 0,
- .mode = 0444,
-- .proc_handler = &proc_memory_alloc
-+ .proc_handler = &proc_memory_alloc,
- },
- {
- .ctl_name = OBD_PAGESUSED,
-@@ -329,7 +370,7 @@
- .data = NULL,
- .maxlen = 0,
- .mode = 0444,
-- .proc_handler = &proc_pages_alloc
-+ .proc_handler = &proc_pages_alloc,
- },
- {
- .ctl_name = OBD_MAXMEMUSED,
-@@ -337,7 +378,7 @@
- .data = NULL,
- .maxlen = 0,
- .mode = 0444,
-- .proc_handler = &proc_mem_max
-+ .proc_handler = &proc_mem_max,
- },
- {
- .ctl_name = OBD_MAXPAGESUSED,
-@@ -345,7 +386,7 @@
- .data = NULL,
- .maxlen = 0,
- .mode = 0444,
-- .proc_handler = &proc_pages_max
-+ .proc_handler = &proc_pages_max,
- },
- {
- .ctl_name = OBD_LDLM_TIMEOUT,
-@@ -378,7 +419,7 @@
-
- static cfs_sysctl_table_t parent_table[] = {
- {
-- .ctl_name = OBD_SYSCTL,
-+ .ctl_name = CTL_LUSTRE,
- .procname = "lustre",
- .data = NULL,
- .maxlen = 0,
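
One recurring change in the linux-sysctl.c hunk above is the compile-time fallback: when the kernel defines CTL_UNNUMBERED, every ctl_name is set to it, otherwise the old fixed binary numbers are kept. A tiny stand-alone sketch of that pattern follows; the numeric values are placeholders so it compiles in userspace, and only the #ifdef structure matters.

#include <stdio.h>

#define HAVE_SYSCTL_UNNUMBERED 1        /* pretend configure detected it */

#ifdef HAVE_SYSCTL_UNNUMBERED
# define CTL_UNNUMBERED 0               /* placeholder for the kernel value */
# define CTL_LUSTRE     CTL_UNNUMBERED
# define OBD_TIMEOUT    CTL_UNNUMBERED
#else
# define CTL_LUSTRE     300
# define OBD_TIMEOUT    3
#endif

int main(void)
{
        printf("lustre ctl_name = %d, timeout ctl_name = %d\n",
               CTL_LUSTRE, OBD_TIMEOUT);
        return 0;
}
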
-diff -urNad lustre~/lustre/obdclass/lprocfs_status.c lustre/lustre/obdclass/lprocfs_status.c
---- lustre~/lustre/obdclass/lprocfs_status.c 2009-03-12 10:32:27.000000000 +0100
-+++ lustre/lustre/obdclass/lprocfs_status.c 2009-03-13 09:45:03.000000000 +0100
-@@ -151,7 +151,7 @@
-
- LPROCFS_ENTRY();
- OBD_FAIL_TIMEOUT(OBD_FAIL_LPROC_REMOVE, 10);
-- if (!dp->deleted && dp->read_proc)
-+ if (!LPROCFS_CHECK_DELETED(dp) && dp->read_proc)
- rc = dp->read_proc(page, &start, *ppos, PAGE_SIZE,
- &eof, dp->data);
- LPROCFS_EXIT();
-@@ -191,7 +191,7 @@
- int rc = -EIO;
-
- LPROCFS_ENTRY();
-- if (!dp->deleted && dp->write_proc)
-+ if (!LPROCFS_CHECK_DELETED(dp) && dp->write_proc)
- rc = dp->write_proc(f, buf, size, dp->data);
- LPROCFS_EXIT();
- return rc;
-diff -urNad lustre~/lustre/obdclass/lprocfs_status.c.orig lustre/lustre/obdclass/lprocfs_status.c.orig
---- lustre~/lustre/obdclass/lprocfs_status.c.orig 1970-01-01 00:00:00.000000000 +0000
-+++ lustre/lustre/obdclass/lprocfs_status.c.orig 2009-03-13 09:45:03.000000000 +0100
-@@ -0,0 +1,2062 @@
-+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-+ * vim:expandtab:shiftwidth=8:tabstop=8:
-+ *
-+ * GPL HEADER START
-+ *
-+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
-+ *
-+ * This program is free software; you can redistribute it and/or modify
-+ * it under the terms of the GNU General Public License version 2 only,
-+ * as published by the Free Software Foundation.
-+ *
-+ * This program is distributed in the hope that it will be useful, but
-+ * WITHOUT ANY WARRANTY; without even the implied warranty of
-+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-+ * General Public License version 2 for more details (a copy is included
-+ * in the LICENSE file that accompanied this code).
-+ *
-+ * You should have received a copy of the GNU General Public License
-+ * version 2 along with this program; If not, see
-+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
-+ *
-+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
-+ * CA 95054 USA or visit www.sun.com if you need additional information or
-+ * have any questions.
-+ *
-+ * GPL HEADER END
-+ */
-+/*
-+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved
-+ * Use is subject to license terms.
-+ */
-+/*
-+ * This file is part of Lustre, http://www.lustre.org/
-+ * Lustre is a trademark of Sun Microsystems, Inc.
-+ *
-+ * lustre/obdclass/lprocfs_status.c
-+ *
-+ * Author: Hariharan Thantry <thantry at users.sourceforge.net>
-+ */
-+
-+#ifndef EXPORT_SYMTAB
-+# define EXPORT_SYMTAB
-+#endif
-+#define DEBUG_SUBSYSTEM S_CLASS
-+
-+#ifndef __KERNEL__
-+# include <liblustre.h>
-+#endif
-+
-+#include <obd_class.h>
-+#include <lprocfs_status.h>
-+#include <lustre_fsfilt.h>
-+
-+#if defined(LPROCFS)
-+
-+#define MAX_STRING_SIZE 128
-+
-+/* for bug 10866, global variable */
-+DECLARE_RWSEM(_lprocfs_lock);
-+EXPORT_SYMBOL(_lprocfs_lock);
-+
-+int lprocfs_seq_release(struct inode *inode, struct file *file)
-+{
-+ LPROCFS_EXIT();
-+ return seq_release(inode, file);
-+}
-+EXPORT_SYMBOL(lprocfs_seq_release);
-+
-+struct proc_dir_entry *lprocfs_srch(struct proc_dir_entry *head,
-+ const char *name)
-+{
-+ struct proc_dir_entry *temp;
-+
-+ if (head == NULL)
-+ return NULL;
-+
-+ LPROCFS_ENTRY();
-+ temp = head->subdir;
-+ while (temp != NULL) {
-+ if (strcmp(temp->name, name) == 0) {
-+ LPROCFS_EXIT();
-+ return temp;
-+ }
-+
-+ temp = temp->next;
-+ }
-+ LPROCFS_EXIT();
-+ return NULL;
-+}
-+
-+/* lprocfs API calls */
-+
-+/* Function that emulates snprintf but also has the side effect of advancing
-+ the page pointer for the next write into the buffer, incrementing the total
-+ length written to the buffer, and decrementing the size left in the
-+ buffer. */
-+static int lprocfs_obd_snprintf(char **page, int end, int *len,
-+ const char *format, ...)
-+{
-+ va_list list;
-+ int n;
-+
-+ if (*len >= end)
-+ return 0;
-+
-+ va_start(list, format);
-+ n = vsnprintf(*page, end - *len, format, list);
-+ va_end(list);
-+
-+ *page += n; *len += n;
-+ return n;
-+}
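
The helper above is a recurring lprocfs pattern: print into a buffer, then advance the write pointer and the running length so the next call appends after the previous output. A minimal userspace analogue is sketched below; unlike the kernel helper it clamps the advance to what actually fit, so the pointer can never run past the buffer. The names are made up.

#include <stdarg.h>
#include <stdio.h>

/* Write at *page, then move the pointer and running length forward so that
 * the next call appends after the previous output. */
static int buf_snprintf(char **page, int end, int *len, const char *format, ...)
{
        va_list list;
        int n;

        if (*len >= end)
                return 0;

        va_start(list, format);
        n = vsnprintf(*page, end - *len, format, list);
        va_end(list);

        if (n > end - *len - 1)         /* output was truncated */
                n = end - *len - 1;
        *page += n;
        *len += n;
        return n;
}

int main(void)
{
        char buf[64];
        char *p = buf;
        int len = 0;

        buf_snprintf(&p, sizeof(buf), &len, "status: %s,", "ok");
        buf_snprintf(&p, sizeof(buf), &len, " %d bytes so far", len);
        printf("%s\n", buf);
        return 0;
}
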
-+
-+int lprocfs_add_simple(struct proc_dir_entry *root, char *name,
-+ read_proc_t *read_proc, write_proc_t *write_proc,
-+ void *data)
-+{
-+ struct proc_dir_entry *proc;
-+ mode_t mode = 0;
-+
-+ if (root == NULL || name == NULL)
-+ return -EINVAL;
-+ if (read_proc)
-+ mode = 0444;
-+ if (write_proc)
-+ mode |= 0200;
-+ proc = create_proc_entry(name, mode, root);
-+ if (!proc) {
-+ CERROR("LprocFS: No memory to create /proc entry %s", name);
-+ return -ENOMEM;
-+ }
-+ proc->read_proc = read_proc;
-+ proc->write_proc = write_proc;
-+ proc->data = data;
-+ return 0;
-+}
-+
-+static ssize_t lprocfs_fops_read(struct file *f, char __user *buf, size_t size,
-+ loff_t *ppos)
-+{
-+ struct proc_dir_entry *dp = PDE(f->f_dentry->d_inode);
-+ char *page, *start = NULL;
-+ int rc = 0, eof = 1, count;
-+
-+ if (*ppos >= PAGE_SIZE)
-+ return 0;
-+
-+ page = (char *)__get_free_page(GFP_KERNEL);
-+ if (page == NULL)
-+ return -ENOMEM;
-+
-+ LPROCFS_ENTRY();
-+ OBD_FAIL_TIMEOUT(OBD_FAIL_LPROC_REMOVE, 10);
-+ if (!dp->deleted && dp->read_proc)
-+ rc = dp->read_proc(page, &start, *ppos, PAGE_SIZE,
-+ &eof, dp->data);
-+ LPROCFS_EXIT();
-+ if (rc <= 0)
-+ goto out;
-+
-+ /* for lustre proc read, the read count must be less than PAGE_SIZE */
-+ LASSERT(eof == 1);
-+
-+ if (start == NULL) {
-+ rc -= *ppos;
-+ if (rc < 0)
-+ rc = 0;
-+ if (rc == 0)
-+ goto out;
-+ start = page + *ppos;
-+ } else if (start < page) {
-+ start = page;
-+ }
-+
-+ count = (rc < size) ? rc : size;
-+ if (copy_to_user(buf, start, count)) {
-+ rc = -EFAULT;
-+ goto out;
-+ }
-+ *ppos += count;
-+
-+out:
-+ free_page((unsigned long)page);
-+ return rc;
-+}
-+
-+static ssize_t lprocfs_fops_write(struct file *f, const char __user *buf,
-+ size_t size, loff_t *ppos)
-+{
-+ struct proc_dir_entry *dp = PDE(f->f_dentry->d_inode);
-+ int rc = -EIO;
-+
-+ LPROCFS_ENTRY();
-+ if (!dp->deleted && dp->write_proc)
-+ rc = dp->write_proc(f, buf, size, dp->data);
-+ LPROCFS_EXIT();
-+ return rc;
-+}
-+
-+static struct file_operations lprocfs_generic_fops = {
-+ .owner = THIS_MODULE,
-+ .read = lprocfs_fops_read,
-+ .write = lprocfs_fops_write,
-+};
-+
-+int lprocfs_evict_client_open(struct inode *inode, struct file *f)
-+{
-+ struct proc_dir_entry *dp = PDE(f->f_dentry->d_inode);
-+ struct obd_device *obd = dp->data;
-+
-+ atomic_inc(&obd->obd_evict_inprogress);
-+
-+ return 0;
-+}
-+
-+int lprocfs_evict_client_release(struct inode *inode, struct file *f)
-+{
-+ struct proc_dir_entry *dp = PDE(f->f_dentry->d_inode);
-+ struct obd_device *obd = dp->data;
-+
-+ atomic_dec(&obd->obd_evict_inprogress);
-+ wake_up(&obd->obd_evict_inprogress_waitq);
-+
-+ return 0;
-+}
-+
-+struct file_operations lprocfs_evict_client_fops = {
-+ .owner = THIS_MODULE,
-+ .read = lprocfs_fops_read,
-+ .write = lprocfs_fops_write,
-+ .open = lprocfs_evict_client_open,
-+ .release = lprocfs_evict_client_release,
-+};
-+EXPORT_SYMBOL(lprocfs_evict_client_fops);
-+
-+/**
-+ * Add /proc entries.
-+ *
-+ * \param root [in] The parent proc entry on which new entry will be added.
-+ * \param list [in] Array of proc entries to be added.
-+ * \param data [in] The argument to be passed when entries read/write routines
-+ * are called through /proc file.
-+ *
-+ * \retval 0 on success
-+ * < 0 on error
-+ */
-+int lprocfs_add_vars(struct proc_dir_entry *root, struct lprocfs_vars *list,
-+ void *data)
-+{
-+ if (root == NULL || list == NULL)
-+ return -EINVAL;
-+
-+ while (list->name != NULL) {
-+ struct proc_dir_entry *cur_root, *proc;
-+ char *pathcopy, *cur, *next, pathbuf[64];
-+ int pathsize = strlen(list->name) + 1;
-+
-+ proc = NULL;
-+ cur_root = root;
-+
-+ /* need copy of path for strsep */
-+ if (strlen(list->name) > sizeof(pathbuf) - 1) {
-+ OBD_ALLOC(pathcopy, pathsize);
-+ if (pathcopy == NULL)
-+ return -ENOMEM;
-+ } else {
-+ pathcopy = pathbuf;
-+ }
-+
-+ next = pathcopy;
-+ strcpy(pathcopy, list->name);
-+
-+ while (cur_root != NULL && (cur = strsep(&next, "/"))) {
-+ if (*cur =='\0') /* skip double/trailing "/" */
-+ continue;
-+
-+ proc = lprocfs_srch(cur_root, cur);
-+ CDEBUG(D_OTHER, "cur_root=%s, cur=%s, next=%s, (%s)\n",
-+ cur_root->name, cur, next,
-+ (proc ? "exists" : "new"));
-+ if (next != NULL) {
-+ cur_root = (proc ? proc :
-+ proc_mkdir(cur, cur_root));
-+ } else if (proc == NULL) {
-+ mode_t mode = 0;
-+ if (list->proc_mode != 0000) {
-+ mode = list->proc_mode;
-+ } else {
-+ if (list->read_fptr)
-+ mode = 0444;
-+ if (list->write_fptr)
-+ mode |= 0200;
-+ }
-+ proc = create_proc_entry(cur, mode, cur_root);
-+ }
-+ }
-+
-+ if (pathcopy != pathbuf)
-+ OBD_FREE(pathcopy, pathsize);
-+
-+ if (cur_root == NULL || proc == NULL) {
-+ CERROR("LprocFS: No memory to create /proc entry %s",
-+ list->name);
-+ return -ENOMEM;
-+ }
-+
-+ if (list->fops)
-+ proc->proc_fops = list->fops;
-+ else
-+ proc->proc_fops = &lprocfs_generic_fops;
-+ proc->read_proc = list->read_fptr;
-+ proc->write_proc = list->write_fptr;
-+ proc->data = (list->data ? list->data : data);
-+ list++;
-+ }
-+ return 0;
-+}
-+
-+void lprocfs_remove(struct proc_dir_entry **rooth)
-+{
-+ struct proc_dir_entry *root = *rooth;
-+ struct proc_dir_entry *temp = root;
-+ struct proc_dir_entry *rm_entry;
-+ struct proc_dir_entry *parent;
-+
-+ if (!root)
-+ return;
-+ *rooth = NULL;
-+
-+ parent = root->parent;
-+ LASSERT(parent != NULL);
-+ LPROCFS_WRITE_ENTRY(); /* search vs remove race */
-+
-+ while (1) {
-+ while (temp->subdir != NULL)
-+ temp = temp->subdir;
-+
-+ rm_entry = temp;
-+ temp = temp->parent;
-+
-+ /* Memory corruption once caused this to fail, and
-+ without this LASSERT we would loop here forever. */
-+ LASSERTF(strlen(rm_entry->name) == rm_entry->namelen,
-+ "0x%p %s/%s len %d\n", rm_entry, temp->name,
-+ rm_entry->name, (int)strlen(rm_entry->name));
-+
-+ /* Now, the rm_entry->deleted flags is protected
-+ * by _lprocfs_lock. */
-+ rm_entry->data = NULL;
-+ remove_proc_entry(rm_entry->name, temp);
-+ if (temp == parent)
-+ break;
-+ }
-+ LPROCFS_WRITE_EXIT();
-+}
-+
-+struct proc_dir_entry *lprocfs_register(const char *name,
-+ struct proc_dir_entry *parent,
-+ struct lprocfs_vars *list, void *data)
-+{
-+ struct proc_dir_entry *newchild;
-+
-+ newchild = lprocfs_srch(parent, name);
-+ if (newchild != NULL) {
-+ CERROR(" Lproc: Attempting to register %s more than once \n",
-+ name);
-+ return ERR_PTR(-EALREADY);
-+ }
-+
-+ newchild = proc_mkdir(name, parent);
-+ if (newchild != NULL && list != NULL) {
-+ int rc = lprocfs_add_vars(newchild, list, data);
-+ if (rc) {
-+ lprocfs_remove(&newchild);
-+ return ERR_PTR(rc);
-+ }
-+ }
-+ return newchild;
-+}
-+
-+/* Generic callbacks */
-+int lprocfs_rd_uint(char *page, char **start, off_t off,
-+ int count, int *eof, void *data)
-+{
-+ unsigned int *temp = (unsigned int *)data;
-+ return snprintf(page, count, "%u\n", *temp);
-+}
-+
-+int lprocfs_wr_uint(struct file *file, const char *buffer,
-+ unsigned long count, void *data)
-+{
-+ unsigned *p = data;
-+ char dummy[MAX_STRING_SIZE + 1] = { '\0' }, *end;
-+ unsigned long tmp;
-+
-+ if (count >= sizeof(dummy) || count == 0)
-+ return -EINVAL;
-+
-+ if (copy_from_user(dummy, buffer, count))
-+ return -EFAULT;
-+
-+ tmp = simple_strtoul(dummy, &end, 0);
-+ if (dummy == end)
-+ return -EINVAL;
-+
-+ *p = (unsigned int)tmp;
-+ return count;
-+}
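
lprocfs_wr_uint() above shows the usual write-handler shape: bound the input, copy it into a local buffer, parse it with simple_strtoul(), and reject input where the parser consumed nothing. A simplified userspace analogue using strtoul() is sketched below (it returns 0/-EINVAL instead of a byte count); the function name and limits are made up.

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static int parse_uint(const char *buffer, size_t count, unsigned int *out)
{
        char tmp[32] = { 0 };
        char *end;
        unsigned long val;

        if (count == 0 || count >= sizeof(tmp))
                return -EINVAL;
        memcpy(tmp, buffer, count);     /* stands in for copy_from_user() */

        errno = 0;
        val = strtoul(tmp, &end, 0);
        if (end == tmp || errno != 0)   /* no digits consumed, or overflow */
                return -EINVAL;

        *out = (unsigned int)val;
        return 0;
}

int main(void)
{
        unsigned int v;

        if (parse_uint("4096\n", 5, &v) == 0)
                printf("parsed %u\n", v);
        return 0;
}
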
-+
-+int lprocfs_rd_u64(char *page, char **start, off_t off,
-+ int count, int *eof, void *data)
-+{
-+ LASSERT(data != NULL);
-+ *eof = 1;
-+ return snprintf(page, count, LPU64"\n", *(__u64 *)data);
-+}
-+
-+int lprocfs_rd_atomic(char *page, char **start, off_t off,
-+ int count, int *eof, void *data)
-+{
-+ atomic_t *atom = (atomic_t *)data;
-+ LASSERT(atom != NULL);
-+ *eof = 1;
-+ return snprintf(page, count, "%d\n", atomic_read(atom));
-+}
-+
-+int lprocfs_wr_atomic(struct file *file, const char *buffer,
-+ unsigned long count, void *data)
-+{
-+ atomic_t *atm = data;
-+ int val = 0;
-+ int rc;
-+
-+ rc = lprocfs_write_helper(buffer, count, &val);
-+ if (rc < 0)
-+ return rc;
-+
-+ if (val <= 0)
-+ return -ERANGE;
-+
-+ atomic_set(atm, val);
-+ return count;
-+}
-+
-+int lprocfs_rd_uuid(char *page, char **start, off_t off, int count,
-+ int *eof, void *data)
-+{
-+ struct obd_device *obd = (struct obd_device*)data;
-+
-+ LASSERT(obd != NULL);
-+ *eof = 1;
-+ return snprintf(page, count, "%s\n", obd->obd_uuid.uuid);
-+}
-+
-+int lprocfs_rd_name(char *page, char **start, off_t off, int count,
-+ int *eof, void* data)
-+{
-+ struct obd_device *dev = (struct obd_device *)data;
-+
-+ LASSERT(dev != NULL);
-+ LASSERT(dev->obd_name != NULL);
-+ *eof = 1;
-+ return snprintf(page, count, "%s\n", dev->obd_name);
-+}
-+
-+int lprocfs_rd_fstype(char *page, char **start, off_t off, int count, int *eof,
-+ void *data)
-+{
-+ struct obd_device *obd = (struct obd_device *)data;
-+
-+ LASSERT(obd != NULL);
-+ LASSERT(obd->obd_fsops != NULL);
-+ LASSERT(obd->obd_fsops->fs_type != NULL);
-+ return snprintf(page, count, "%s\n", obd->obd_fsops->fs_type);
-+}
-+
-+int lprocfs_rd_blksize(char *page, char **start, off_t off, int count,
-+ int *eof, void *data)
-+{
-+ struct obd_statfs osfs;
-+ int rc = obd_statfs(data, &osfs, cfs_time_current_64() - HZ,
-+ OBD_STATFS_NODELAY);
-+ if (!rc) {
-+ *eof = 1;
-+ rc = snprintf(page, count, "%u\n", osfs.os_bsize);
-+ }
-+ return rc;
-+}
-+
-+int lprocfs_rd_kbytestotal(char *page, char **start, off_t off, int count,
-+ int *eof, void *data)
-+{
-+ struct obd_statfs osfs;
-+ int rc = obd_statfs(data, &osfs, cfs_time_current_64() - HZ,
-+ OBD_STATFS_NODELAY);
-+ if (!rc) {
-+ __u32 blk_size = osfs.os_bsize >> 10;
-+ __u64 result = osfs.os_blocks;
-+
-+ while (blk_size >>= 1)
-+ result <<= 1;
-+
-+ *eof = 1;
-+ rc = snprintf(page, count, LPU64"\n", result);
-+ }
-+ return rc;
-+}
-+
-+int lprocfs_rd_kbytesfree(char *page, char **start, off_t off, int count,
-+ int *eof, void *data)
-+{
-+ struct obd_statfs osfs;
-+ int rc = obd_statfs(data, &osfs, cfs_time_current_64() - HZ,
-+ OBD_STATFS_NODELAY);
-+ if (!rc) {
-+ __u32 blk_size = osfs.os_bsize >> 10;
-+ __u64 result = osfs.os_bfree;
-+
-+ while (blk_size >>= 1)
-+ result <<= 1;
-+
-+ *eof = 1;
-+ rc = snprintf(page, count, LPU64"\n", result);
-+ }
-+ return rc;
-+}
-+
-+int lprocfs_rd_kbytesavail(char *page, char **start, off_t off, int count,
-+ int *eof, void *data)
-+{
-+ struct obd_statfs osfs;
-+ int rc = obd_statfs(data, &osfs, cfs_time_current_64() - HZ,
-+ OBD_STATFS_NODELAY);
-+ if (!rc) {
-+ __u32 blk_size = osfs.os_bsize >> 10;
-+ __u64 result = osfs.os_bavail;
-+
-+ while (blk_size >>= 1)
-+ result <<= 1;
-+
-+ *eof = 1;
-+ rc = snprintf(page, count, LPU64"\n", result);
-+ }
-+ return rc;
-+}
-+
-+int lprocfs_rd_filestotal(char *page, char **start, off_t off, int count,
-+ int *eof, void *data)
-+{
-+ struct obd_statfs osfs;
-+ int rc = obd_statfs(data, &osfs, cfs_time_current_64() - HZ,
-+ OBD_STATFS_NODELAY);
-+ if (!rc) {
-+ *eof = 1;
-+ rc = snprintf(page, count, LPU64"\n", osfs.os_files);
-+ }
-+
-+ return rc;
-+}
-+
-+int lprocfs_rd_filesfree(char *page, char **start, off_t off, int count,
-+ int *eof, void *data)
-+{
-+ struct obd_statfs osfs;
-+ int rc = obd_statfs(data, &osfs, cfs_time_current_64() - HZ,
-+ OBD_STATFS_NODELAY);
-+ if (!rc) {
-+ *eof = 1;
-+ rc = snprintf(page, count, LPU64"\n", osfs.os_ffree);
-+ }
-+ return rc;
-+}
-+
-+int lprocfs_rd_server_uuid(char *page, char **start, off_t off, int count,
-+ int *eof, void *data)
-+{
-+ struct obd_device *obd = (struct obd_device *)data;
-+ struct obd_import *imp;
-+ char *imp_state_name = NULL;
-+ int rc = 0;
-+
-+ LASSERT(obd != NULL);
-+ LPROCFS_CLIMP_CHECK(obd);
-+ imp = obd->u.cli.cl_import;
-+ imp_state_name = ptlrpc_import_state_name(imp->imp_state);
-+ *eof = 1;
-+ rc = snprintf(page, count, "%s\t%s%s\n",
-+ obd2cli_tgt(obd), imp_state_name,
-+ imp->imp_deactive ? "\tDEACTIVATED" : "");
-+
-+ LPROCFS_CLIMP_EXIT(obd);
-+ return rc;
-+}
-+
-+int lprocfs_rd_conn_uuid(char *page, char **start, off_t off, int count,
-+ int *eof, void *data)
-+{
-+ struct obd_device *obd = (struct obd_device*)data;
-+ struct ptlrpc_connection *conn;
-+ int rc = 0;
-+
-+ LASSERT(obd != NULL);
-+ LPROCFS_CLIMP_CHECK(obd);
-+ conn = obd->u.cli.cl_import->imp_connection;
-+ LASSERT(conn != NULL);
-+ *eof = 1;
-+ rc = snprintf(page, count, "%s\n", conn->c_remote_uuid.uuid);
-+
-+ LPROCFS_CLIMP_EXIT(obd);
-+ return rc;
-+}
-+
-+#define flag2str(flag) \
-+ if (imp->imp_##flag && max - len > 0) \
-+ len += snprintf(str + len, max - len, " " #flag);
-+
-+/**
-+ * Append a space separated list of current set flags to str.
-+ */
-+static int obd_import_flags2str(struct obd_import *imp, char *str,
-+ int max)
-+{
-+ int len = 0;
-+
-+ if (imp->imp_obd->obd_no_recov)
-+ len += snprintf(str, max - len, " no_recov");
-+
-+ flag2str(invalid);
-+ flag2str(deactive);
-+ flag2str(replayable);
-+ flag2str(pingable);
-+ flag2str(recon_bk);
-+ flag2str(last_recon);
-+ return len;
-+}
-+#undef flag2str
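
The flag2str() macro above stringifies the field name with the # operator and appends it to the output whenever that flag is set. A small userspace sketch of the same trick follows, with made-up import fields and sizes.

#include <stdio.h>

struct import { int invalid; int deactive; int pingable; };

/* Append " <flag>" to str whenever the named field of imp is non-zero,
 * tracking how much of the buffer has been used. */
#define flag2str(str, len, max, imp, flag)                              \
        do {                                                            \
                if ((imp)->flag && (max) - (len) > 0)                   \
                        (len) += snprintf((str) + (len), (max) - (len), \
                                          " " #flag);                   \
        } while (0)

int main(void)
{
        struct import imp = { .invalid = 0, .deactive = 1, .pingable = 1 };
        char buf[64];
        int len = 0;

        buf[0] = '\0';
        flag2str(buf, len, (int)sizeof(buf), &imp, invalid);
        flag2str(buf, len, (int)sizeof(buf), &imp, deactive);
        flag2str(buf, len, (int)sizeof(buf), &imp, pingable);
        printf("flags:%s\n", buf);
        return 0;
}
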
-+
-+int lprocfs_rd_import(char *page, char **start, off_t off, int count,
-+ int *eof, void *data)
-+{
-+ struct obd_device *obd = (struct obd_device *)data;
-+ struct obd_import *imp;
-+ char *imp_state_name = NULL;
-+ int rc = 0;
-+
-+ LASSERT(obd != NULL);
-+ LPROCFS_CLIMP_CHECK(obd);
-+ imp = obd->u.cli.cl_import;
-+ imp_state_name = ptlrpc_import_state_name(imp->imp_state);
-+ *eof = 1;
-+
-+ rc = snprintf(page, count,
-+ "import: %s\n"
-+ " target: %s@%s\n"
-+ " state: %s\n"
-+ " inflight: %u\n"
-+ " unregistering: %u\n"
-+ " conn_cnt: %u\n"
-+ " generation: %u\n"
-+ " inval_cnt: %u\n"
-+ " last_replay_transno: "LPU64"\n"
-+ " peer_committed_transno: "LPU64"\n"
-+ " last_trasno_checked: "LPU64"\n"
-+ " flags:",
-+ obd->obd_name,
-+ obd2cli_tgt(obd), imp->imp_connection->c_remote_uuid.uuid,
-+ imp_state_name,
-+ atomic_read(&imp->imp_inflight),
-+ atomic_read(&imp->imp_unregistering),
-+ imp->imp_conn_cnt,
-+ imp->imp_generation,
-+ atomic_read(&imp->imp_inval_count),
-+ imp->imp_last_replay_transno,
-+ imp->imp_peer_committed_transno,
-+ imp->imp_last_transno_checked);
-+ rc += obd_import_flags2str(imp, page + rc, count - rc);
-+ rc += snprintf(page+rc, count - rc, "\n");
-+ LPROCFS_CLIMP_EXIT(obd);
-+ return rc;
-+}
-+
-+int lprocfs_at_hist_helper(char *page, int count, int rc,
-+ struct adaptive_timeout *at)
-+{
-+ int i;
-+ for (i = 0; i < AT_BINS; i++)
-+ rc += snprintf(page + rc, count - rc, "%3u ", at->at_hist[i]);
-+ rc += snprintf(page + rc, count - rc, "\n");
-+ return rc;
-+}
-+
-+/* See also ptlrpc_lprocfs_rd_timeouts */
-+int lprocfs_rd_timeouts(char *page, char **start, off_t off, int count,
-+ int *eof, void *data)
-+{
-+ struct obd_device *obd = (struct obd_device *)data;
-+ struct obd_import *imp;
-+ unsigned int cur, worst;
-+ time_t now, worstt;
-+ struct dhms ts;
-+ int i, rc = 0;
-+
-+ LASSERT(obd != NULL);
-+ LPROCFS_CLIMP_CHECK(obd);
-+ imp = obd->u.cli.cl_import;
-+ *eof = 1;
-+
-+ now = cfs_time_current_sec();
-+
-+ /* Some network health info for kicks */
-+ s2dhms(&ts, now - imp->imp_last_reply_time);
-+ rc += snprintf(page + rc, count - rc,
-+ "%-10s : %ld, "DHMS_FMT" ago\n",
-+ "last reply", imp->imp_last_reply_time, DHMS_VARS(&ts));
-+
-+ cur = at_get(&imp->imp_at.iat_net_latency);
-+ worst = imp->imp_at.iat_net_latency.at_worst_ever;
-+ worstt = imp->imp_at.iat_net_latency.at_worst_time;
-+ s2dhms(&ts, now - worstt);
-+ rc += snprintf(page + rc, count - rc,
-+ "%-10s : cur %3u worst %3u (at %ld, "DHMS_FMT" ago) ",
-+ "network", cur, worst, worstt, DHMS_VARS(&ts));
-+ rc = lprocfs_at_hist_helper(page, count, rc,
-+ &imp->imp_at.iat_net_latency);
-+
-+ for(i = 0; i < IMP_AT_MAX_PORTALS; i++) {
-+ if (imp->imp_at.iat_portal[i] == 0)
-+ break;
-+ cur = at_get(&imp->imp_at.iat_service_estimate[i]);
-+ worst = imp->imp_at.iat_service_estimate[i].at_worst_ever;
-+ worstt = imp->imp_at.iat_service_estimate[i].at_worst_time;
-+ s2dhms(&ts, now - worstt);
-+ rc += snprintf(page + rc, count - rc,
-+ "portal %-2d : cur %3u worst %3u (at %ld, "
-+ DHMS_FMT" ago) ", imp->imp_at.iat_portal[i],
-+ cur, worst, worstt, DHMS_VARS(&ts));
-+ rc = lprocfs_at_hist_helper(page, count, rc,
-+ &imp->imp_at.iat_service_estimate[i]);
-+ }
-+
-+ LPROCFS_CLIMP_EXIT(obd);
-+ return rc;
-+}
-+
-+static const char *obd_connect_names[] = {
-+ "read_only",
-+ "lov_index",
-+ "unused",
-+ "write_grant",
-+ "server_lock",
-+ "version",
-+ "request_portal",
-+ "acl",
-+ "xattr",
-+ "create_on_write",
-+ "truncate_lock",
-+ "initial_transno",
-+ "inode_bit_locks",
-+ "join_file",
-+ "getattr_by_fid",
-+ "no_oh_for_devices",
-+ "local_1.8_client",
-+ "remote_1.8_client",
-+ "max_byte_per_rpc",
-+ "64bit_qdata",
-+ "fid_capability",
-+ "oss_capability",
-+ "early_lock_cancel",
-+ "size_on_mds",
-+ "adaptive_timeout",
-+ "lru_resize",
-+ "mds_mds_connection",
-+ "real_conn",
-+ "change_qunit_size",
-+ "alt_checksum_algorithm",
-+ "fid_is_enabled",
-+ "version_recovery",
-+ "pools",
-+ NULL
-+};
-+
-+int lprocfs_rd_connect_flags(char *page, char **start, off_t off,
-+ int count, int *eof, void *data)
-+{
-+ struct obd_device *obd = data;
-+ __u64 mask = 1, flags;
-+ int i, ret = 0;
-+
-+ LPROCFS_CLIMP_CHECK(obd);
-+ flags = obd->u.cli.cl_import->imp_connect_data.ocd_connect_flags;
-+ ret = snprintf(page, count, "flags="LPX64"\n", flags);
-+ for (i = 0; obd_connect_names[i] != NULL; i++, mask <<= 1) {
-+ if (flags & mask)
-+ ret += snprintf(page + ret, count - ret, "%s\n",
-+ obd_connect_names[i]);
-+ }
-+ if (flags & ~(mask - 1))
-+ ret += snprintf(page + ret, count - ret,
-+ "unknown flags "LPX64"\n", flags & ~(mask - 1));
-+
-+ LPROCFS_CLIMP_EXIT(obd);
-+ return ret;
-+}
-+EXPORT_SYMBOL(lprocfs_rd_connect_flags);
-+
-+int lprocfs_rd_num_exports(char *page, char **start, off_t off, int count,
-+ int *eof, void *data)
-+{
-+ struct obd_device *obd = (struct obd_device*)data;
-+
-+ LASSERT(obd != NULL);
-+ *eof = 1;
-+ return snprintf(page, count, "%u\n", obd->obd_num_exports);
-+}
-+
-+int lprocfs_rd_numrefs(char *page, char **start, off_t off, int count,
-+ int *eof, void *data)
-+{
-+ struct obd_type *class = (struct obd_type*) data;
-+
-+ LASSERT(class != NULL);
-+ *eof = 1;
-+ return snprintf(page, count, "%d\n", class->typ_refcnt);
-+}
-+
-+int lprocfs_obd_setup(struct obd_device *obd, struct lprocfs_vars *list)
-+{
-+ int rc = 0;
-+
-+ LASSERT(obd != NULL);
-+ LASSERT(obd->obd_magic == OBD_DEVICE_MAGIC);
-+ LASSERT(obd->obd_type->typ_procroot != NULL);
-+
-+ obd->obd_proc_entry = lprocfs_register(obd->obd_name,
-+ obd->obd_type->typ_procroot,
-+ list, obd);
-+ if (IS_ERR(obd->obd_proc_entry)) {
-+ rc = PTR_ERR(obd->obd_proc_entry);
-+ CERROR("error %d setting up lprocfs for %s\n",rc,obd->obd_name);
-+ obd->obd_proc_entry = NULL;
-+ }
-+ return rc;
-+}
-+
-+int lprocfs_obd_cleanup(struct obd_device *obd)
-+{
-+ if (!obd)
-+ return -EINVAL;
-+ if (obd->obd_proc_exports_entry) {
-+ /* Should be no exports left */
-+ LASSERT(obd->obd_proc_exports_entry->subdir == NULL);
-+ lprocfs_remove(&obd->obd_proc_exports_entry);
-+ }
-+ lprocfs_remove(&obd->obd_proc_entry);
-+ return 0;
-+}
-+
-+static void lprocfs_free_client_stats(struct nid_stat *client_stat)
-+{
-+ CDEBUG(D_CONFIG, "stat %p - data %p/%p/%p\n", client_stat,
-+ client_stat->nid_proc, client_stat->nid_stats,
-+ client_stat->nid_brw_stats);
-+
-+ LASSERTF(client_stat->nid_exp_ref_count == 0, "count %d\n",
-+ client_stat->nid_exp_ref_count);
-+
-+ hlist_del_init(&client_stat->nid_hash);
-+
-+ if (client_stat->nid_proc)
-+ lprocfs_remove(&client_stat->nid_proc);
-+
-+ if (client_stat->nid_stats)
-+ lprocfs_free_stats(&client_stat->nid_stats);
-+
-+ if (client_stat->nid_brw_stats)
-+ OBD_FREE_PTR(client_stat->nid_brw_stats);
-+
-+ if (client_stat->nid_ldlm_stats)
-+ lprocfs_free_stats(&client_stat->nid_ldlm_stats);
-+
-+ OBD_FREE_PTR(client_stat);
-+ return;
-+
-+}
-+
-+void lprocfs_free_per_client_stats(struct obd_device *obd)
-+{
-+ struct nid_stat *stat;
-+ ENTRY;
-+
-+ /* we need an extra list because hash_exit is called too early */
-+ /* no locking is needed because all clients have already died */
-+ while(!list_empty(&obd->obd_nid_stats)) {
-+ stat = list_entry(obd->obd_nid_stats.next,
-+ struct nid_stat, nid_list);
-+ list_del_init(&stat->nid_list);
-+ lprocfs_free_client_stats(stat);
-+ }
-+
-+ EXIT;
-+}
-+
-+struct lprocfs_stats *lprocfs_alloc_stats(unsigned int num,
-+ enum lprocfs_stats_flags flags)
-+{
-+ struct lprocfs_stats *stats;
-+ unsigned int percpusize;
-+ unsigned int i, j;
-+ unsigned int num_cpu;
-+
-+ if (num == 0)
-+ return NULL;
-+
-+ if (flags & LPROCFS_STATS_FLAG_NOPERCPU)
-+ num_cpu = 1;
-+ else
-+ num_cpu = num_possible_cpus();
-+
-+ OBD_ALLOC(stats, offsetof(typeof(*stats), ls_percpu[num_cpu]));
-+ if (stats == NULL)
-+ return NULL;
-+
-+ if (flags & LPROCFS_STATS_FLAG_NOPERCPU) {
-+ stats->ls_flags = flags;
-+ spin_lock_init(&stats->ls_lock);
-+ /* Use this lock only if there are no percpu areas */
-+ } else {
-+ stats->ls_flags = 0;
-+ }
-+
-+ percpusize = offsetof(struct lprocfs_percpu, lp_cntr[num]);
-+ if (num_cpu > 1)
-+ percpusize = L1_CACHE_ALIGN(percpusize);
-+
-+ for (i = 0; i < num_cpu; i++) {
-+ OBD_ALLOC(stats->ls_percpu[i], percpusize);
-+ if (stats->ls_percpu[i] == NULL) {
-+ for (j = 0; j < i; j++) {
-+ OBD_FREE(stats->ls_percpu[j], percpusize);
-+ stats->ls_percpu[j] = NULL;
-+ }
-+ break;
-+ }
-+ }
-+ if (stats->ls_percpu[0] == NULL) {
-+ OBD_FREE(stats, offsetof(typeof(*stats),
-+ ls_percpu[num_cpu]));
-+ return NULL;
-+ }
-+
-+ stats->ls_num = num;
-+ return stats;
-+}
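
lprocfs_alloc_stats() sizes the top-level structure with offsetof() over what is effectively a per-CPU pointer array, then allocates one counter array per CPU. A userspace sketch of that allocation shape, using a C99 flexible array member and made-up sizes, is below.

#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>

struct counter { long count; long sum; };

struct stats {
        unsigned int    ls_num;         /* counters per CPU */
        struct counter *ls_percpu[];    /* one counter array per CPU */
};

int main(void)
{
        unsigned int num_cpu = 4, num = 8, i;
        struct stats *stats;

        /* Header plus num_cpu pointer slots, sized via offsetof(). */
        stats = calloc(1, offsetof(struct stats, ls_percpu[num_cpu]));
        if (stats == NULL)
                return 1;
        stats->ls_num = num;
        for (i = 0; i < num_cpu; i++)
                stats->ls_percpu[i] = calloc(num, sizeof(struct counter));

        printf("allocated %u per-cpu arrays of %u counters\n", num_cpu, num);

        for (i = 0; i < num_cpu; i++)
                free(stats->ls_percpu[i]);
        free(stats);
        return 0;
}
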
-+
-+void lprocfs_free_stats(struct lprocfs_stats **statsh)
-+{
-+ struct lprocfs_stats *stats = *statsh;
-+ unsigned int num_cpu;
-+ unsigned int percpusize;
-+ unsigned int i;
-+
-+ if (!stats || (stats->ls_num == 0))
-+ return;
-+ *statsh = NULL;
-+ if (stats->ls_flags & LPROCFS_STATS_FLAG_NOPERCPU)
-+ num_cpu = 1;
-+ else
-+ num_cpu = num_possible_cpus();
-+
-+ percpusize = offsetof(struct lprocfs_percpu, lp_cntr[stats->ls_num]);
-+ if (num_cpu > 1)
-+ percpusize = L1_CACHE_ALIGN(percpusize);
-+ for (i = 0; i < num_cpu; i++)
-+ OBD_FREE(stats->ls_percpu[i], percpusize);
-+ OBD_FREE(stats, offsetof(typeof(*stats), ls_percpu[num_cpu]));
-+}
-+
-+void lprocfs_clear_stats(struct lprocfs_stats *stats)
-+{
-+ struct lprocfs_counter *percpu_cntr;
-+ int i, j;
-+ unsigned int num_cpu;
-+
-+ num_cpu = lprocfs_stats_lock(stats, LPROCFS_GET_NUM_CPU);
-+
-+ for (i = 0; i < num_cpu; i++) {
-+ for (j = 0; j < stats->ls_num; j++) {
-+ percpu_cntr = &(stats->ls_percpu[i])->lp_cntr[j];
-+ atomic_inc(&percpu_cntr->lc_cntl.la_entry);
-+ percpu_cntr->lc_count = 0;
-+ percpu_cntr->lc_sum = 0;
-+ percpu_cntr->lc_min = LC_MIN_INIT;
-+ percpu_cntr->lc_max = 0;
-+ percpu_cntr->lc_sumsquare = 0;
-+ atomic_inc(&percpu_cntr->lc_cntl.la_exit);
-+ }
-+ }
-+
-+ lprocfs_stats_unlock(stats);
-+}
-+
-+static ssize_t lprocfs_stats_seq_write(struct file *file, const char *buf,
-+ size_t len, loff_t *off)
-+{
-+ struct seq_file *seq = file->private_data;
-+ struct lprocfs_stats *stats = seq->private;
-+
-+ lprocfs_clear_stats(stats);
-+
-+ return len;
-+}
-+
-+static void *lprocfs_stats_seq_start(struct seq_file *p, loff_t *pos)
-+{
-+ struct lprocfs_stats *stats = p->private;
-+ /* return 1st cpu location */
-+ return (*pos >= stats->ls_num) ? NULL :
-+ &(stats->ls_percpu[0]->lp_cntr[*pos]);
-+}
-+
-+static void lprocfs_stats_seq_stop(struct seq_file *p, void *v)
-+{
-+}
-+
-+static void *lprocfs_stats_seq_next(struct seq_file *p, void *v, loff_t *pos)
-+{
-+ struct lprocfs_stats *stats = p->private;
-+ ++*pos;
-+ return (*pos >= stats->ls_num) ? NULL :
-+ &(stats->ls_percpu[0]->lp_cntr[*pos]);
-+}
-+
-+/* seq file export of one lprocfs counter */
-+static int lprocfs_stats_seq_show(struct seq_file *p, void *v)
-+{
-+ struct lprocfs_stats *stats = p->private;
-+ struct lprocfs_counter *cntr = v;
-+ struct lprocfs_counter t, ret = { .lc_min = LC_MIN_INIT };
-+ int i, idx, rc = 0;
-+ unsigned int num_cpu;
-+
-+ if (cntr == &(stats->ls_percpu[0])->lp_cntr[0]) {
-+ struct timeval now;
-+ do_gettimeofday(&now);
-+ rc = seq_printf(p, "%-25s %lu.%lu secs.usecs\n",
-+ "snapshot_time", now.tv_sec, now.tv_usec);
-+ if (rc < 0)
-+ return rc;
-+ }
-+ idx = cntr - &(stats->ls_percpu[0])->lp_cntr[0];
-+
-+ if (stats->ls_flags & LPROCFS_STATS_FLAG_NOPERCPU)
-+ num_cpu = 1;
-+ else
-+ num_cpu = num_possible_cpus();
-+
-+ for (i = 0; i < num_cpu; i++) {
-+ struct lprocfs_counter *percpu_cntr =
-+ &(stats->ls_percpu[i])->lp_cntr[idx];
-+ int centry;
-+
-+ do {
-+ centry = atomic_read(&percpu_cntr->lc_cntl.la_entry);
-+ t.lc_count = percpu_cntr->lc_count;
-+ t.lc_sum = percpu_cntr->lc_sum;
-+ t.lc_min = percpu_cntr->lc_min;
-+ t.lc_max = percpu_cntr->lc_max;
-+ t.lc_sumsquare = percpu_cntr->lc_sumsquare;
-+ } while (centry != atomic_read(&percpu_cntr->lc_cntl.la_entry) &&
-+ centry != atomic_read(&percpu_cntr->lc_cntl.la_exit));
-+ ret.lc_count += t.lc_count;
-+ ret.lc_sum += t.lc_sum;
-+ if (t.lc_min < ret.lc_min)
-+ ret.lc_min = t.lc_min;
-+ if (t.lc_max > ret.lc_max)
-+ ret.lc_max = t.lc_max;
-+ ret.lc_sumsquare += t.lc_sumsquare;
-+ }
-+
-+ if (ret.lc_count == 0)
-+ goto out;
-+
-+ rc = seq_printf(p, "%-25s "LPD64" samples [%s]", cntr->lc_name,
-+ ret.lc_count, cntr->lc_units);
-+ if (rc < 0)
-+ goto out;
-+
-+ if ((cntr->lc_config & LPROCFS_CNTR_AVGMINMAX) && (ret.lc_count > 0)) {
-+ rc = seq_printf(p, " "LPD64" "LPD64" "LPD64,
-+ ret.lc_min, ret.lc_max, ret.lc_sum);
-+ if (rc < 0)
-+ goto out;
-+ if (cntr->lc_config & LPROCFS_CNTR_STDDEV)
-+ rc = seq_printf(p, " "LPD64, ret.lc_sumsquare);
-+ if (rc < 0)
-+ goto out;
-+ }
-+ rc = seq_printf(p, "\n");
-+ out:
-+ return (rc < 0) ? rc : 0;
-+}
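
The show routine above folds the per-CPU copies of one counter into a single result; the entry/exit sequence counters exist only to retry a copy that changed mid-read. A simplified userspace sketch of the folding step, without that retry logic and with made-up field names and sample values, follows.

#include <stdio.h>

struct counter { long count; long sum; long min; long max; };

/* Fold per-CPU copies of a counter into one aggregate result. */
static struct counter aggregate(const struct counter *percpu, int num_cpu)
{
        struct counter ret = { 0, 0, 0, 0 };
        int i;

        for (i = 0; i < num_cpu; i++) {
                ret.count += percpu[i].count;
                ret.sum   += percpu[i].sum;
                if (i == 0 || percpu[i].min < ret.min)
                        ret.min = percpu[i].min;
                if (percpu[i].max > ret.max)
                        ret.max = percpu[i].max;
        }
        return ret;
}

int main(void)
{
        struct counter percpu[2] = { { 3, 30, 5, 15 }, { 2, 22, 4, 18 } };
        struct counter total = aggregate(percpu, 2);

        printf("%ld samples, sum %ld, min %ld, max %ld\n",
               total.count, total.sum, total.min, total.max);
        return 0;
}
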
-+
-+struct seq_operations lprocfs_stats_seq_sops = {
-+ start: lprocfs_stats_seq_start,
-+ stop: lprocfs_stats_seq_stop,
-+ next: lprocfs_stats_seq_next,
-+ show: lprocfs_stats_seq_show,
-+};
-+
-+static int lprocfs_stats_seq_open(struct inode *inode, struct file *file)
-+{
-+ struct proc_dir_entry *dp = PDE(inode);
-+ struct seq_file *seq;
-+ int rc;
-+
-+ LPROCFS_ENTRY_AND_CHECK(dp);
-+ rc = seq_open(file, &lprocfs_stats_seq_sops);
-+ if (rc) {
-+ LPROCFS_EXIT();
-+ return rc;
-+ }
-+
-+ seq = file->private_data;
-+ seq->private = dp->data;
-+ return 0;
-+}
-+
-+struct file_operations lprocfs_stats_seq_fops = {
-+ .owner = THIS_MODULE,
-+ .open = lprocfs_stats_seq_open,
-+ .read = seq_read,
-+ .write = lprocfs_stats_seq_write,
-+ .llseek = seq_lseek,
-+ .release = lprocfs_seq_release,
-+};
-+
-+int lprocfs_register_stats(struct proc_dir_entry *root, const char *name,
-+ struct lprocfs_stats *stats)
-+{
-+ struct proc_dir_entry *entry;
-+ LASSERT(root != NULL);
-+
-+ entry = create_proc_entry(name, 0644, root);
-+ if (entry == NULL)
-+ return -ENOMEM;
-+ entry->proc_fops = &lprocfs_stats_seq_fops;
-+ entry->data = (void *)stats;
-+ return 0;
-+}
-+
-+void lprocfs_counter_init(struct lprocfs_stats *stats, int index,
-+ unsigned conf, const char *name, const char *units)
-+{
-+ struct lprocfs_counter *c;
-+ int i;
-+ unsigned int num_cpu;
-+
-+ LASSERT(stats != NULL);
-+
-+ num_cpu = lprocfs_stats_lock(stats, LPROCFS_GET_NUM_CPU);
-+
-+ for (i = 0; i < num_cpu; i++) {
-+ c = &(stats->ls_percpu[i]->lp_cntr[index]);
-+ c->lc_config = conf;
-+ c->lc_count = 0;
-+ c->lc_sum = 0;
-+ c->lc_min = LC_MIN_INIT;
-+ c->lc_max = 0;
-+ c->lc_name = name;
-+ c->lc_units = units;
-+ }
-+
-+ lprocfs_stats_unlock(stats);
-+}
-+EXPORT_SYMBOL(lprocfs_counter_init);
-+
-+#define LPROCFS_OBD_OP_INIT(base, stats, op) \
-+do { \
-+ unsigned int coffset = base + OBD_COUNTER_OFFSET(op); \
-+ LASSERT(coffset < stats->ls_num); \
-+ lprocfs_counter_init(stats, coffset, 0, #op, "reqs"); \
-+} while (0)
-+
-+void lprocfs_init_ops_stats(int num_private_stats, struct lprocfs_stats *stats)
-+{
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, iocontrol);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, get_info);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, set_info_async);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, attach);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, detach);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, setup);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, precleanup);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, cleanup);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, process_config);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, postrecov);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, add_conn);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, del_conn);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, connect);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, reconnect);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, disconnect);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, statfs);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, statfs_async);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, packmd);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, unpackmd);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, checkmd);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, preallocate);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, precreate);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, create);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, destroy);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, setattr);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, setattr_async);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, getattr);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, getattr_async);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, brw);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, brw_async);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, prep_async_page);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, reget_short_lock);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, release_short_lock);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, queue_async_io);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, queue_group_io);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, trigger_group_io);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, set_async_flags);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, teardown_async_page);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, merge_lvb);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, adjust_kms);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, punch);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, sync);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, migrate);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, copy);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, iterate);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, preprw);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, commitrw);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, enqueue);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, match);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, change_cbdata);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, cancel);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, cancel_unused);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, join_lru);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, init_export);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, destroy_export);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, extent_calc);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, llog_init);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, llog_finish);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, pin);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, unpin);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, import_event);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, notify);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, health_check);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, quotacheck);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, quotactl);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, quota_adjust_qunit);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, ping);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, register_page_removal_cb);
-+ LPROCFS_OBD_OP_INIT(num_private_stats,stats,unregister_page_removal_cb);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, register_lock_cancel_cb);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats,unregister_lock_cancel_cb);
-+}
-+
-+void lprocfs_init_ldlm_stats(struct lprocfs_stats *ldlm_stats)
-+{
-+ lprocfs_counter_init(ldlm_stats,
-+ LDLM_ENQUEUE - LDLM_FIRST_OPC,
-+ 0, "ldlm_enqueue", "reqs");
-+ lprocfs_counter_init(ldlm_stats,
-+ LDLM_CONVERT - LDLM_FIRST_OPC,
-+ 0, "ldlm_convert", "reqs");
-+ lprocfs_counter_init(ldlm_stats,
-+ LDLM_CANCEL - LDLM_FIRST_OPC,
-+ 0, "ldlm_cancel", "reqs");
-+ lprocfs_counter_init(ldlm_stats,
-+ LDLM_BL_CALLBACK - LDLM_FIRST_OPC,
-+ 0, "ldlm_bl_callback", "reqs");
-+ lprocfs_counter_init(ldlm_stats,
-+ LDLM_CP_CALLBACK - LDLM_FIRST_OPC,
-+ 0, "ldlm_cp_callback", "reqs");
-+ lprocfs_counter_init(ldlm_stats,
-+ LDLM_GL_CALLBACK - LDLM_FIRST_OPC,
-+ 0, "ldlm_gl_callback", "reqs");
-+}
-+
-+int lprocfs_alloc_obd_stats(struct obd_device *obd, unsigned num_private_stats)
-+{
-+ struct lprocfs_stats *stats;
-+ unsigned int num_stats;
-+ int rc, i;
-+
-+ LASSERT(obd->obd_stats == NULL);
-+ LASSERT(obd->obd_proc_entry != NULL);
-+ LASSERT(obd->obd_cntr_base == 0);
-+
-+ num_stats = ((int)sizeof(*obd->obd_type->typ_ops) / sizeof(void *)) +
-+ num_private_stats - 1 /* o_owner */;
-+ stats = lprocfs_alloc_stats(num_stats, 0);
-+ if (stats == NULL)
-+ return -ENOMEM;
-+
-+ lprocfs_init_ops_stats(num_private_stats, stats);
-+
-+ for (i = num_private_stats; i < num_stats; i++) {
-+ /* If this LBUGs, it is likely that an obd
-+ * operation was added to struct obd_ops in
-+ * <obd.h>, and that the corresponding line item
-+ * LPROCFS_OBD_OP_INIT(.., .., opname)
-+ * is missing from the list above. */
-+ LASSERTF(stats->ls_percpu[0]->lp_cntr[i].lc_name != NULL,
-+ "Missing obd_stat initializer obd_op "
-+ "operation at offset %d.\n", i - num_private_stats);
-+ }
-+ rc = lprocfs_register_stats(obd->obd_proc_entry, "stats", stats);
-+ if (rc < 0) {
-+ lprocfs_free_stats(&stats);
-+ } else {
-+ obd->obd_stats = stats;
-+ obd->obd_cntr_base = num_private_stats;
-+ }
-+ return rc;
-+}
-+
-+void lprocfs_free_obd_stats(struct obd_device *obd)
-+{
-+ if (obd->obd_stats)
-+ lprocfs_free_stats(&obd->obd_stats);
-+}
-+
-+int lprocfs_exp_rd_nid(char *page, char **start, off_t off, int count,
-+ int *eof, void *data)
-+{
-+ struct obd_export *exp = (struct obd_export*)data;
-+ LASSERT(exp != NULL);
-+ *eof = 1;
-+ return snprintf(page, count, "%s\n", obd_export_nid2str(exp));
-+}
-+
-+struct exp_uuid_cb_data {
-+ char *page;
-+ int count;
-+ int *eof;
-+ int *len;
-+};
-+
-+static void
-+lprocfs_exp_rd_cb_data_init(struct exp_uuid_cb_data *cb_data, char *page,
-+ int count, int *eof, int *len)
-+{
-+ cb_data->page = page;
-+ cb_data->count = count;
-+ cb_data->eof = eof;
-+ cb_data->len = len;
-+}
-+
-+void lprocfs_exp_print_uuid(void *obj, void *cb_data)
-+{
-+ struct obd_export *exp = (struct obd_export *)obj;
-+ struct exp_uuid_cb_data *data = (struct exp_uuid_cb_data *)cb_data;
-+
-+ if (exp->exp_nid_stats)
-+ *data->len += snprintf((data->page + *data->len),
-+ data->count, "%s\n",
-+ obd_uuid2str(&exp->exp_client_uuid));
-+}
-+
-+int lprocfs_exp_rd_uuid(char *page, char **start, off_t off, int count,
-+ int *eof, void *data)
-+{
-+ struct nid_stat *stats = (struct nid_stat *)data;
-+ struct exp_uuid_cb_data cb_data;
-+ struct obd_device *obd = stats->nid_obd;
-+ int len = 0;
-+
-+ *eof = 1;
-+ page[0] = '\0';
-+ lprocfs_exp_rd_cb_data_init(&cb_data, page, count, eof, &len);
-+ lustre_hash_for_each_key(obd->obd_nid_hash, &stats->nid,
-+ lprocfs_exp_print_uuid, &cb_data);
-+ return (*cb_data.len);
-+}
-+
-+void lprocfs_exp_print_hash(void *obj, void *cb_data)
-+{
-+ struct obd_export *exp = (struct obd_export *)obj;
-+ struct exp_uuid_cb_data *data = (struct exp_uuid_cb_data *)cb_data;
-+ lustre_hash_t *lh;
-+
-+ lh = exp->exp_lock_hash;
-+ if (lh) {
-+ if (!*data->len)
-+ *data->len += lustre_hash_debug_header(data->page,
-+ data->count);
-+
-+ *data->len += lustre_hash_debug_str(lh, data->page +
-+ *data->len,
-+ data->count);
-+ }
-+}
-+
-+int lprocfs_exp_rd_hash(char *page, char **start, off_t off, int count,
-+ int *eof, void *data)
-+{
-+ struct nid_stat *stats = (struct nid_stat *)data;
-+ struct exp_uuid_cb_data cb_data;
-+ struct obd_device *obd = stats->nid_obd;
-+ int len = 0;
-+
-+ *eof = 1;
-+ page[0] = '\0';
-+ lprocfs_exp_rd_cb_data_init(&cb_data, page, count, eof, &len);
-+ lustre_hash_for_each_key(obd->obd_nid_hash, &stats->nid,
-+ lprocfs_exp_print_hash, &cb_data);
-+ return (*cb_data.len);
-+}
-+
-+int lprocfs_nid_stats_clear_read(char *page, char **start, off_t off,
-+ int count, int *eof, void *data)
-+{
-+ *eof = 1;
-+ return snprintf(page, count, "%s\n",
-+ "Write into this file to clear all nid stats and "
-+ "stale nid entries");
-+}
-+EXPORT_SYMBOL(lprocfs_nid_stats_clear_read);
-+
-+void lprocfs_nid_stats_clear_write_cb(void *obj, void *data)
-+{
-+ struct nid_stat *stat = obj;
-+ int i;
-+
-+ /* object has only hash + iterate_all references.
-+ * add/delete blocked by hash bucket lock */
-+ CDEBUG(D_INFO,"refcnt %d\n", stat->nid_exp_ref_count);
-+ if (stat->nid_exp_ref_count == 2) {
-+ hlist_del_init(&stat->nid_hash);
-+ stat->nid_exp_ref_count--;
-+ spin_lock(&stat->nid_obd->obd_nid_lock);
-+ list_del_init(&stat->nid_list);
-+ spin_unlock(&stat->nid_obd->obd_nid_lock);
-+ list_add(&stat->nid_list, data);
-+ EXIT;
-+ return;
-+ }
-+ /* we have a reference to the object - only clear the data */
-+ if (stat->nid_stats)
-+ lprocfs_clear_stats(stat->nid_stats);
-+
-+ if (stat->nid_brw_stats) {
-+ for (i = 0; i < BRW_LAST; i++)
-+ lprocfs_oh_clear(&stat->nid_brw_stats->hist[i]);
-+ }
-+ EXIT;
-+ return;
-+}
-+
-+int lprocfs_nid_stats_clear_write(struct file *file, const char *buffer,
-+ unsigned long count, void *data)
-+{
-+ struct obd_device *obd = (struct obd_device *)data;
-+ struct nid_stat *client_stat;
-+ CFS_LIST_HEAD(free_list);
-+
-+ lustre_hash_for_each(obd->obd_nid_stats_hash,
-+ lprocfs_nid_stats_clear_write_cb, &free_list);
-+
-+ while (!list_empty(&free_list)) {
-+ client_stat = list_entry(free_list.next, struct nid_stat,
-+ nid_list);
-+ list_del_init(&client_stat->nid_list);
-+ lprocfs_free_client_stats(client_stat);
-+ }
-+
-+ return count;
-+}
-+EXPORT_SYMBOL(lprocfs_nid_stats_clear_write);
-+
-+int lprocfs_exp_setup(struct obd_export *exp, lnet_nid_t *nid, int *newnid)
-+{
-+ struct nid_stat *new_stat, *old_stat;
-+ struct nid_stat_uuid *new_ns_uuid;
-+ struct obd_device *obd;
-+ int rc = 0;
-+ ENTRY;
-+
-+ *newnid = 0;
-+
-+ if (!exp || !exp->exp_obd || !exp->exp_obd->obd_proc_exports_entry ||
-+ !exp->exp_obd->obd_nid_stats_hash)
-+ RETURN(-EINVAL);
-+
-+ /* Do not test against zero because, as Eric says:
-+ * you may only test a nid against another nid, or LNET_NID_ANY.
-+ * Anything else is nonsense. */
-+ if (!nid || *nid == LNET_NID_ANY)
-+ RETURN(0);
-+
-+ obd = exp->exp_obd;
-+
-+ CDEBUG(D_CONFIG, "using hash %p\n", obd->obd_nid_stats_hash);
-+
-+ OBD_ALLOC_PTR(new_stat);
-+ if (new_stat == NULL)
-+ RETURN(-ENOMEM);
-+
-+ OBD_ALLOC_PTR(new_ns_uuid);
-+ if (new_ns_uuid == NULL) {
-+ OBD_FREE_PTR(new_stat);
-+ RETURN(-ENOMEM);
-+ }
-+ CFS_INIT_LIST_HEAD(&new_ns_uuid->ns_uuid_list);
-+ strncpy(new_ns_uuid->ns_uuid.uuid, exp->exp_client_uuid.uuid,
-+ sizeof(struct obd_uuid));
-+
-+ CFS_INIT_LIST_HEAD(&new_stat->nid_uuid_list);
-+ new_stat->nid = *nid;
-+ new_stat->nid_obd = exp->exp_obd;
-+ /* needs to live in the hash after the export is destroyed */
-+ new_stat->nid_exp_ref_count = 1;
-+
-+ old_stat = lustre_hash_findadd_unique(obd->obd_nid_stats_hash,
-+ nid, &new_stat->nid_hash);
-+ CDEBUG(D_INFO, "Found stats %p for nid %s - ref %d\n",
-+ old_stat, libcfs_nid2str(*nid), new_stat->nid_exp_ref_count);
-+
-+ /* Return -EALREADY here so that we know that the /proc
-+ * entry has already been created */
-+ if (old_stat != new_stat) {
-+ struct nid_stat_uuid *tmp_uuid;
-+ int found = 0;
-+
-+ exp->exp_nid_stats = old_stat;
-+
-+ /* We need to decrement the refcount if the uuid was
-+ * already in our list */
-+ spin_lock(&obd->obd_nid_lock);
-+ list_for_each_entry(tmp_uuid, &old_stat->nid_uuid_list,
-+ ns_uuid_list) {
-+ if (tmp_uuid && obd_uuid_equals(&tmp_uuid->ns_uuid,
-+ &exp->exp_client_uuid)){
-+ found = 1;
-+ --old_stat->nid_exp_ref_count;
-+ break;
-+ }
-+ }
-+
-+ if (!found)
-+ list_add(&new_ns_uuid->ns_uuid_list,
-+ &old_stat->nid_uuid_list);
-+ else
-+ OBD_FREE_PTR(new_ns_uuid);
-+ spin_unlock(&obd->obd_nid_lock);
-+
-+ GOTO(destroy_new, rc = -EALREADY);
-+ }
-+ /* not found - create */
-+ new_stat->nid_proc = proc_mkdir(libcfs_nid2str(*nid),
-+ obd->obd_proc_exports_entry);
-+ if (!new_stat->nid_proc) {
-+ CERROR("Error making export directory for"
-+ " nid %s\n", libcfs_nid2str(*nid));
-+ GOTO(destroy_new_ns, rc = -ENOMEM);
-+ }
-+
-+ /* Add in uuid to our nid_stats list */
-+ spin_lock(&obd->obd_nid_lock);
-+ list_add(&new_ns_uuid->ns_uuid_list, &new_stat->nid_uuid_list);
-+ spin_unlock(&obd->obd_nid_lock);
-+
-+ rc = lprocfs_add_simple(new_stat->nid_proc, "uuid",
-+ lprocfs_exp_rd_uuid, NULL, new_stat);
-+ if (rc) {
-+ CWARN("Error adding the uuid file\n");
-+ GOTO(destroy_new_ns, rc);
-+ }
-+
-+ rc = lprocfs_add_simple(new_stat->nid_proc, "hash",
-+ lprocfs_exp_rd_hash, NULL, new_stat);
-+ if (rc) {
-+ CWARN("Error adding the hash file\n");
-+ lprocfs_remove(&new_stat->nid_proc);
-+ GOTO(destroy_new_ns, rc);
-+ }
-+
-+ exp->exp_nid_stats = new_stat;
-+ *newnid = 1;
-+ /* protect against concurrent adds to the list; no locking needed on destroy */
-+ spin_lock(&obd->obd_nid_lock);
-+ list_add(&new_stat->nid_list, &obd->obd_nid_stats);
-+ spin_unlock(&obd->obd_nid_lock);
-+
-+ RETURN(rc);
-+
-+destroy_new_ns:
-+ lustre_hash_del(obd->obd_nid_stats_hash, nid, &new_stat->nid_hash);
-+ OBD_FREE_PTR(new_ns_uuid);
-+
-+destroy_new:
-+ OBD_FREE_PTR(new_stat);
-+ RETURN(rc);
-+}
-+
-+int lprocfs_exp_cleanup(struct obd_export *exp)
-+{
-+ struct nid_stat *stat = exp->exp_nid_stats;
-+ struct nid_stat_uuid *cursor, *tmp;
-+ int found = 0;
-+
-+ if(!stat || !exp->exp_obd)
-+ RETURN(0);
-+
-+ spin_lock(&exp->exp_obd->obd_nid_lock);
-+ list_for_each_entry_safe(cursor, tmp,
-+ &stat->nid_uuid_list,
-+ ns_uuid_list) {
-+ if (cursor && obd_uuid_equals(&cursor->ns_uuid,
-+ &exp->exp_client_uuid)) {
-+ found = 1;
-+ list_del(&cursor->ns_uuid_list);
-+ OBD_FREE_PTR(cursor);
-+ --stat->nid_exp_ref_count;
-+ CDEBUG(D_INFO, "Put stat %p - %d\n", stat,
-+ stat->nid_exp_ref_count);
-+ break;
-+ }
-+ }
-+ spin_unlock(&exp->exp_obd->obd_nid_lock);
-+ if (!found)
-+ CERROR("obd_export's client uuid %s are not found in its "
-+ "nid_stats list\n", exp->exp_client_uuid.uuid);
-+
-+ exp->exp_nid_stats = NULL;
-+ lprocfs_free_stats(&exp->exp_ops_stats);
-+
-+ return 0;
-+}
-+
-+int lprocfs_write_helper(const char *buffer, unsigned long count,
-+ int *val)
-+{
-+ return lprocfs_write_frac_helper(buffer, count, val, 1);
-+}
-+
-+int lprocfs_write_frac_helper(const char *buffer, unsigned long count,
-+ int *val, int mult)
-+{
-+ char kernbuf[20], *end, *pbuf;
-+
-+ if (count > (sizeof(kernbuf) - 1))
-+ return -EINVAL;
-+
-+ if (copy_from_user(kernbuf, buffer, count))
-+ return -EFAULT;
-+
-+ kernbuf[count] = '\0';
-+ pbuf = kernbuf;
-+ if (*pbuf == '-') {
-+ mult = -mult;
-+ pbuf++;
-+ }
-+
-+ *val = (int)simple_strtoul(pbuf, &end, 10) * mult;
-+ if (pbuf == end)
-+ return -EINVAL;
-+
-+ if (end != NULL && *end == '.') {
-+ int temp_val, pow = 1;
-+ int i;
-+
-+ pbuf = end + 1;
-+ if (strlen(pbuf) > 5)
-+ pbuf[5] = '\0'; /* only allow 5 fractional digits */
-+
-+ temp_val = (int)simple_strtoul(pbuf, &end, 10) * mult;
-+
-+ if (pbuf < end) {
-+ for (i = 0; i < (end - pbuf); i++)
-+ pow *= 10;
-+
-+ *val += temp_val / pow;
-+ }
-+ }
-+ return 0;
-+}
-+
-+int lprocfs_read_frac_helper(char *buffer, unsigned long count, long val,
-+ int mult)
-+{
-+ long decimal_val, frac_val;
-+ int prtn;
-+
-+ if (count < 10)
-+ return -EINVAL;
-+
-+ decimal_val = val / mult;
-+ prtn = snprintf(buffer, count, "%ld", decimal_val);
-+ frac_val = val % mult;
-+
-+ if (prtn < (count - 4) && frac_val > 0) {
-+ long temp_frac;
-+ int i, temp_mult = 1, frac_bits = 0;
-+
-+ temp_frac = frac_val * 10;
-+ buffer[prtn++] = '.';
-+ while (frac_bits < 2 && (temp_frac / mult) < 1 ) {
-+ /* only keep 2 fractional digits */
-+ buffer[prtn++] ='0';
-+ temp_frac *= 10;
-+ frac_bits++;
-+ }
-+ /*
-+ Cases to consider:
-+ 1. #echo x.00 > /proc/xxx output result : x
-+ 2. #echo x.0x > /proc/xxx output result : x.0x
-+ 3. #echo x.x0 > /proc/xxx output result : x.x
-+ 4. #echo x.xx > /proc/xxx output result : x.xx
-+ Only 2 fractional digits are kept.
-+ */
-+ for (i = 0; i < (5 - prtn); i++)
-+ temp_mult *= 10;
-+
-+ frac_bits = min((int)count - prtn, 3 - frac_bits);
-+ prtn += snprintf(buffer + prtn, frac_bits, "%ld",
-+ frac_val * temp_mult / mult);
-+
-+ prtn--;
-+ while(buffer[prtn] < '1' || buffer[prtn] > '9') {
-+ prtn--;
-+ if (buffer[prtn] == '.') {
-+ prtn--;
-+ break;
-+ }
-+ }
-+ prtn++;
-+ }
-+ buffer[prtn++] ='\n';
-+ return prtn;
-+}
-+
-+int lprocfs_write_u64_helper(const char *buffer, unsigned long count,__u64 *val)
-+{
-+ return lprocfs_write_frac_u64_helper(buffer, count, val, 1);
-+}
-+
-+int lprocfs_write_frac_u64_helper(const char *buffer, unsigned long count,
-+ __u64 *val, int mult)
-+{
-+ char kernbuf[22], *end, *pbuf;
-+ __u64 whole, frac = 0, units;
-+ unsigned frac_d = 1;
-+
-+ if (count > (sizeof(kernbuf) - 1))
-+ return -EINVAL;
-+
-+ if (copy_from_user(kernbuf, buffer, count))
-+ return -EFAULT;
-+
-+ kernbuf[count] = '\0';
-+ pbuf = kernbuf;
-+ if (*pbuf == '-') {
-+ mult = -mult;
-+ pbuf++;
-+ }
-+
-+ whole = simple_strtoull(pbuf, &end, 10);
-+ if (pbuf == end)
-+ return -EINVAL;
-+
-+ if (end != NULL && *end == '.') {
-+ int i;
-+ pbuf = end + 1;
-+
-+ /* need to limit frac_d to a __u32 */
-+ if (strlen(pbuf) > 10)
-+ pbuf[10] = '\0';
-+
-+ frac = simple_strtoull(pbuf, &end, 10);
-+ /* count decimal places */
-+ for (i = 0; i < (end - pbuf); i++)
-+ frac_d *= 10;
-+ }
-+
-+ units = 1;
-+ switch(*end) {
-+ case 'p': case 'P':
-+ units <<= 10;
-+ case 't': case 'T':
-+ units <<= 10;
-+ case 'g': case 'G':
-+ units <<= 10;
-+ case 'm': case 'M':
-+ units <<= 10;
-+ case 'k': case 'K':
-+ units <<= 10;
-+ }
-+ /* Specified units override the multiplier */
-+ if (units)
-+ mult = mult < 0 ? -units : units;
-+
-+ frac *= mult;
-+ do_div(frac, frac_d);
-+ *val = whole * mult + frac;
-+ return 0;
-+}
-+
-+int lprocfs_seq_create(cfs_proc_dir_entry_t *parent,
-+ char *name, mode_t mode,
-+ struct file_operations *seq_fops, void *data)
-+{
-+ struct proc_dir_entry *entry;
-+ ENTRY;
-+
-+ entry = create_proc_entry(name, mode, parent);
-+ if (entry == NULL)
-+ RETURN(-ENOMEM);
-+ entry->proc_fops = seq_fops;
-+ entry->data = data;
-+
-+ RETURN(0);
-+}
-+EXPORT_SYMBOL(lprocfs_seq_create);
-+
-+__inline__ int lprocfs_obd_seq_create(struct obd_device *dev, char *name,
-+ mode_t mode,
-+ struct file_operations *seq_fops,
-+ void *data)
-+{
-+ return (lprocfs_seq_create(dev->obd_proc_entry, name,
-+ mode, seq_fops, data));
-+}
-+EXPORT_SYMBOL(lprocfs_obd_seq_create);
-+
-+void lprocfs_oh_tally(struct obd_histogram *oh, unsigned int value)
-+{
-+ if (value >= OBD_HIST_MAX)
-+ value = OBD_HIST_MAX - 1;
-+
-+ spin_lock(&oh->oh_lock);
-+ oh->oh_buckets[value]++;
-+ spin_unlock(&oh->oh_lock);
-+}
-+EXPORT_SYMBOL(lprocfs_oh_tally);
-+
-+void lprocfs_oh_tally_log2(struct obd_histogram *oh, unsigned int value)
-+{
-+ unsigned int val;
-+
-+ for (val = 0; ((1 << val) < value) && (val <= OBD_HIST_MAX); val++)
-+ ;
-+
-+ lprocfs_oh_tally(oh, val);
-+}
-+EXPORT_SYMBOL(lprocfs_oh_tally_log2);
-+
-+unsigned long lprocfs_oh_sum(struct obd_histogram *oh)
-+{
-+ unsigned long ret = 0;
-+ int i;
-+
-+ for (i = 0; i < OBD_HIST_MAX; i++)
-+ ret += oh->oh_buckets[i];
-+ return ret;
-+}
-+EXPORT_SYMBOL(lprocfs_oh_sum);
-+
-+void lprocfs_oh_clear(struct obd_histogram *oh)
-+{
-+ spin_lock(&oh->oh_lock);
-+ memset(oh->oh_buckets, 0, sizeof(oh->oh_buckets));
-+ spin_unlock(&oh->oh_lock);
-+}
-+EXPORT_SYMBOL(lprocfs_oh_clear);
-+
-+int lprocfs_obd_rd_recovery_status(char *page, char **start, off_t off,
-+ int count, int *eof, void *data)
-+{
-+ struct obd_device *obd = data;
-+ int len = 0, size;
-+
-+ LASSERT(obd != NULL);
-+ LASSERT(count >= 0);
-+
-+ /* Set the start of the user data returned to
-+ page + off since the user may have
-+ requested to read much less than
-+ what we need to return */
-+ *start = page + off;
-+
-+ /* We know we are allocated a page here.
-+ Also we know that this function will
-+ not need to write more than a page
-+ so we can truncate at CFS_PAGE_SIZE. */
-+ size = min(count + (int)off + 1, (int)CFS_PAGE_SIZE);
-+
-+ /* Initialize the page */
-+ memset(page, 0, size);
-+
-+ if (lprocfs_obd_snprintf(&page, size, &len, "status: ") <= 0)
-+ goto out;
-+ if (obd->obd_max_recoverable_clients == 0) {
-+ if (lprocfs_obd_snprintf(&page, size, &len, "INACTIVE\n") <= 0)
-+ goto out;
-+
-+ goto fclose;
-+ }
-+
-+ /* sampled unlocked, but really... */
-+ if (obd->obd_recovering == 0) {
-+ if (lprocfs_obd_snprintf(&page, size, &len, "COMPLETE\n") <= 0)
-+ goto out;
-+ if (lprocfs_obd_snprintf(&page, size, &len,
-+ "recovery_start: %lu\n",
-+ obd->obd_recovery_start) <= 0)
-+ goto out;
-+ if (lprocfs_obd_snprintf(&page, size, &len,
-+ "recovery_duration: %lu\n",
-+ obd->obd_recovery_end -
-+ obd->obd_recovery_start) <= 0)
-+ goto out;
-+ /* Number of clients that have completed recovery */
-+ if (lprocfs_obd_snprintf(&page, size, &len,
-+ "completed_clients: %d/%d\n",
-+ obd->obd_max_recoverable_clients -
-+ obd->obd_recoverable_clients,
-+ obd->obd_max_recoverable_clients) <= 0)
-+ goto out;
-+ if (lprocfs_obd_snprintf(&page, size, &len,
-+ "replayed_requests: %d\n",
-+ obd->obd_replayed_requests) <= 0)
-+ goto out;
-+ if (lprocfs_obd_snprintf(&page, size, &len,
-+ "last_transno: "LPD64"\n",
-+ obd->obd_next_recovery_transno - 1)<=0)
-+ goto out;
-+ goto fclose;
-+ }
-+
-+ if (lprocfs_obd_snprintf(&page, size, &len, "RECOVERING\n") <= 0)
-+ goto out;
-+ if (lprocfs_obd_snprintf(&page, size, &len, "recovery_start: %lu\n",
-+ obd->obd_recovery_start) <= 0)
-+ goto out;
-+ if (lprocfs_obd_snprintf(&page, size, &len, "time_remaining: %lu\n",
-+ cfs_time_current_sec() >= obd->obd_recovery_end ? 0 :
-+ obd->obd_recovery_end - cfs_time_current_sec()) <= 0)
-+ goto out;
-+ if (lprocfs_obd_snprintf(&page, size, &len,"connected_clients: %d/%d\n",
-+ obd->obd_connected_clients,
-+ obd->obd_max_recoverable_clients) <= 0)
-+ goto out;
-+ /* Number of clients that have completed recovery */
-+ if (lprocfs_obd_snprintf(&page, size, &len,"completed_clients: %d/%d\n",
-+ obd->obd_max_recoverable_clients -
-+ obd->obd_recoverable_clients,
-+ obd->obd_max_recoverable_clients) <= 0)
-+ goto out;
-+ if (lprocfs_obd_snprintf(&page, size, &len,"replayed_requests: %d/??\n",
-+ obd->obd_replayed_requests) <= 0)
-+ goto out;
-+ if (lprocfs_obd_snprintf(&page, size, &len, "queued_requests: %d\n",
-+ obd->obd_requests_queued_for_recovery) <= 0)
-+ goto out;
-+ if (lprocfs_obd_snprintf(&page, size, &len, "next_transno: "LPD64"\n",
-+ obd->obd_next_recovery_transno) <= 0)
-+ goto out;
-+
-+fclose:
-+ *eof = 1;
-+out:
-+ return min(count, len - (int)off);
-+}
-+EXPORT_SYMBOL(lprocfs_obd_rd_recovery_status);
-+
-+int lprocfs_obd_rd_hash(char *page, char **start, off_t off,
-+ int count, int *eof, void *data)
-+{
-+ struct obd_device *obd = data;
-+ int c = 0;
-+
-+ if (obd == NULL)
-+ return 0;
-+
-+ c += lustre_hash_debug_header(page, count);
-+ c += lustre_hash_debug_str(obd->obd_uuid_hash, page + c, count - c);
-+ c += lustre_hash_debug_str(obd->obd_nid_hash, page + c, count - c);
-+ c += lustre_hash_debug_str(obd->obd_nid_stats_hash, page+c, count-c);
-+
-+ return c;
-+}
-+EXPORT_SYMBOL(lprocfs_obd_rd_hash);
-+
-+#ifdef CRAY_XT3
-+int lprocfs_obd_rd_recovery_maxtime(char *page, char **start, off_t off,
-+ int count, int *eof, void *data)
-+{
-+ struct obd_device *obd = (struct obd_device *)data;
-+ LASSERT(obd != NULL);
-+
-+ return snprintf(page, count, "%lu\n",
-+ obd->obd_recovery_max_time);
-+}
-+EXPORT_SYMBOL(lprocfs_obd_rd_recovery_maxtime);
-+
-+int lprocfs_obd_wr_recovery_maxtime(struct file *file, const char *buffer,
-+ unsigned long count, void *data)
-+{
-+ struct obd_device *obd = (struct obd_device *)data;
-+ int val, rc;
-+ LASSERT(obd != NULL);
-+
-+ rc = lprocfs_write_helper(buffer, count, &val);
-+ if (rc)
-+ return rc;
-+
-+ obd->obd_recovery_max_time = val;
-+ return count;
-+}
-+EXPORT_SYMBOL(lprocfs_obd_wr_recovery_maxtime);
-+#endif /* CRAY_XT3 */
-+
-+EXPORT_SYMBOL(lprocfs_register);
-+EXPORT_SYMBOL(lprocfs_srch);
-+EXPORT_SYMBOL(lprocfs_remove);
-+EXPORT_SYMBOL(lprocfs_add_vars);
-+EXPORT_SYMBOL(lprocfs_obd_setup);
-+EXPORT_SYMBOL(lprocfs_obd_cleanup);
-+EXPORT_SYMBOL(lprocfs_add_simple);
-+EXPORT_SYMBOL(lprocfs_free_per_client_stats);
-+EXPORT_SYMBOL(lprocfs_alloc_stats);
-+EXPORT_SYMBOL(lprocfs_free_stats);
-+EXPORT_SYMBOL(lprocfs_clear_stats);
-+EXPORT_SYMBOL(lprocfs_register_stats);
-+EXPORT_SYMBOL(lprocfs_init_ops_stats);
-+EXPORT_SYMBOL(lprocfs_init_ldlm_stats);
-+EXPORT_SYMBOL(lprocfs_alloc_obd_stats);
-+EXPORT_SYMBOL(lprocfs_free_obd_stats);
-+EXPORT_SYMBOL(lprocfs_exp_setup);
-+EXPORT_SYMBOL(lprocfs_exp_cleanup);
-+
-+EXPORT_SYMBOL(lprocfs_rd_u64);
-+EXPORT_SYMBOL(lprocfs_rd_atomic);
-+EXPORT_SYMBOL(lprocfs_wr_atomic);
-+EXPORT_SYMBOL(lprocfs_rd_uint);
-+EXPORT_SYMBOL(lprocfs_wr_uint);
-+EXPORT_SYMBOL(lprocfs_rd_uuid);
-+EXPORT_SYMBOL(lprocfs_rd_name);
-+EXPORT_SYMBOL(lprocfs_rd_fstype);
-+EXPORT_SYMBOL(lprocfs_rd_server_uuid);
-+EXPORT_SYMBOL(lprocfs_rd_conn_uuid);
-+EXPORT_SYMBOL(lprocfs_rd_num_exports);
-+EXPORT_SYMBOL(lprocfs_rd_numrefs);
-+EXPORT_SYMBOL(lprocfs_at_hist_helper);
-+EXPORT_SYMBOL(lprocfs_rd_import);
-+EXPORT_SYMBOL(lprocfs_rd_timeouts);
-+EXPORT_SYMBOL(lprocfs_rd_blksize);
-+EXPORT_SYMBOL(lprocfs_rd_kbytestotal);
-+EXPORT_SYMBOL(lprocfs_rd_kbytesfree);
-+EXPORT_SYMBOL(lprocfs_rd_kbytesavail);
-+EXPORT_SYMBOL(lprocfs_rd_filestotal);
-+EXPORT_SYMBOL(lprocfs_rd_filesfree);
-+
-+EXPORT_SYMBOL(lprocfs_write_helper);
-+EXPORT_SYMBOL(lprocfs_write_frac_helper);
-+EXPORT_SYMBOL(lprocfs_read_frac_helper);
-+EXPORT_SYMBOL(lprocfs_write_u64_helper);
-+EXPORT_SYMBOL(lprocfs_write_frac_u64_helper);
-+#endif /* LPROCFS*/
-diff -urNad lustre~/lustre/ptlrpc/service.c lustre/lustre/ptlrpc/service.c
---- lustre~/lustre/ptlrpc/service.c 2009-03-12 10:32:27.000000000 +0100
-+++ lustre/lustre/ptlrpc/service.c 2009-03-13 09:45:03.000000000 +0100
-@@ -1501,7 +1501,7 @@
- cfs_daemonize(name);
- exit_fs(cfs_current());
- current->fs = fs;
-- ll_set_fs_pwd(current->fs, init_task.fs->pwdmnt, init_task.fs->pwd);
-+ ll_set_fs_pwd(current->fs, cfs_fs_mnt(init_task.fs), cfs_fs_pwd(init_task.fs));
- }
-
- static void
--
Lustre Debian Packaging
More information about the Pkg-lustre-svn-commit mailing list