[Pkg-lustre-svn-commit] updated: [d6e4e96] Delete old unneeded patches
Patrick Winnertz
winnie at debian.org
Fri Jun 5 13:57:57 UTC 2009
The following commit has been merged in the master branch:
commit d6e4e96bb8f1a5965216ea5a873f471ff46d3838
Author: Patrick Winnertz <winnie at debian.org>
Date: Fri Jun 5 15:56:42 2009 +0200
Delete old unneeded patches
Signed-off-by: Patrick Winnertz <winnie at debian.org>
diff --git a/debian/patches/patchless_support_2.6.24.dpatch b/debian/patches/patchless_support_2.6.24.dpatch
deleted file mode 100755
index 77832c4..0000000
--- a/debian/patches/patchless_support_2.6.24.dpatch
+++ /dev/null
@@ -1,4057 +0,0 @@
-#! /bin/sh /usr/share/dpatch/dpatch-run
-## posix_acl.patch by Patrick Winnertz <winnie at debian.org>
-##
-## All lines beginning with `## DP:' are a description of the patch.
-## DP: Patch which will enable 2.6.24 patchless support for lustre, taken from #14250
-
-@DPATCH@
-diff -urNad lustre~/lnet/autoconf/lustre-lnet.m4 lustre/lnet/autoconf/lustre-lnet.m4
---- lustre~/lnet/autoconf/lustre-lnet.m4 2008-12-30 11:23:32.000000000 +0100
-+++ lustre/lnet/autoconf/lustre-lnet.m4 2009-03-10 11:41:03.000000000 +0100
-@@ -1290,6 +1290,41 @@
- ])
- ])
-
-+# 2.6.24 asks drivers not to use real (fixed) numbers for ctl_name
-+AC_DEFUN([LN_SYSCTL_UNNUMBERED],
-+[AC_MSG_CHECKING([for CTL_UNNUMBERED])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/sysctl.h>
-+],[
-+ #ifndef CTL_UNNUMBERED
-+ #error CTL_UNNUMBERED does not exist in kernel
-+ #endif
-+],[
-+ AC_MSG_RESULT(yes)
-+ AC_DEFINE(HAVE_SYSCTL_UNNUMBERED, 1,
-+ [sysctl has CTL_UNNUMBERED])
-+],[
-+ AC_MSG_RESULT(NO)
-+])
-+])
-+
-+# 2.6.24 removed scatterlist->page
-+AC_DEFUN([LN_SCATTERLIST_SETPAGE],
-+[AC_MSG_CHECKING([if sg_set_page exists])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/scatterlist.h>
-+],[
-+ sg_set_page(NULL,NULL,0,0);
-+],[
-+ AC_MSG_RESULT(yes)
-+ AC_DEFINE(HAVE_SCATTERLIST_SETPAGE, 1,
-+ [struct scatterlist has page member])
-+],[
-+ AC_MSG_RESULT(NO)
-+])
-+])
-+
-+
- #
- # LN_PROG_LINUX
- #
-@@ -1333,6 +1368,9 @@
- LN_KMEM_CACHE
- # 2.6.23
- LN_KMEM_CACHE_CREATE_DTOR
-+# 2.6.24
-+LN_SYSCTL_UNNUMBERED
-+LN_SCATTERLIST_SETPAGE
- ])
-
- #
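The two checks added above (LN_SYSCTL_UNNUMBERED and LN_SCATTERLIST_SETPAGE) only emit the HAVE_SYSCTL_UNNUMBERED and HAVE_SCATTERLIST_SETPAGE defines; the per-driver hunks further down key their compat code off those defines. As a rough sketch of the sysctl pattern repeated there (names and numbers below are illustrative, not taken from the patch):

    #include <linux/sysctl.h>

    #ifndef HAVE_SYSCTL_UNNUMBERED
    /* pre-2.6.24: keep a stable binary sysctl number */
    #define CTL_EXAMPLE      210   /* illustrative value only */
    enum { EXAMPLE_TIMEOUT = 1 };
    #else
    /* 2.6.24+: binary numbers are deprecated, use CTL_UNNUMBERED */
    #define CTL_EXAMPLE      CTL_UNNUMBERED
    #define EXAMPLE_TIMEOUT  CTL_UNNUMBERED
    #endif

    static int example_timeout = 30;

    static struct ctl_table example_table[] = {
            {
                    .ctl_name     = EXAMPLE_TIMEOUT,
                    .procname     = "timeout",
                    .data         = &example_timeout,
                    .maxlen       = sizeof(int),
                    .mode         = 0644,
                    .proc_handler = &proc_dointvec
            },
            {0}
    };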
-diff -urNad lustre~/lnet/include/libcfs/curproc.h lustre/lnet/include/libcfs/curproc.h
---- lustre~/lnet/include/libcfs/curproc.h 2008-12-30 11:23:32.000000000 +0100
-+++ lustre/lnet/include/libcfs/curproc.h 2009-03-10 11:41:03.000000000 +0100
-@@ -72,6 +72,11 @@
- */
- cfs_kernel_cap_t cfs_curproc_cap_get(void);
- void cfs_curproc_cap_set(cfs_kernel_cap_t cap);
-+
-+typedef __u32 cfs_cap_t;
-+
-+cfs_cap_t cfs_cap_convert_from_kernel(cfs_kernel_cap_t cap);
-+
- #endif
-
- /* __LIBCFS_CURPROC_H__ */
-diff -urNad lustre~/lnet/include/libcfs/linux/linux-prim.h lustre/lnet/include/libcfs/linux/linux-prim.h
---- lustre~/lnet/include/libcfs/linux/linux-prim.h 2008-12-30 11:23:32.000000000 +0100
-+++ lustre/lnet/include/libcfs/linux/linux-prim.h 2009-03-10 11:41:03.000000000 +0100
-@@ -84,6 +84,17 @@
- #endif
- #define cfs_unregister_sysctl_table(t) unregister_sysctl_table(t)
-
-+#define DECLARE_PROC_HANDLER(name) \
-+static int \
-+LL_PROC_PROTO(name) \
-+{ \
-+ DECLARE_LL_PROC_PPOS_DECL; \
-+ \
-+ return proc_call_handler(table->data, write, \
-+ ppos, buffer, lenp, \
-+ __##name); \
-+}
-+
- /*
- * Symbol register
- */
-diff -urNad lustre~/lnet/klnds/gmlnd/gmlnd_module.c lustre/lnet/klnds/gmlnd/gmlnd_module.c
---- lustre~/lnet/klnds/gmlnd/gmlnd_module.c 2008-12-30 11:23:32.000000000 +0100
-+++ lustre/lnet/klnds/gmlnd/gmlnd_module.c 2009-03-10 11:41:03.000000000 +0100
-@@ -78,9 +78,37 @@
- };
-
- #if defined(CONFIG_SYSCTL) && !CFS_SYSFS_MODULE_PARM
-+
-+#ifndef HAVE_SYSCTL_UNNUMBERED
-+
-+#define CTL_GMLND 202
-+
-+enum {
-+ GMLND_PORT = 1,
-+ GMLND_NTX,
-+ GMLND_CREDITS,
-+ GMLND_PEERCREDITS,
-+ GMLND_NLARGE_TX_BUFS,
-+ GMLND_NRX_SMALL,
-+ GMLND_NRX_LARGE
-+};
-+
-+#else
-+#define CTL_GMLND CTL_UNNUMBERED
-+
-+#define GMLND_PORT CTL_UNNUMBERED
-+#define GMLND_NTX CTL_UNNUMBERED
-+#define GMLND_CREDITS CTL_UNNUMBERED
-+#define GMLND_PEERCREDITS CTL_UNNUMBERED
-+#define GMLND_NLARGE_TX_BUFS CTL_UNNUMBERED
-+#define GMLND_NRX_SMALL CTL_UNNUMBERED
-+#define GMLND_NRX_LARGE CTL_UNNUMBERED
-+
-+#endif
-+
- static cfs_sysctl_table_t gmnal_ctl_table[] = {
- {
-- .ctl_name = 1,
-+ .ctl_name = GMLND_PORT,
- .procname = "port",
- .data = &port,
- .maxlen = sizeof (int),
-@@ -88,7 +116,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 2,
-+ .ctl_name = GMLND_NTX,
- .procname = "ntx",
- .data = &ntx,
- .maxlen = sizeof (int),
-@@ -96,7 +124,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 3,
-+ .ctl_name = GMLND_CREDITS,
- .procname = "credits",
- .data = &credits,
- .maxlen = sizeof (int),
-@@ -104,7 +132,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 4,
-+ .ctl_name = GMLND_PEERCREDITS,
- .procname = "peer_credits",
- .data = &peer_credits,
- .maxlen = sizeof (int),
-@@ -112,7 +140,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 5,
-+ .ctl_name = GMLND_NLARGE_TX_BUFS,
- .procname = "nlarge_tx_bufs",
- .data = &nlarge_tx_bufs,
- .maxlen = sizeof (int),
-@@ -120,7 +148,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 6,
-+ .ctl_name = GMLND_NRX_SMALL,
- .procname = "nrx_small",
- .data = &nrx_small,
- .maxlen = sizeof (int),
-@@ -128,7 +156,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 7,
-+ .ctl_name = GMLND_NRX_LARGE,
- .procname = "nrx_large",
- .data = &nrx_large,
- .maxlen = sizeof (int),
-@@ -140,7 +168,7 @@
-
- static cfs_sysctl_table_t gmnal_top_ctl_table[] = {
- {
-- .ctl_name = 207,
-+ .ctl_name = CTL_GMLND,
- .procname = "gmnal",
- .data = NULL,
- .maxlen = 0,
-diff -urNad lustre~/lnet/klnds/iiblnd/iiblnd_modparams.c lustre/lnet/klnds/iiblnd/iiblnd_modparams.c
---- lustre~/lnet/klnds/iiblnd/iiblnd_modparams.c 2008-12-30 11:23:32.000000000 +0100
-+++ lustre/lnet/klnds/iiblnd/iiblnd_modparams.c 2009-03-10 11:41:03.000000000 +0100
-@@ -119,9 +119,50 @@
- * not to truncate the printout; it only needs to be the actual size of the
- * string buffer if we allow writes (and we don't) */
-
-+#ifndef HAVE_SYSCTL_UNNUMBERED
-+
-+#define CTL_IIBLND 203
-+
-+enum {
-+ IIBLND_IPIF_BASENAME = 1,
-+ IIBLND_SERVICE_NAME,
-+ IIBLND_SERVICE_NUMBER,
-+ IIBLND_RECONNECT_MIN,
-+ IIBLND_RECONNECT_MAX,
-+ IIBLND_CONCURRENT_PEERS,
-+ IIBLND_CKSUM,
-+ IIBLND_TIMEOUT,
-+ IIBLND_NTX,
-+ IIBLND_CREDITS,
-+ IIBLND_PEER_CREDITS,
-+ IIBLND_SD_RETRIES,
-+ IIBLND_KEEPALIVE,
-+ IIBLND_CONCURRENT_SENDS
-+};
-+
-+#else
-+#define CTL_IIBLND CTL_UNNUMBERED
-+
-+#define IIBLND_IPIF_BASENAME CTL_UNNUMBERED
-+#define IIBLND_SERVICE_NAME CTL_UNNUMBERED
-+#define IIBLND_SERVICE_NUMBER CTL_UNNUMBERED
-+#define IIBLND_RECONNECT_MIN CTL_UNNUMBERED
-+#define IIBLND_RECONNECT_MAX CTL_UNNUMBERED
-+#define IIBLND_CONCURRENT_PEERS CTL_UNNUMBERED
-+#define IIBLND_CKSUM CTL_UNNUMBERED
-+#define IIBLND_TIMEOUT CTL_UNNUMBERED
-+#define IIBLND_NTX CTL_UNNUMBERED
-+#define IIBLND_CREDITS CTL_UNNUMBERED
-+#define IIBLND_PEER_CREDITS CTL_UNNUMBERED
-+#define IIBLND_SD_RETRIES CTL_UNNUMBERED
-+#define IIBLND_KEEPALIVE CTL_UNNUMBERED
-+#define IIBLND_CONCURRENT_SENDS CTL_UNNUMBERED
-+
-+#endif
-+
- static cfs_sysctl_table_t kibnal_ctl_table[] = {
- {
-- .ctl_name = 1,
-+ .ctl_name = IIBLND_IPIF_BASENAME,
- .procname = "ipif_basename",
- .data = &ipif_basename,
- .maxlen = 1024,
-@@ -129,7 +170,7 @@
- .proc_handler = &proc_dostring
- },
- {
-- .ctl_name = 2,
-+ .ctl_name = IIBLND_SERVICE_NAME,
- .procname = "service_name",
- .data = &service_name,
- .maxlen = 1024,
-@@ -137,7 +178,7 @@
- .proc_handler = &proc_dostring
- },
- {
-- .ctl_name = 3,
-+ .ctl_name = IIBLND_SERVICE_NUMBER,
- .procname = "service_number",
- .data = &service_number,
- .maxlen = sizeof(int),
-@@ -145,7 +186,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 4,
-+ .ctl_name = IIBLND_RECONNECT_MIN,
- .procname = "min_reconnect_interval",
- .data = &min_reconnect_interval,
- .maxlen = sizeof(int),
-@@ -153,7 +194,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 5,
-+ .ctl_name = IIBLND_RECONNECT_MAX,
- .procname = "max_reconnect_interval",
- .data = &max_reconnect_interval,
- .maxlen = sizeof(int),
-@@ -161,7 +202,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 6,
-+ .ctl_name = IIBLND_CONCURRENT_PEERS,
- .procname = "concurrent_peers",
- .data = &concurrent_peers,
- .maxlen = sizeof(int),
-@@ -169,7 +210,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 7,
-+ .ctl_name = IIBLND_CKSUM,
- .procname = "cksum",
- .data = &cksum,
- .maxlen = sizeof(int),
-@@ -177,7 +218,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 8,
-+ .ctl_name = IIBLND_TIMEOUT,
- .procname = "timeout",
- .data = &timeout,
- .maxlen = sizeof(int),
-@@ -185,7 +226,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 9,
-+ .ctl_name = IIBLND_NTX,
- .procname = "ntx",
- .data = &ntx,
- .maxlen = sizeof(int),
-@@ -193,7 +234,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 10,
-+ .ctl_name = IIBLND_CREDITS,
- .procname = "credits",
- .data = &credits,
- .maxlen = sizeof(int),
-@@ -201,7 +242,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 11,
-+ .ctl_name = IIBLND_PEER_CREDITS,
- .procname = "peer_credits",
- .data = &peer_credits,
- .maxlen = sizeof(int),
-@@ -209,7 +250,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 12,
-+ .ctl_name = IIBLND_SD_RETRIES,
- .procname = "sd_retries",
- .data = &sd_retries,
- .maxlen = sizeof(int),
-@@ -217,7 +258,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 13,
-+ .ctl_name = IIBLND_KEEPALIVE,
- .procname = "keepalive",
- .data = &keepalive,
- .maxlen = sizeof(int),
-@@ -225,7 +266,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 14,
-+ .ctl_name = IIBLND_CONCURRENT_SENDS,
- .procname = "concurrent_sends",
- .data = &concurrent_sends,
- .maxlen = sizeof(int),
-@@ -237,7 +278,7 @@
-
- static cfs_sysctl_table_t kibnal_top_ctl_table[] = {
- {
-- .ctl_name = 203,
-+ .ctl_name = CTL_IIBLND,
- .procname = "openibnal",
- .data = NULL,
- .maxlen = 0,
-diff -urNad lustre~/lnet/klnds/o2iblnd/o2iblnd.h lustre/lnet/klnds/o2iblnd/o2iblnd.h
---- lustre~/lnet/klnds/o2iblnd/o2iblnd.h 2008-12-30 11:23:32.000000000 +0100
-+++ lustre/lnet/klnds/o2iblnd/o2iblnd.h 2009-03-10 11:41:03.000000000 +0100
-@@ -773,3 +773,13 @@
- int kiblnd_recv(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, int delayed,
- unsigned int niov, struct iovec *iov, lnet_kiov_t *kiov,
- unsigned int offset, unsigned int mlen, unsigned int rlen);
-+/* compat macros */
-+#ifndef HAVE_SCATTERLIST_SETPAGE
-+static inline void sg_set_page(struct scatterlist *sg, struct page *page,
-+ unsigned int len, unsigned int offset)
-+{
-+ sg->page = page;
-+ sg->offset = offset;
-+ sg->length = len;
-+}
-+#endif
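The inline added above backfills sg_set_page() on kernels older than 2.6.24, where struct scatterlist still exposes its page/offset/length fields directly; with it in scope, the o2iblnd_cb.c hunks below can call sg_set_page() unconditionally. A minimal usage sketch (the function name is made up for illustration):

    #include <linux/scatterlist.h>

    /* Fill consecutive scatterlist entries from an array of pages;
     * works the same whether sg_set_page() comes from the kernel
     * (2.6.24+) or from the compat inline above. */
    static void example_fill_sg(struct scatterlist *sg, struct page **pages,
                                int npages, unsigned int len)
    {
            int i;

            for (i = 0; i < npages; i++)
                    sg_set_page(&sg[i], pages[i], len, 0);
    }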
-diff -urNad lustre~/lnet/klnds/o2iblnd/o2iblnd_cb.c lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c
---- lustre~/lnet/klnds/o2iblnd/o2iblnd_cb.c 2008-12-30 11:23:32.000000000 +0100
-+++ lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c 2009-03-10 11:41:03.000000000 +0100
-@@ -643,9 +643,7 @@
- fragnob = min((int)(iov->iov_len - offset), nob);
- fragnob = min(fragnob, (int)PAGE_SIZE - page_offset);
-
-- sg->page = page;
-- sg->offset = page_offset;
-- sg->length = fragnob;
-+ sg_set_page(sg, page, fragnob, page_offset);
- sg++;
-
- if (offset + fragnob < iov->iov_len) {
-@@ -708,11 +706,10 @@
- fragnob = min((int)(kiov->kiov_len - offset), nob);
-
- memset(sg, 0, sizeof(*sg));
-- sg->page = kiov->kiov_page;
-- sg->offset = kiov->kiov_offset + offset;
-- sg->length = fragnob;
-+ sg_set_page(sg, kiov->kiov_page, fragnob,
-+ kiov->kiov_offset + offset);
- sg++;
--
-+
- offset = 0;
- kiov++;
- nkiov--;
-diff -urNad lustre~/lnet/klnds/o2iblnd/o2iblnd_modparams.c lustre/lnet/klnds/o2iblnd/o2iblnd_modparams.c
---- lustre~/lnet/klnds/o2iblnd/o2iblnd_modparams.c 2008-12-30 11:23:32.000000000 +0100
-+++ lustre/lnet/klnds/o2iblnd/o2iblnd_modparams.c 2009-03-10 11:41:03.000000000 +0100
-@@ -130,9 +130,51 @@
-
- static char ipif_basename_space[32];
-
-+#ifndef HAVE_SYSCTL_UNNUMBERED
-+
-+#define CTL_O2IBLND 205
-+
-+enum {
-+ O2IBLND_SERVICE = 1,
-+ O2IBLND_CKSUM,
-+ O2IBLND_TIMEOUT,
-+ O2IBLND_NTX,
-+ O2IBLND_CREDITS,
-+ O2IBLND_PEER_CREDITS,
-+ O2IBLND_IPIF_BASENAME,
-+ O2IBLND_RETRY_COUNT,
-+ O2IBLND_RNR_RETRY_COUNT,
-+ O2IBLND_KEEPALIVE,
-+ O2IBLND_CONCURRENT_SENDS,
-+ O2IBLND_IB_MTU,
-+ O2IBLND_FMR_POOL_SIZE,
-+ O2IBLND_FMR_FLUSH_TRIGGER,
-+ O2IBLND_FMR_CACHE
-+};
-+#else
-+#define CTL_O2IBLND CTL_UNNUMBERED
-+
-+#define O2IBLND_SERVICE CTL_UNNUMBERED
-+#define O2IBLND_CKSUM CTL_UNNUMBERED
-+#define O2IBLND_TIMEOUT CTL_UNNUMBERED
-+#define O2IBLND_NTX CTL_UNNUMBERED
-+#define O2IBLND_CREDITS CTL_UNNUMBERED
-+#define O2IBLND_PEER_CREDITS CTL_UNNUMBERED
-+#define O2IBLND_IPIF_BASENAME CTL_UNNUMBERED
-+#define O2IBLND_RETRY_COUNT CTL_UNNUMBERED
-+#define O2IBLND_RNR_RETRY_COUNT CTL_UNNUMBERED
-+#define O2IBLND_KEEPALIVE CTL_UNNUMBERED
-+#define O2IBLND_CONCURRENT_SENDS CTL_UNNUMBERED
-+#define O2IBLND_IB_MTU CTL_UNNUMBERED
-+#define O2IBLND_FMR_POOL_SIZE CTL_UNNUMBERED
-+#define O2IBLND_FMR_FLUSH_TRIGGER CTL_UNNUMBERED
-+#define O2IBLND_FMR_CACHE CTL_UNNUMBERED
-+
-+#endif
-+
- static cfs_sysctl_table_t kiblnd_ctl_table[] = {
- {
-- .ctl_name = 1,
-+ .ctl_name = O2IBLND_SERVICE,
- .procname = "service",
- .data = &service,
- .maxlen = sizeof(int),
-@@ -140,7 +182,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 2,
-+ .ctl_name = O2IBLND_CKSUM,
- .procname = "cksum",
- .data = &cksum,
- .maxlen = sizeof(int),
-@@ -148,7 +190,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 3,
-+ .ctl_name = O2IBLND_TIMEOUT,
- .procname = "timeout",
- .data = &timeout,
- .maxlen = sizeof(int),
-@@ -156,7 +198,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 4,
-+ .ctl_name = O2IBLND_NTX,
- .procname = "ntx",
- .data = &ntx,
- .maxlen = sizeof(int),
-@@ -164,7 +206,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 5,
-+ .ctl_name = O2IBLND_CREDITS,
- .procname = "credits",
- .data = &credits,
- .maxlen = sizeof(int),
-@@ -172,7 +214,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 6,
-+ .ctl_name = O2IBLND_PEER_CREDITS,
- .procname = "peer_credits",
- .data = &peer_credits,
- .maxlen = sizeof(int),
-@@ -180,7 +222,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 7,
-+ .ctl_name = O2IBLND_IPIF_BASENAME,
- .procname = "ipif_name",
- .data = ipif_basename_space,
- .maxlen = sizeof(ipif_basename_space),
-@@ -188,7 +230,7 @@
- .proc_handler = &proc_dostring
- },
- {
-- .ctl_name = 8,
-+ .ctl_name = O2IBLND_RETRY_COUNT,
- .procname = "retry_count",
- .data = &retry_count,
- .maxlen = sizeof(int),
-@@ -196,7 +238,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 9,
-+ .ctl_name = O2IBLND_RNR_RETRY_COUNT,
- .procname = "rnr_retry_count",
- .data = &rnr_retry_count,
- .maxlen = sizeof(int),
-@@ -204,7 +246,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 10,
-+ .ctl_name = O2IBLND_KEEPALIVE,
- .procname = "keepalive",
- .data = &keepalive,
- .maxlen = sizeof(int),
-@@ -212,7 +254,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 11,
-+ .ctl_name = O2IBLND_CONCURRENT_SENDS,
- .procname = "concurrent_sends",
- .data = &concurrent_sends,
- .maxlen = sizeof(int),
-@@ -220,7 +262,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 12,
-+ .ctl_name = O2IBLND_IB_MTU,
- .procname = "ib_mtu",
- .data = &ib_mtu,
- .maxlen = sizeof(int),
-@@ -229,7 +271,7 @@
- },
- #if IBLND_MAP_ON_DEMAND
- {
-- .ctl_name = 13,
-+ .ctl_name = O2IBLND_FMR_POOL_SIZE,
- .procname = "fmr_pool_size",
- .data = &fmr_pool_size,
- .maxlen = sizeof(int),
-@@ -237,7 +279,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 14,
-+ .ctl_name = O2IBLND_FMR_FLUSH_TRIGGER,
- .procname = "fmr_flush_trigger",
- .data = &fmr_flush_trigger,
- .maxlen = sizeof(int),
-@@ -245,7 +287,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 15,
-+ .ctl_name = O2IBLND_FMR_CACHE,
- .procname = "fmr_cache",
- .data = &fmr_cache,
- .maxlen = sizeof(int),
-@@ -258,7 +300,7 @@
-
- static cfs_sysctl_table_t kiblnd_top_ctl_table[] = {
- {
-- .ctl_name = 203,
-+ .ctl_name = CTL_O2IBLND,
- .procname = "o2iblnd",
- .data = NULL,
- .maxlen = 0,
-diff -urNad lustre~/lnet/klnds/openiblnd/openiblnd_modparams.c lustre/lnet/klnds/openiblnd/openiblnd_modparams.c
---- lustre~/lnet/klnds/openiblnd/openiblnd_modparams.c 2008-12-30 11:23:32.000000000 +0100
-+++ lustre/lnet/klnds/openiblnd/openiblnd_modparams.c 2009-03-10 11:41:03.000000000 +0100
-@@ -100,9 +100,42 @@
-
- #if defined(CONFIG_SYSCTL) && !CFS_SYSFS_MODULE_PARM
-
-+#ifndef HAVE_SYSCTL_UNNUMBERED
-+#define CTL_KIBNAL 203
-+enum {
-+ KIBNAL_IPIF_BASENAME = 1,
-+ KIBNAL_N_CONND,
-+ KIBNAL_RECONNECT_MIN,
-+ KIBNAL_RECONNECT_MAX,
-+ KIBNAL_CONCURRENT_PEERS,
-+ KIBNAL_CKSUM,
-+ KIBNAL_TIMEOUT,
-+ KIBNAL_NTX,
-+ KIBNAL_CREDITS,
-+ KIBNAL_PEER_CREDITS,
-+ KIBNAL_KEEPALIVE
-+};
-+#else
-+
-+#define CTL_KIBNAL CTL_UNNUMBERED
-+
-+#define KIBNAL_IPIF_BASENAME CTL_UNNUMBERED
-+#define KIBNAL_N_CONND CTL_UNNUMBERED
-+#define KIBNAL_RECONNECT_MIN CTL_UNNUMBERED
-+#define KIBNAL_RECONNECT_MAX CTL_UNNUMBERED
-+#define KIBNAL_CONCURRENT_PEERS CTL_UNNUMBERED
-+#define KIBNAL_CKSUM CTL_UNNUMBERED
-+#define KIBNAL_TIMEOUT CTL_UNNUMBERED
-+#define KIBNAL_NTX CTL_UNNUMBERED
-+#define KIBNAL_CREDITS CTL_UNNUMBERED
-+#define KIBNAL_PEER_CREDITS CTL_UNNUMBERED
-+#define KIBNAL_KEEPALIVE CTL_UNNUMBERED
-+
-+#endif
-+
- static cfs_sysctl_table_t kibnal_ctl_table[] = {
- {
-- .ctl_name = 1,
-+ .ctl_name = KIBNAL_IPIF_BASENAME,
- .procname = "ipif_basename",
- .data = &ipif_basename,
- .maxlen = 1024,
-@@ -110,7 +143,7 @@
- .proc_handler = &proc_dostring
- },
- {
-- .ctl_name = 2,
-+ .ctl_name = KIBNAL_N_CONND,
- .procname = "n_connd",
- .data = &n_connd,
- .maxlen = sizeof(int),
-@@ -118,7 +151,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 3,
-+ .ctl_name = KIBNAL_RECONNECT_MIN,
- .procname = "min_reconnect_interval",
- .data = &min_reconnect_interval,
- .maxlen = sizeof(int),
-@@ -126,7 +159,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 4,
-+ .ctl_name = KIBNAL_RECONNECT_MAX,
- .procname = "max_reconnect_interval",
- .data = &max_reconnect_interval,
- .maxlen = sizeof(int),
-@@ -134,7 +167,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 5,
-+ .ctl_name = KIBNAL_CONCURRENT_PEERS,
- .procname = "concurrent_peers",
- .data = &concurrent_peers,
- .maxlen = sizeof(int),
-@@ -142,7 +175,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 6,
-+ .ctl_name = KIBNAL_CKSUM,
- .procname = "cksum",
- .data = &cksum,
- .maxlen = sizeof(int),
-@@ -150,7 +183,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 7,
-+ .ctl_name = KIBNAL_TIMEOUT,
- .procname = "timeout",
- .data = &timeout,
- .maxlen = sizeof(int),
-@@ -158,7 +191,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 8,
-+ .ctl_name = KIBNAL_NTX,
- .procname = "ntx",
- .data = &ntx,
- .maxlen = sizeof(int),
-@@ -166,7 +199,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 9,
-+ .ctl_name = KIBNAL_CREDITS,
- .procname = "credits",
- .data = &credits,
- .maxlen = sizeof(int),
-@@ -174,7 +207,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 10,
-+ .ctl_name = KIBNAL_PEER_CREDITS,
- .procname = "peer_credits",
- .data = &peer_credits,
- .maxlen = sizeof(int),
-@@ -182,7 +215,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 11,
-+ .ctl_name = KIBNAL_KEEPALIVE,
- .procname = "keepalive",
- .data = &keepalive,
- .maxlen = sizeof(int),
-@@ -194,7 +227,7 @@
-
- static cfs_sysctl_table_t kibnal_top_ctl_table[] = {
- {
-- .ctl_name = 203,
-+ .ctl_name = CTL_KIBNAL,
- .procname = "openibnal",
- .data = NULL,
- .maxlen = 0,
-diff -urNad lustre~/lnet/klnds/ptllnd/ptllnd_modparams.c lustre/lnet/klnds/ptllnd/ptllnd_modparams.c
---- lustre~/lnet/klnds/ptllnd/ptllnd_modparams.c 2008-12-30 11:23:32.000000000 +0100
-+++ lustre/lnet/klnds/ptllnd/ptllnd_modparams.c 2009-03-10 11:41:03.000000000 +0100
-@@ -156,9 +156,54 @@
- }
- #endif
-
-+#ifndef HAVE_SYSCTL_UNNUMBERED
-+
-+#define CTL_PTLLND 207
-+
-+enum {
-+ KPTLLND_NTX = 1,
-+ KPTLLND_MAX_NODES,
-+ KPTLLND_MAX_PROC_PER_NODE,
-+ KPTLLND_CHECKSUM,
-+ KPTLLND_TIMEOUT,
-+ KPTLLND_PORTAL,
-+ KPTLLND_PID,
-+ KPTLLND_RXB_PAGES,
-+ KPTLLND_CREDITS,
-+ KPTLLND_PEERCREDITS,
-+ KPTLLND_MAX_MSG_SIZE,
-+ KPTLLND_PEER_HASH_SIZE,
-+ KPTLLND_RESHEDULE_LOOPS,
-+ KPTLLND_ACK_PUTS,
-+ KPTLLND_TRACETIMEOUT,
-+ KPTLLND_TRACEBASENAME,
-+ KPTLLND_SIMULATION_BITMAP
-+};
-+#else
-+#define CTL_PTLLND CTL_UNNUMBERED
-+
-+#define KPTLLND_NTX CTL_UNNUMBERED
-+#define KPTLLND_MAX_NODES CTL_UNNUMBERED
-+#define KPTLLND_MAX_PROC_PER_NODE CTL_UNNUMBERED
-+#define KPTLLND_CHECKSUM CTL_UNNUMBERED
-+#define KPTLLND_TIMEOUT CTL_UNNUMBERED
-+#define KPTLLND_PORTAL CTL_UNNUMBERED
-+#define KPTLLND_PID CTL_UNNUMBERED
-+#define KPTLLND_RXB_PAGES CTL_UNNUMBERED
-+#define KPTLLND_CREDITS CTL_UNNUMBERED
-+#define KPTLLND_PEERCREDITS CTL_UNNUMBERED
-+#define KPTLLND_MAX_MSG_SIZE CTL_UNNUMBERED
-+#define KPTLLND_PEER_HASH_SIZE CTL_UNNUMBERED
-+#define KPTLLND_RESHEDULE_LOOPS CTL_UNNUMBERED
-+#define KPTLLND_ACK_PUTS CTL_UNNUMBERED
-+#define KPTLLND_TRACETIMEOUT CTL_UNNUMBERED
-+#define KPTLLND_TRACEBASENAME CTL_UNNUMBERED
-+#define KPTLLND_SIMULATION_BITMAP CTL_UNNUMBERED
-+#endif
-+
- static cfs_sysctl_table_t kptllnd_ctl_table[] = {
- {
-- .ctl_name = 1,
-+ .ctl_name = KPTLLND_NTX,
- .procname = "ntx",
- .data = &ntx,
- .maxlen = sizeof(int),
-@@ -166,15 +211,15 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 2,
-+ .ctl_name = KPTLLND_MAX_NODES,
- .procname = "max_nodes",
- .data = &max_nodes,
- .maxlen = sizeof(int),
- .mode = 0444,
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 3,
-+ .ctl_name = KPTLLND_MAX_PROC_PER_NODE,
- .procname = "max_procs_per_node",
- .data = &max_procs_per_node,
- .maxlen = sizeof(int),
-@@ -182,7 +227,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 4,
-+ .ctl_name = KPTLLND_CHECKSUM,
- .procname = "checksum",
- .data = &checksum,
- .maxlen = sizeof(int),
-@@ -190,7 +235,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 5,
-+ .ctl_name = KPTLLND_TIMEOUT,
- .procname = "timeout",
- .data = &timeout,
- .maxlen = sizeof(int),
-@@ -198,7 +243,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 6,
-+ .ctl_name = KPTLLND_PORTAL,
- .procname = "portal",
- .data = &portal,
- .maxlen = sizeof(int),
-@@ -206,7 +251,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 7,
-+ .ctl_name = KPTLLND_PID,
- .procname = "pid",
- .data = &pid,
- .maxlen = sizeof(int),
-@@ -214,7 +259,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 8,
-+ .ctl_name = KPTLLND_RXB_PAGES,
- .procname = "rxb_npages",
- .data = &rxb_npages,
- .maxlen = sizeof(int),
-@@ -222,7 +267,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 9,
-+ .ctl_name = KPTLLND_CREDITS,
- .procname = "credits",
- .data = &credits,
- .maxlen = sizeof(int),
-@@ -230,7 +275,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 10,
-+ .ctl_name = KPTLLND_PEERCREDITS,
- .procname = "peercredits",
- .data = &peercredits,
- .maxlen = sizeof(int),
-@@ -238,7 +283,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 11,
-+ .ctl_name = KPTLLND_MAX_MSG_SIZE,
- .procname = "max_msg_size",
- .data = &max_msg_size,
- .maxlen = sizeof(int),
-@@ -246,7 +291,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 12,
-+ .ctl_name = KPTLLND_PEER_HASH_SIZE,
- .procname = "peer_hash_table_size",
- .data = &peer_hash_table_size,
- .maxlen = sizeof(int),
-@@ -254,7 +299,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 13,
-+ .ctl_name = KPTLLND_RESHEDULE_LOOPS,
- .procname = "reschedule_loops",
- .data = &reschedule_loops,
- .maxlen = sizeof(int),
-@@ -262,7 +307,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 14,
-+ .ctl_name = KPTLLND_ACK_PUTS,
- .procname = "ack_puts",
- .data = &ack_puts,
- .maxlen = sizeof(int),
-@@ -271,7 +316,7 @@
- },
- #ifdef CRAY_XT3
- {
-- .ctl_name = 15,
-+ .ctl_name = KPTLLND_TRACETIMEOUT,
- .procname = "ptltrace_on_timeout",
- .data = &ptltrace_on_timeout,
- .maxlen = sizeof(int),
-@@ -279,7 +324,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 16,
-+ .ctl_name = KPTLLND_TRACEBASENAME,
- .procname = "ptltrace_basename",
- .data = ptltrace_basename_space,
- .maxlen = sizeof(ptltrace_basename_space),
-@@ -290,7 +335,7 @@
- #endif
- #ifdef PJK_DEBUGGING
- {
-- .ctl_name = 17,
-+ .ctl_name = KPTLLND_SIMULATION_BITMAP,
- .procname = "simulation_bitmap",
- .data = &simulation_bitmap,
- .maxlen = sizeof(int),
-@@ -304,7 +349,7 @@
-
- static cfs_sysctl_table_t kptllnd_top_ctl_table[] = {
- {
-- .ctl_name = 203,
-+ .ctl_name = CTL_PTLLND,
- .procname = "ptllnd",
- .data = NULL,
- .maxlen = 0,
-diff -urNad lustre~/lnet/klnds/qswlnd/qswlnd_modparams.c lustre/lnet/klnds/qswlnd/qswlnd_modparams.c
---- lustre~/lnet/klnds/qswlnd/qswlnd_modparams.c 2008-12-30 11:23:32.000000000 +0100
-+++ lustre/lnet/klnds/qswlnd/qswlnd_modparams.c 2009-03-10 11:41:03.000000000 +0100
-@@ -87,9 +87,45 @@
- };
-
- #if defined(CONFIG_SYSCTL) && !CFS_SYSFS_MODULE_PARM
-+
-+#ifndef HAVE_SYSCTL_UNNUMBERED
-+
-+#define CTL_KQSWNAL 207
-+
-+enum {
-+ KQSWNAL_TX_MAXCONTIG = 1,
-+ KQSWNAL_NTXMSG,
-+ KQSWNAL_CREDITS,
-+ KQSWNAL_PEERCREDITS,
-+ KQSWNAL_NRXMSGS_LARGE,
-+ KQSWNAL_EP_ENVELOPES_LARGE,
-+ KQSWNAL_NRXMSGS_SMALL,
-+ KQSWNAL_EP_ENVELOPES_SMALL,
-+ KQSWNAL_OPTIMIZED_PUTS,
-+ KQSWNAL_OPTIMIZED_GETS,
-+ KQSWNAL_INJECT_CSUM_ERROR
-+};
-+#else
-+
-+#define CTL_KQSWNAL CTL_UNNUMBERED
-+
-+#define KQSWNAL_TX_MAXCONTIG CTL_UNNUMBERED
-+#define KQSWNAL_NTXMSG CTL_UNNUMBERED
-+#define KQSWNAL_CREDITS CTL_UNNUMBERED
-+#define KQSWNAL_PEERCREDITS CTL_UNNUMBERED
-+#define KQSWNAL_NRXMSGS_LARGE CTL_UNNUMBERED
-+#define KQSWNAL_EP_ENVELOPES_LARGE CTL_UNNUMBERED
-+#define KQSWNAL_NRXMSGS_SMALL CTL_UNNUMBERED
-+#define KQSWNAL_EP_ENVELOPES_SMALL CTL_UNNUMBERED
-+#define KQSWNAL_OPTIMIZED_PUTS CTL_UNNUMBERED
-+#define KQSWNAL_OPTIMIZED_GETS CTL_UNNUMBERED
-+#define KQSWNAL_INJECT_CSUM_ERROR CTL_UNNUMBERED
-+
-+#endif
-+
- static cfs_sysctl_table_t kqswnal_ctl_table[] = {
- {
-- .ctl_name = 1,
-+ .ctl_name = KQSWNAL_TX_MAXCONTIG,
- .procname = "tx_maxcontig",
- .data = &tx_maxcontig,
- .maxlen = sizeof (int),
-@@ -97,7 +133,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 2,
-+ .ctl_name = KQSWNAL_NTXMSG,
- .procname = "ntxmsgs",
- .data = &ntxmsgs,
- .maxlen = sizeof (int),
-@@ -105,7 +141,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 3,
-+ .ctl_name = KQSWNAL_CREDITS,
- .procname = "credits",
- .data = &credits,
- .maxlen = sizeof (int),
-@@ -113,7 +149,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 4,
-+ .ctl_name = KQSWNAL_PEERCREDITS,
- .procname = "peer_credits",
- .data = &peer_credits,
- .maxlen = sizeof (int),
-@@ -121,7 +157,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 5,
-+ .ctl_name = KQSWNAL_NRXMSGS_LARGE,
- .procname = "nrxmsgs_large",
- .data = &nrxmsgs_large,
- .maxlen = sizeof (int),
-@@ -129,7 +165,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 6,
-+ .ctl_name = KQSWNAL_EP_ENVELOPES_LARGE,
- .procname = "ep_envelopes_large",
- .data = &ep_envelopes_large,
- .maxlen = sizeof (int),
-@@ -137,7 +173,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 7,
-+ .ctl_name = KQSWNAL_NRXMSGS_SMALL,
- .procname = "nrxmsgs_small",
- .data = &nrxmsgs_small,
- .maxlen = sizeof (int),
-@@ -145,7 +181,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 8,
-+ .ctl_name = KQSWNAL_EP_ENVELOPES_SMALL,
- .procname = "ep_envelopes_small",
- .data = &ep_envelopes_small,
- .maxlen = sizeof (int),
-@@ -153,7 +189,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 9,
-+ .ctl_name = KQSWNAL_OPTIMIZED_PUTS,
- .procname = "optimized_puts",
- .data = &optimized_puts,
- .maxlen = sizeof (int),
-@@ -161,7 +197,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 10,
-+ .ctl_name = KQSWNAL_OPTIMIZED_GETS,
- .procname = "optimized_gets",
- .data = &optimized_gets,
- .maxlen = sizeof (int),
-@@ -170,7 +206,7 @@
- },
- #if KQSW_CKSUM
- {
-- .ctl_name = 11,
-+ .ctl_name = KQSWNAL_INJECT_CSUM_ERROR,
- .procname = "inject_csum_error",
- .data = &inject_csum_error,
- .maxlen = sizeof (int),
-@@ -183,7 +219,7 @@
-
- static cfs_sysctl_table_t kqswnal_top_ctl_table[] = {
- {
-- .ctl_name = 201,
-+ .ctl_name = CTL_KQSWNAL,
- .procname = "qswnal",
- .data = NULL,
- .maxlen = 0,
-diff -urNad lustre~/lnet/klnds/ralnd/ralnd_modparams.c lustre/lnet/klnds/ralnd/ralnd_modparams.c
---- lustre~/lnet/klnds/ralnd/ralnd_modparams.c 2008-12-30 11:23:32.000000000 +0100
-+++ lustre/lnet/klnds/ralnd/ralnd_modparams.c 2009-03-10 11:41:03.000000000 +0100
-@@ -89,9 +89,37 @@
- };
-
- #if CONFIG_SYSCTL && !CFS_SYSFS_MODULE_PARM
-+
-+#ifndef HAVE_SYSCTL_UNNUMBERED
-+#define CTL_KRANAL 202
-+enum {
-+ KRANAL_N_CONND = 1,
-+ KRANAL_RECONNECT_MIN,
-+ KRANAL_RECONNECT_MAX,
-+ KRANAL_NTX,
-+ KRANAL_CREDITS,
-+ KRANAL_PEERCREDITS,
-+ KRANAL_FMA_CQ_SIZE,
-+ KRANAL_TIMEOUT,
-+ KRANAL_IMMEDIATE_MAX
-+};
-+#else
-+#define CTL_KRANAL CTL_UNNUMBERED
-+
-+#define KRANAL_N_CONND CTL_UNNUMBERED
-+#define KRANAL_RECONNECT_MIN CTL_UNNUMBERED
-+#define KRANAL_RECONNECT_MAX CTL_UNNUMBERED
-+#define KRANAL_NTX CTL_UNNUMBERED
-+#define KRANAL_CREDITS CTL_UNNUMBERED
-+#define KRANAL_PEERCREDITS CTL_UNNUMBERED
-+#define KRANAL_FMA_CQ_SIZE CTL_UNNUMBERED
-+#define KRANAL_TIMEOUT CTL_UNNUMBERED
-+#define KRANAL_IMMEDIATE_MAX CTL_UNNUMBERED
-+#endif
-+
- static cfs_sysctl_table_t kranal_ctl_table[] = {
- {
-- .ctl_name = 1,
-+ .ctl_name = KRANAL_N_CONND,
- .procname = "n_connd",
- .data = &n_connd,
- .maxlen = sizeof(int),
-@@ -99,7 +127,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 2,
-+ .ctl_name = KRANAL_RECONNECT_MIN,
- .procname = "min_reconnect_interval",
- .data = &min_reconnect_interval,
- .maxlen = sizeof(int),
-@@ -107,7 +135,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 3,
-+ .ctl_name = KRANAL_RECONNECT_MAX,
- .procname = "max_reconnect_interval",
- .data = &max_reconnect_interval,
- .maxlen = sizeof(int),
-@@ -115,7 +143,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 4,
-+ .ctl_name = KRANAL_NTX,
- .procname = "ntx",
- .data = &ntx,
- .maxlen = sizeof(int),
-@@ -123,7 +151,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 5,
-+ .ctl_name = KRANAL_CREDITS,
- .procname = "credits",
- .data = &credits,
- .maxlen = sizeof(int),
-@@ -131,7 +159,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 6,
-+ .ctl_name = KRANAL_PEERCREDITS,
- .procname = "peer_credits",
- .data = &peer_credits,
- .maxlen = sizeof(int),
-@@ -139,7 +167,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 7,
-+ .ctl_name = KRANAL_FMA_CQ_SIZE,
- .procname = "fma_cq_size",
- .data = &fma_cq_size,
- .maxlen = sizeof(int),
-@@ -147,7 +175,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 8,
-+ .ctl_name = KRANAL_TIMEOUT,
- .procname = "timeout",
- .data = &timeout,
- .maxlen = sizeof(int),
-@@ -155,7 +183,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 9,
-+ .ctl_name = KRANAL_IMMEDIATE_MAX,
- .procname = "max_immediate",
- .data = &max_immediate,
- .maxlen = sizeof(int),
-@@ -167,7 +195,7 @@
-
- static cfs_sysctl_table_t kranal_top_ctl_table[] = {
- {
-- .ctl_name = 202,
-+ .ctl_name = CTL_KRANAL,
- .procname = "ranal",
- .data = NULL,
- .maxlen = 0,
-diff -urNad lustre~/lnet/klnds/socklnd/socklnd_lib-linux.c lustre/lnet/klnds/socklnd/socklnd_lib-linux.c
---- lustre~/lnet/klnds/socklnd/socklnd_lib-linux.c 2008-12-30 11:23:32.000000000 +0100
-+++ lustre/lnet/klnds/socklnd/socklnd_lib-linux.c 2009-03-10 11:41:03.000000000 +0100
-@@ -37,197 +37,244 @@
- #include "socklnd.h"
-
- # if defined(CONFIG_SYSCTL) && !CFS_SYSFS_MODULE_PARM
--static cfs_sysctl_table_t ksocknal_ctl_table[21];
-
--cfs_sysctl_table_t ksocknal_top_ctl_table[] = {
-- {
-- .ctl_name = 200,
-- .procname = "socknal",
-- .data = NULL,
-- .maxlen = 0,
-- .mode = 0555,
-- .child = ksocknal_ctl_table
-- },
-- { 0 }
-+#ifndef HAVE_SYSCTL_UNNUMBERED
-+
-+#define CTL_SOCKLND 209
-+
-+enum {
-+ SOCKLND_TIMEOUT = 1,
-+ SOCKLND_CREDITS,
-+ SOCKLND_PEER_CREDITS,
-+ SOCKLND_NCONNDS,
-+ SOCKLND_RECONNECTS_MIN,
-+ SOCKLND_RECONNECTS_MAX,
-+ SOCKLND_EAGER_ACK,
-+ SOCKLND_ZERO_COPY,
-+ SOCKLND_TYPED,
-+ SOCKLND_BULK_MIN,
-+ SOCKLND_RX_BUFFER_SIZE,
-+ SOCKLND_TX_BUFFER_SIZE,
-+ SOCKLND_NAGLE,
-+ SOCKLND_IRQ_AFFINITY,
-+ SOCKLND_KEEPALIVE_IDLE,
-+ SOCKLND_KEEPALIVE_COUNT,
-+ SOCKLND_KEEPALIVE_INTVL,
-+ SOCKLND_BACKOFF_INIT,
-+ SOCKLND_BACKOFF_MAX,
-+ SOCKLND_PROTOCOL
- };
-+#else
-+#define CTL_SOCKLND CTL_UNNUMBERED
-
--int
--ksocknal_lib_tunables_init ()
--{
-- int i = 0;
-- int j = 1;
-+#define SOCKLND_TIMEOUT CTL_UNNUMBERED
-+#define SOCKLND_CREDITS CTL_UNNUMBERED
-+#define SOCKLND_PEER_CREDITS CTL_UNNUMBERED
-+#define SOCKLND_NCONNDS CTL_UNNUMBERED
-+#define SOCKLND_RECONNECTS_MIN CTL_UNNUMBERED
-+#define SOCKLND_RECONNECTS_MAX CTL_UNNUMBERED
-+#define SOCKLND_EAGER_ACK CTL_UNNUMBERED
-+#define SOCKLND_ZERO_COPY CTL_UNNUMBERED
-+#define SOCKLND_TYPED CTL_UNNUMBERED
-+#define SOCKLND_BULK_MIN CTL_UNNUMBERED
-+#define SOCKLND_RX_BUFFER_SIZE CTL_UNNUMBERED
-+#define SOCKLND_TX_BUFFER_SIZE CTL_UNNUMBERED
-+#define SOCKLND_NAGLE CTL_UNNUMBERED
-+#define SOCKLND_IRQ_AFFINITY CTL_UNNUMBERED
-+#define SOCKLND_KEEPALIVE_IDLE CTL_UNNUMBERED
-+#define SOCKLND_KEEPALIVE_COUNT CTL_UNNUMBERED
-+#define SOCKLND_KEEPALIVE_INTVL CTL_UNNUMBERED
-+#define SOCKLND_BACKOFF_INIT CTL_UNNUMBERED
-+#define SOCKLND_BACKOFF_MAX CTL_UNNUMBERED
-+#define SOCKLND_PROTOCOL CTL_UNNUMBERED
-+#endif
-
-- ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
-- .ctl_name = j++,
-+static cfs_sysctl_table_t ksocknal_ctl_table[] = {
-+ {
-+ .ctl_name = SOCKLND_TIMEOUT,
- .procname = "timeout",
- .data = ksocknal_tunables.ksnd_timeout,
- .maxlen = sizeof (int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
-- };
-- ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
-- .ctl_name = j++,
-+ },
-+ {
-+ .ctl_name = SOCKLND_CREDITS,
- .procname = "credits",
- .data = ksocknal_tunables.ksnd_credits,
- .maxlen = sizeof (int),
- .mode = 0444,
- .proc_handler = &proc_dointvec
-- };
-- ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
-- .ctl_name = j++,
-+ },
-+ {
-+ .ctl_name = SOCKLND_PEER_CREDITS,
- .procname = "peer_credits",
- .data = ksocknal_tunables.ksnd_peercredits,
- .maxlen = sizeof (int),
- .mode = 0444,
- .proc_handler = &proc_dointvec
-- };
-- ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
-- .ctl_name = j++,
-+ },
-+ {
-+ .ctl_name = SOCKLND_NCONNDS,
- .procname = "nconnds",
- .data = ksocknal_tunables.ksnd_nconnds,
- .maxlen = sizeof (int),
- .mode = 0444,
- .proc_handler = &proc_dointvec
-- };
-- ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
-- .ctl_name = j++,
-+ },
-+ {
-+ .ctl_name = SOCKLND_RECONNECTS_MIN,
- .procname = "min_reconnectms",
- .data = ksocknal_tunables.ksnd_min_reconnectms,
- .maxlen = sizeof (int),
- .mode = 0444,
- .proc_handler = &proc_dointvec
-- };
-- ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
-- .ctl_name = j++,
-+ },
-+ {
-+ .ctl_name = SOCKLND_RECONNECTS_MAX,
- .procname = "max_reconnectms",
- .data = ksocknal_tunables.ksnd_max_reconnectms,
- .maxlen = sizeof (int),
- .mode = 0444,
- .proc_handler = &proc_dointvec
-- };
-- ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
-- .ctl_name = j++,
-+ },
-+ {
-+ .ctl_name = SOCKLND_EAGER_ACK,
- .procname = "eager_ack",
- .data = ksocknal_tunables.ksnd_eager_ack,
- .maxlen = sizeof (int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
-- };
-- ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
-- .ctl_name = j++,
-+ },
-+ {
-+ .ctl_name = SOCKLND_ZERO_COPY,
- .procname = "zero_copy",
- .data = ksocknal_tunables.ksnd_zc_min_frag,
- .maxlen = sizeof (int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
-- };
-- ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
-- .ctl_name = j++,
-+ },
-+ {
-+ .ctl_name = SOCKLND_TYPED,
- .procname = "typed",
- .data = ksocknal_tunables.ksnd_typed_conns,
- .maxlen = sizeof (int),
- .mode = 0444,
- .proc_handler = &proc_dointvec
-- };
-- ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
-- .ctl_name = j++,
-+ },
-+ {
-+ .ctl_name = SOCKLND_BULK_MIN,
- .procname = "min_bulk",
- .data = ksocknal_tunables.ksnd_min_bulk,
- .maxlen = sizeof (int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
-- };
-- ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
-- .ctl_name = j++,
-+ },
-+ {
-+ .ctl_name = SOCKLND_RX_BUFFER_SIZE,
- .procname = "rx_buffer_size",
- .data = ksocknal_tunables.ksnd_rx_buffer_size,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
-- };
-- ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
-- .ctl_name = j++,
-+ },
-+ {
-+ .ctl_name = SOCKLND_TX_BUFFER_SIZE,
- .procname = "tx_buffer_size",
- .data = ksocknal_tunables.ksnd_tx_buffer_size,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
-- };
-- ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
-- .ctl_name = j++,
-+ },
-+ {
-+ .ctl_name = SOCKLND_NAGLE,
- .procname = "nagle",
- .data = ksocknal_tunables.ksnd_nagle,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
-- };
-+ },
- #ifdef CPU_AFFINITY
-- ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
-- .ctl_name = j++,
-+ {
-+ .ctl_name = SOCKLND_IRQ_AFFINITY,
- .procname = "irq_affinity",
- .data = ksocknal_tunables.ksnd_irq_affinity,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
-- };
-+ },
- #endif
-- ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
-- .ctl_name = j++,
-+ {
-+ .ctl_name = SOCKLND_KEEPALIVE_IDLE,
- .procname = "keepalive_idle",
- .data = ksocknal_tunables.ksnd_keepalive_idle,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
-- };
-- ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
-- .ctl_name = j++,
-+ },
-+ {
-+ .ctl_name = SOCKLND_KEEPALIVE_COUNT,
- .procname = "keepalive_count",
- .data = ksocknal_tunables.ksnd_keepalive_count,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
-- };
-- ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
-- .ctl_name = j++,
-+ },
-+ {
-+ .ctl_name = SOCKLND_KEEPALIVE_INTVL,
- .procname = "keepalive_intvl",
- .data = ksocknal_tunables.ksnd_keepalive_intvl,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
-- };
-+ },
- #ifdef SOCKNAL_BACKOFF
-- ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
-- .ctl_name = j++,
-+ {
-+ .ctl_name = SOCKLND_BACKOFF_INIT,
- .procname = "backoff_init",
- .data = ksocknal_tunables.ksnd_backoff_init,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
-- };
-- ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
-- .ctl_name = j++,
-+ },
-+ {
-+ .ctl_name = SOCKLND_BACKOFF_MAX,
- .procname = "backoff_max",
- .data = ksocknal_tunables.ksnd_backoff_max,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
-- };
-+ },
- #endif
- #if SOCKNAL_VERSION_DEBUG
-- ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
-- .ctl_name = j++,
-+ {
-+ .ctl_name = SOCKLND_PROTOCOL,
- .procname = "protocol",
- .data = ksocknal_tunables.ksnd_protocol,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
-- };
-+ },
- #endif
-- ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) { 0 };
-+ {0}
-+};
-
-- LASSERT (j == i);
-- LASSERT (i <= sizeof(ksocknal_ctl_table)/sizeof(ksocknal_ctl_table[0]));
-
-+cfs_sysctl_table_t ksocknal_top_ctl_table[] = {
-+ {
-+ .ctl_name = CTL_SOCKLND,
-+ .procname = "socknal",
-+ .data = NULL,
-+ .maxlen = 0,
-+ .mode = 0555,
-+ .child = ksocknal_ctl_table
-+ },
-+ { 0 }
-+};
-+
-+int
-+ksocknal_lib_tunables_init ()
-+{
- ksocknal_tunables.ksnd_sysctl =
- cfs_register_sysctl_table(ksocknal_top_ctl_table, 0);
-
-diff -urNad lustre~/lnet/klnds/viblnd/viblnd_modparams.c lustre/lnet/klnds/viblnd/viblnd_modparams.c
---- lustre~/lnet/klnds/viblnd/viblnd_modparams.c 2008-12-30 11:23:32.000000000 +0100
-+++ lustre/lnet/klnds/viblnd/viblnd_modparams.c 2009-03-10 11:41:03.000000000 +0100
-@@ -142,6 +142,56 @@
- #endif
- };
-
-+#ifndef HAVE_SYSCTL_UNNUMBERED
-+
-+#define CTL_VIBLND 209
-+
-+enum {
-+ VIBLND_SERVICE = 1,
-+ VIBLND_RECONNECT_MIN,
-+ VIBLND_RECONNECT_MAX,
-+ VIBLND_CONCURRENT_PEERS,
-+ VIBLND_CHKSUM,
-+ VIBLND_TIMEOUT,
-+ VIBLND_NTX,
-+ VIBLND_CREDITS,
-+ VIBLND_PEER_CREDITS,
-+ VIBLND_ARP_RETRIES,
-+ VIBLND_HCA_BASENAME,
-+ VIBLND_IPIF_BASENAME,
-+ VIBLND_LOCAL_ACK_TIMEOUT,
-+ VIBLND_RETRY_CNT,
-+ VIBLND_RNR_CNT,
-+ VIBLND_RNR_NAK_TIMER,
-+ VIBLND_KEEPALIVE,
-+ VIBLND_CONCURRENT_SENDS,
-+ VIBLND_FMR_REMAPS
-+};
-+#else
-+#define CTL_VIBLND CTL_UNNUMBERED
-+
-+#define VIBLND_SERVICE CTL_UNNUMBERED
-+#define VIBLND_RECONNECT_MIN CTL_UNNUMBERED
-+#define VIBLND_RECONNECT_MAX CTL_UNNUMBERED
-+#define VIBLND_CONCURRENT_PEERS CTL_UNNUMBERED
-+#define VIBLND_CHKSUM CTL_UNNUMBERED
-+#define VIBLND_TIMEOUT CTL_UNNUMBERED
-+#define VIBLND_NTX CTL_UNNUMBERED
-+#define VIBLND_CREDITS CTL_UNNUMBERED
-+#define VIBLND_PEER_CREDITS CTL_UNNUMBERED
-+#define VIBLND_ARP_RETRIES CTL_UNNUMBERED
-+#define VIBLND_HCA_BASENAME CTL_UNNUMBERED
-+#define VIBLND_IPIF_BASENAME CTL_UNNUMBERED
-+#define VIBLND_LOCAL_ACK_TIMEOUT CTL_UNNUMBERED
-+#define VIBLND_RETRY_CNT CTL_UNNUMBERED
-+#define VIBLND_RNR_CNT CTL_UNNUMBERED
-+#define VIBLND_RNR_NAK_TIMER CTL_UNNUMBERED
-+#define VIBLND_KEEPALIVE CTL_UNNUMBERED
-+#define VIBLND_CONCURRENT_SENDS CTL_UNNUMBERED
-+#define VIBLND_FMR_REMAPS CTL_UNNUMBERED
-+
-+#endif
-+
- #if defined(CONFIG_SYSCTL) && !CFS_SYSFS_MODULE_PARM
-
- static char hca_basename_space[32];
-@@ -149,7 +199,7 @@
-
- static cfs_sysctl_table_t kibnal_ctl_table[] = {
- {
-- .ctl_name = 1,
-+ .ctl_name = VIBLND_SERVICE,
- .procname = "service_number",
- .data = &service_number,
- .maxlen = sizeof(int),
-@@ -157,7 +207,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 2,
-+ .ctl_name = VIBLND_RECONNECT_MIN,
- .procname = "min_reconnect_interval",
- .data = &min_reconnect_interval,
- .maxlen = sizeof(int),
-@@ -165,7 +215,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 3,
-+ .ctl_name = VIBLND_RECONNECT_MAX,
- .procname = "max_reconnect_interval",
- .data = &max_reconnect_interval,
- .maxlen = sizeof(int),
-@@ -173,7 +223,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 4,
-+ .ctl_name = VIBLND_CONCURRENT_PEERS,
- .procname = "concurrent_peers",
- .data = &concurrent_peers,
- .maxlen = sizeof(int),
-@@ -181,7 +231,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 5,
-+ .ctl_name = VIBLND_CHKSUM,
- .procname = "cksum",
- .data = &cksum,
- .maxlen = sizeof(int),
-@@ -189,7 +239,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 6,
-+ .ctl_name = VIBLND_TIMEOUT,
- .procname = "timeout",
- .data = &timeout,
- .maxlen = sizeof(int),
-@@ -197,7 +247,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 7,
-+ .ctl_name = VIBLND_NTX,
- .procname = "ntx",
- .data = &ntx,
- .maxlen = sizeof(int),
-@@ -205,7 +255,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 8,
-+ .ctl_name = VIBLND_CREDITS,
- .procname = "credits",
- .data = &credits,
- .maxlen = sizeof(int),
-@@ -213,7 +263,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 9,
-+ .ctl_name = VIBLND_PEER_CREDITS,
- .procname = "peer_credits",
- .data = &peer_credits,
- .maxlen = sizeof(int),
-@@ -221,7 +271,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 10,
-+ .ctl_name = VIBLND_ARP_RETRIES,
- .procname = "arp_retries",
- .data = &arp_retries,
- .maxlen = sizeof(int),
-@@ -229,7 +279,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 11,
-+ .ctl_name = VIBLND_HCA_BASENAME,
- .procname = "hca_basename",
- .data = hca_basename_space,
- .maxlen = sizeof(hca_basename_space),
-@@ -237,7 +287,7 @@
- .proc_handler = &proc_dostring
- },
- {
-- .ctl_name = 12,
-+ .ctl_name = VIBLND_IPIF_BASENAME,
- .procname = "ipif_basename",
- .data = ipif_basename_space,
- .maxlen = sizeof(ipif_basename_space),
-@@ -245,7 +295,7 @@
- .proc_handler = &proc_dostring
- },
- {
-- .ctl_name = 13,
-+ .ctl_name = VIBLND_LOCAL_ACK_TIMEOUT,
- .procname = "local_ack_timeout",
- .data = &local_ack_timeout,
- .maxlen = sizeof(int),
-@@ -253,7 +303,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 14,
-+ .ctl_name = VIBLND_RETRY_CNT,
- .procname = "retry_cnt",
- .data = &retry_cnt,
- .maxlen = sizeof(int),
-@@ -261,7 +311,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 15,
-+ .ctl_name = VIBLND_RNR_CNT,
- .procname = "rnr_cnt",
- .data = &rnr_cnt,
- .maxlen = sizeof(int),
-@@ -269,7 +319,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 16,
-+ .ctl_name = VIBLND_RNR_NAK_TIMER,
- .procname = "rnr_nak_timer",
- .data = &rnr_nak_timer,
- .maxlen = sizeof(int),
-@@ -277,7 +327,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 17,
-+ .ctl_name = VIBLND_KEEPALIVE,
- .procname = "keepalive",
- .data = &keepalive,
- .maxlen = sizeof(int),
-@@ -285,7 +335,7 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = 18,
-+ .ctl_name = VIBLND_CONCURRENT_SENDS,
- .procname = "concurrent_sends",
- .data = &concurrent_sends,
- .maxlen = sizeof(int),
-@@ -294,7 +344,7 @@
- },
- #if IBNAL_USE_FMR
- {
-- .ctl_name = 19,
-+ .ctl_name = VIBLND_FMR_REMAPS,
- .procname = "fmr_remaps",
- .data = &fmr_remaps,
- .maxlen = sizeof(int),
-@@ -307,7 +357,7 @@
-
- static cfs_sysctl_table_t kibnal_top_ctl_table[] = {
- {
-- .ctl_name = 203,
-+ .ctl_name = CTL_VIBLND,
- .procname = "vibnal",
- .data = NULL,
- .maxlen = 0,
-diff -urNad lustre~/lnet/libcfs/linux/linux-curproc.c lustre/lnet/libcfs/linux/linux-curproc.c
---- lustre~/lnet/libcfs/linux/linux-curproc.c 2008-12-30 11:23:32.000000000 +0100
-+++ lustre/lnet/libcfs/linux/linux-curproc.c 2009-03-10 11:41:03.000000000 +0100
-@@ -131,6 +131,19 @@
- current->cap_effective = cap;
- }
-
-+cfs_cap_t cfs_cap_convert_from_kernel(cfs_kernel_cap_t cap)
-+{
-+#if _LINUX_CAPABILITY_VERSION_3 || _LINUX_CAPABILITY_VERSION == 0x20071026
-+ /* XXX lost high byte */
-+ return cap.cap[0];
-+#elif _LINUX_CAPABILITY_VERSION == 0x19980330
-+ return cap;
-+#else
-+ #error "need correct _LINUX_CAPABILITY_VERSION "
-+#endif
-+}
-+
-+
- EXPORT_SYMBOL(cfs_curproc_uid);
- EXPORT_SYMBOL(cfs_curproc_pid);
- EXPORT_SYMBOL(cfs_curproc_gid);
-@@ -143,7 +156,7 @@
- EXPORT_SYMBOL(cfs_curproc_is_in_groups);
- EXPORT_SYMBOL(cfs_curproc_cap_get);
- EXPORT_SYMBOL(cfs_curproc_cap_set);
--
-+EXPORT_SYMBOL(cfs_cap_convert_from_kernel);
- /*
- * Local variables:
- * c-indentation-style: "K&R"
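For the capability change above: newer kernels turned kernel_cap_t into a structure holding an array of __u32 words, so the patch introduces cfs_cap_convert_from_kernel() to flatten it back into the 32-bit cfs_cap_t declared in libcfs/curproc.h earlier in this diff. A hedged sketch of how a caller might combine it with cfs_curproc_cap_get() (the wrapper name below is illustrative, not from the patch):

    /* Illustrative helper: return the current process's capabilities
     * as the 32-bit cfs_cap_t used by Lustre code. */
    static inline cfs_cap_t example_curproc_cap(void)
    {
            return cfs_cap_convert_from_kernel(cfs_curproc_cap_get());
    }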
-diff -urNad lustre~/lnet/libcfs/linux/linux-proc.c lustre/lnet/libcfs/linux/linux-proc.c
---- lustre~/lnet/libcfs/linux/linux-proc.c 2008-12-30 11:23:32.000000000 +0100
-+++ lustre/lnet/libcfs/linux/linux-proc.c 2009-03-10 11:41:03.000000000 +0100
-@@ -79,7 +79,8 @@
- static cfs_sysctl_table_header_t *lnet_table_header = NULL;
- extern char lnet_upcall[1024];
-
--#define PSDEV_LNET (0x100)
-+#ifndef HAVE_SYSCTL_UNNUMBERED
-+#define CTL_LNET (0x100)
- enum {
- PSDEV_DEBUG = 1, /* control debugging */
- PSDEV_SUBSYSTEM_DEBUG, /* control debugging */
-@@ -98,8 +99,27 @@
- PSDEV_LNET_DAEMON_FILE, /* spool kernel debug buffer to file */
- PSDEV_LNET_DEBUG_MB, /* size of debug buffer */
- };
-+#else
-+#define CTL_LNET CTL_UNNUMBERED
-+#define PSDEV_DEBUG CTL_UNNUMBERED
-+#define PSDEV_SUBSYSTEM_DEBUG CTL_UNNUMBERED
-+#define PSDEV_PRINTK CTL_UNNUMBERED
-+#define PSDEV_CONSOLE_RATELIMIT CTL_UNNUMBERED
-+#define PSDEV_CONSOLE_MAX_DELAY_CS CTL_UNNUMBERED
-+#define PSDEV_CONSOLE_MIN_DELAY_CS CTL_UNNUMBERED
-+#define PSDEV_CONSOLE_BACKOFF CTL_UNNUMBERED
-+#define PSDEV_DEBUG_PATH CTL_UNNUMBERED
-+#define PSDEV_DEBUG_DUMP_PATH CTL_UNNUMBERED
-+#define PSDEV_LNET_UPCALL CTL_UNNUMBERED
-+#define PSDEV_LNET_MEMUSED CTL_UNNUMBERED
-+#define PSDEV_LNET_CATASTROPHE CTL_UNNUMBERED
-+#define PSDEV_LNET_PANIC_ON_LBUG CTL_UNNUMBERED
-+#define PSDEV_LNET_DUMP_KERNEL CTL_UNNUMBERED
-+#define PSDEV_LNET_DAEMON_FILE CTL_UNNUMBERED
-+#define PSDEV_LNET_DEBUG_MB CTL_UNNUMBERED
-+#endif
-
--static int
-+int
- proc_call_handler(void *data, int write,
- loff_t *ppos, void *buffer, size_t *lenp,
- int (*handler)(void *data, int write,
-@@ -118,17 +138,7 @@
- }
- return 0;
- }
--
--#define DECLARE_PROC_HANDLER(name) \
--static int \
--LL_PROC_PROTO(name) \
--{ \
-- DECLARE_LL_PROC_PPOS_DECL; \
-- \
-- return proc_call_handler(table->data, write, \
-- ppos, buffer, lenp, \
-- __##name); \
--}
-+EXPORT_SYMBOL(proc_call_handler);
-
- static int __proc_dobitmasks(void *data, int write,
- loff_t pos, void *buffer, int nob)
-@@ -200,11 +210,12 @@
-
- DECLARE_PROC_HANDLER(proc_daemon_file)
-
-+char tmpstr[32];
-+
- static int __proc_debug_mb(void *data, int write,
- loff_t pos, void *buffer, int nob)
- {
- if (!write) {
-- char tmpstr[32];
- int len = snprintf(tmpstr, sizeof(tmpstr), "%d",
- trace_get_debug_mb());
-
-@@ -319,7 +330,8 @@
- .data = &libcfs_debug,
- .maxlen = sizeof(int),
- .mode = 0644,
-- .proc_handler = &proc_dobitmasks
-+ .proc_handler = &proc_dobitmasks,
-+ .strategy = &sysctl_intvec,
- },
- {
- .ctl_name = PSDEV_SUBSYSTEM_DEBUG,
-@@ -327,7 +339,8 @@
- .data = &libcfs_subsystem_debug,
- .maxlen = sizeof(int),
- .mode = 0644,
-- .proc_handler = &proc_dobitmasks
-+ .proc_handler = &proc_dobitmasks,
-+ .strategy = &sysctl_intvec,
- },
- {
- .ctl_name = PSDEV_PRINTK,
-@@ -335,7 +348,8 @@
- .data = &libcfs_printk,
- .maxlen = sizeof(int),
- .mode = 0644,
-- .proc_handler = &proc_dobitmasks
-+ .proc_handler = &proc_dobitmasks,
-+ .strategy = &sysctl_intvec,
- },
- {
- .ctl_name = PSDEV_CONSOLE_RATELIMIT,
-@@ -343,30 +357,36 @@
- .data = &libcfs_console_ratelimit,
- .maxlen = sizeof(int),
- .mode = 0644,
-- .proc_handler = &proc_dointvec
-+ .proc_handler = &proc_dointvec,
-+ .strategy = &sysctl_intvec,
- },
- {
- .ctl_name = PSDEV_CONSOLE_MAX_DELAY_CS,
- .procname = "console_max_delay_centisecs",
-+ .data = &libcfs_console_max_delay,
- .maxlen = sizeof(int),
- .mode = 0644,
-- .proc_handler = &proc_console_max_delay_cs
-+ .proc_handler = &proc_console_max_delay_cs,
-+ .strategy = &sysctl_intvec,
- },
- {
- .ctl_name = PSDEV_CONSOLE_MIN_DELAY_CS,
- .procname = "console_min_delay_centisecs",
- .maxlen = sizeof(int),
-+ .data = &libcfs_console_min_delay,
- .mode = 0644,
-- .proc_handler = &proc_console_min_delay_cs
-+ .proc_handler = &proc_console_min_delay_cs,
-+ .strategy = &sysctl_intvec,
- },
- {
- .ctl_name = PSDEV_CONSOLE_BACKOFF,
- .procname = "console_backoff",
- .maxlen = sizeof(int),
-+ .data = &libcfs_console_backoff,
- .mode = 0644,
-- .proc_handler = &proc_console_backoff
-+ .proc_handler = &proc_console_backoff,
-+ .strategy = &sysctl_intvec,
- },
--
- {
- .ctl_name = PSDEV_DEBUG_PATH,
- .procname = "debug_path",
-@@ -374,8 +394,8 @@
- .maxlen = sizeof(debug_file_path_arr),
- .mode = 0644,
- .proc_handler = &proc_dostring,
-+ .strategy = &sysctl_string,
- },
--
- {
- .ctl_name = PSDEV_LNET_UPCALL,
- .procname = "upcall",
-@@ -383,6 +403,7 @@
- .maxlen = sizeof(lnet_upcall),
- .mode = 0644,
- .proc_handler = &proc_dostring,
-+ .strategy = &sysctl_string,
- },
- {
- .ctl_name = PSDEV_LNET_MEMUSED,
-@@ -390,7 +411,8 @@
- .data = (int *)&libcfs_kmemory.counter,
- .maxlen = sizeof(int),
- .mode = 0444,
-- .proc_handler = &proc_dointvec
-+ .proc_handler = &proc_dointvec,
-+ .strategy = &sysctl_intvec,
- },
- {
- .ctl_name = PSDEV_LNET_CATASTROPHE,
-@@ -398,7 +420,8 @@
- .data = &libcfs_catastrophe,
- .maxlen = sizeof(int),
- .mode = 0444,
-- .proc_handler = &proc_dointvec
-+ .proc_handler = &proc_dointvec,
-+ .strategy = &sysctl_intvec,
- },
- {
- .ctl_name = PSDEV_LNET_PANIC_ON_LBUG,
-@@ -406,39 +429,52 @@
- .data = &libcfs_panic_on_lbug,
- .maxlen = sizeof(int),
- .mode = 0644,
-- .proc_handler = &proc_dointvec
-+ .proc_handler = &proc_dointvec,
-+ .strategy = &sysctl_intvec,
- },
- {
- .ctl_name = PSDEV_LNET_DUMP_KERNEL,
- .procname = "dump_kernel",
-+ .data = tmpstr,
-+ .maxlen = 256,
- .mode = 0200,
- .proc_handler = &proc_dump_kernel,
-+ .strategy = &sysctl_string,
- },
- {
- .ctl_name = PSDEV_LNET_DAEMON_FILE,
- .procname = "daemon_file",
-+ .data = tmpstr,
- .mode = 0644,
-+ .maxlen = 256,
- .proc_handler = &proc_daemon_file,
-+ .strategy = &sysctl_string,
- },
- {
- .ctl_name = PSDEV_LNET_DEBUG_MB,
- .procname = "debug_mb",
-+ .data = tmpstr,
-+ .maxlen = sizeof(tmpstr),
- .mode = 0644,
- .proc_handler = &proc_debug_mb,
-+ .strategy = &sysctl_string,
- },
- {0}
- };
-
--static cfs_sysctl_table_t top_table[2] = {
-+static cfs_sysctl_table_t top_table[] = {
- {
-- .ctl_name = PSDEV_LNET,
-+ .ctl_name = CTL_LNET,
- .procname = "lnet",
-- .data = NULL,
-- .maxlen = 0,
- .mode = 0555,
-- .child = lnet_table
-+ .child = lnet_table,
-+#ifdef HAVE_PARENT_IN_CTLTABLE
-+ .parent = NULL,
-+#endif
- },
-- {0}
-+ {
-+ .ctl_name = 0
-+ }
- };
-
- int insert_proc(void)
-diff -urNad lustre~/lnet/lnet/router_proc.c lustre/lnet/lnet/router_proc.c
---- lustre~/lnet/lnet/router_proc.c 2008-12-30 11:23:32.000000000 +0100
-+++ lustre/lnet/lnet/router_proc.c 2009-03-10 11:41:03.000000000 +0100
-@@ -32,12 +32,13 @@
-
- /* this is really lnet_proc.c */
-
--#define LNET_PROC_STATS "sys/lnet/stats"
--#define LNET_PROC_ROUTES "sys/lnet/routes"
--#define LNET_PROC_ROUTERS "sys/lnet/routers"
--#define LNET_PROC_PEERS "sys/lnet/peers"
--#define LNET_PROC_BUFFERS "sys/lnet/buffers"
--#define LNET_PROC_NIS "sys/lnet/nis"
-+#define LNET_PROC_ROOT "sys/lnet"
-+#define LNET_PROC_STATS LNET_PROC_ROOT"/stats"
-+#define LNET_PROC_ROUTES LNET_PROC_ROOT"/routes"
-+#define LNET_PROC_ROUTERS LNET_PROC_ROOT"/routers"
-+#define LNET_PROC_PEERS LNET_PROC_ROOT"/peers"
-+#define LNET_PROC_BUFFERS LNET_PROC_ROOT"/buffers"
-+#define LNET_PROC_NIS LNET_PROC_ROOT"/nis"
-
- static int
- lnet_router_proc_stats_read (char *page, char **start, off_t off,
-@@ -1007,6 +1008,12 @@
- {
- struct proc_dir_entry *pde;
-
-+ pde = proc_mkdir(LNET_PROC_ROOT, NULL);
-+ if (pde == NULL) {
-+ CERROR("couldn't create "LNET_PROC_ROOT"\n");
-+ return;
-+ }
-+
- /* Initialize LNET_PROC_STATS */
- pde = create_proc_entry (LNET_PROC_STATS, 0644, NULL);
- if (pde == NULL) {
-@@ -1078,6 +1085,7 @@
- remove_proc_entry(LNET_PROC_PEERS, 0);
- remove_proc_entry(LNET_PROC_BUFFERS, 0);
- remove_proc_entry(LNET_PROC_NIS, 0);
-+ remove_proc_entry(LNET_PROC_ROOT, 0);
- }
-
- #else
-diff -urNad lustre~/lustre/autoconf/lustre-core.m4 lustre/lustre/autoconf/lustre-core.m4
---- lustre~/lustre/autoconf/lustre-core.m4 2008-12-30 11:23:32.000000000 +0100
-+++ lustre/lustre/autoconf/lustre-core.m4 2009-03-10 11:46:22.000000000 +0100
-@@ -1105,20 +1105,79 @@
- ])
- ])
-
-+# Older kernels (2.6.18) don't know about .parent in
-+# ctl_table
-+AC_DEFUN([CLT_TABLE_HAS_PARENT],
-+[AC_MSG_CHECKING([sysctl has .parent])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/module.h>
-+ #include <linux/sysctl.h>
-+],[
-+ struct ctl_table random_table[] = {
-+ {
-+ .ctl_name = 1,
-+ .procname = "poolsize",
-+ .data = NULL,
-+ .maxlen = sizeof(int),
-+ .parent = NULL,
-+ },
-+ { .ctl_name = 0 }
-+ };
-+],[
-+ AC_MSG_RESULT(yes)
-+ AC_DEFINE(HAVE_PARENT_IN_CTLTABLE, 1,
-+ [ctl_table knows .parent])
-+],[
-+ AC_MSG_RESULT(NO)
-+])
-+])
-+
-+AC_DEFUN([IGET_CALL_IS_PRESENT],
-+[AC_MSG_CHECKING([iget() call is available])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/fs.h>
-+],[
-+ iget(NULL,0);
-+],[
-+ AC_MSG_RESULT(yes)
-+ AC_DEFINE(HAVE_IGET_CALL, 1,
-+ [iget call is available, which is removed in 2.6.26])
-+],[
-+ AC_MSG_RESULT(NO)
-+])
-+])
-+
- # RHEL5 PageChecked and SetPageChecked defined
- AC_DEFUN([LC_PAGE_CHECKED],
- [AC_MSG_CHECKING([kernel has PageChecked and SetPageChecked])
- LB_LINUX_TRY_COMPILE([
-- #include <linux/mm.h>
-- #include <linux/page-flags.h>
-+ #include <linux/page-flags.h>
-+ #include <linux/autoconf.h>
-+ #include <linux/mm_types.h>
- ],[
-+ struct page *p;
-+
-+ /* 2.6.26 use function instead of define for it */
-+ SetPageChecked(p);
-+ PageChecked(p);
-+],[
-+ AC_MSG_RESULT(yes)
-+ AC_DEFINE(HAVE_PAGE_CHECKED, 1,
-+ [does kernel have PageChecked and SetPageChecked])
-+],[
-+ AC_MSG_CHECKING([kernel has PageChecked and SetPageChecked defined])
-+ LB_LINUX_TRY_COMPILE([
-+ #include <linux/page-flags.h>
-+ #include <linux/autoconf.h>
-+ #include <linux/mm.h>
-+ ],[
- #ifndef PageChecked
- #error PageChecked not defined in kernel
- #endif
- #ifndef SetPageChecked
- #error SetPageChecked not defined in kernel
- #endif
--],[
-+ ],[
- AC_MSG_RESULT(yes)
- AC_DEFINE(HAVE_PAGE_CHECKED, 1,
- [does kernel have PageChecked and SetPageChecked])
-@@ -1126,6 +1185,7 @@
- AC_MSG_RESULT(NO)
- ])
- ])
-+])
-
- AC_DEFUN([LC_EXPORT_TRUNCATE_COMPLETE],
- [LB_CHECK_SYMBOL_EXPORT([truncate_complete_page],
-@@ -1271,11 +1331,170 @@
-
- # 2.6.23 extract nfs export related data into exportfs.h
- AC_DEFUN([LC_HAVE_EXPORTFS_H],
--[
--tmpfl="$CFLAGS"
--CFLAGS="$CFLAGS -I$LINUX_OBJ/include"
--AC_CHECK_HEADERS([linux/exportfs.h])
--CFLAGS="$tmpfl"
-+[LB_CHECK_FILE([$LINUX/include/linux/exportfs.h], [
-+ AC_DEFINE(HAVE_LINUX_EXPORTFS_H, 1,
-+ [kernel has include/exportfs.h])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+# 2.6.23 has a new page fault handling API
-+AC_DEFUN([LC_VM_OP_FAULT],
-+[AC_MSG_CHECKING([if kernel has .fault in vm_operation_struct])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/mm.h>
-+],[
-+ struct vm_operations_struct op;
-+
-+ op.fault = NULL;
-+], [
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_VM_OP_FAULT, 1,
-+ [if kernel has .fault in vm_operation_struct])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+# 2.6.23 has new shrinker API
-+AC_DEFUN([LC_REGISTER_SHRINKER],
-+[AC_MSG_CHECKING([if kernel has register_shrinker])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/mm.h>
-+],[
-+ register_shrinker(NULL);
-+], [
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_REGISTER_SHRINKER, 1,
-+ [if kernel has register_shrinker])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+# 2.6.24 has bio_endio with 2 args
-+AC_DEFUN([LC_BIO_ENDIO_2ARG],
-+[AC_MSG_CHECKING([if kernel has bio_endio with 2 args])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/bio.h>
-+],[
-+ bio_endio(NULL, 0);
-+], [
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_BIO_ENDIO_2ARG, 1,
-+ [if kernel has bio_endio with 2 args])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+# 2.6.24 has new members in exports struct.
-+AC_DEFUN([LC_FH_TO_DENTRY],
-+[AC_MSG_CHECKING([if kernel has .fh_to_dentry member in export_operations struct])
-+LB_LINUX_TRY_COMPILE([
-+#ifdef HAVE_LINUX_EXPORTFS_H
-+ #include <linux/exportfs.h>
-+#else
-+ #include <linux/fs.h>
-+#endif
-+],[
-+ struct export_operations exp;
-+
-+ exp.fh_to_dentry = NULL;
-+], [
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_FH_TO_DENTRY, 1,
-+ [kernel has .fh_to_dentry member in export_operations struct])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+# 2.6.24 removes the long-aged ->deleted member from the procfs entry struct
-+AC_DEFUN([LC_PROCFS_DELETED],
-+[AC_MSG_CHECKING([if kernel has deleted member in procfs entry struct])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/proc_fs.h>
-+],[
-+ struct proc_dir_entry pde;
-+
-+ pde.deleted = NULL;
-+], [
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_PROCFS_DELETED, 1,
-+ [kernel has deleted member in procfs entry struct])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+# 2.6.26 doesn't export set_fs_pwd and changes the pwd member of fs_struct to a struct path
-+AC_DEFUN([LC_FS_STRUCT_USE_PATH],
-+[AC_MSG_CHECKING([fs_struct use path structure])
-+LB_LINUX_TRY_COMPILE([
-+ #include <asm/atomic.h>
-+ #include <linux/spinlock.h>
-+ #include <linux/fs_struct.h>
-+],[
-+ struct path path;
-+ struct fs_struct fs;
-+
-+ fs.pwd = path;
-+], [
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_FS_STRUCT_USE_PATH, 1,
-+ [fs_struct use path structure])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+# 2.6.26 drop list_for_each_safe_rcu
-+AC_DEFUN([LC_RCU_LIST_SAFE],
-+[AC_MSG_CHECKING([if list_for_each_safe_rcu exist])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/list.h>
-+],[
-+ #ifndef list_for_each_safe_rcu
-+ #error list_for_each_safe_rcu does not exist
-+ #endif
-+],[
-+ AC_DEFINE(HAVE_RCU_LIST_SAFE, 1, [list_for_each_safe_rcu exist])
-+ AC_MSG_RESULT([yes])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+# 2.6.26 removes path_release; use path_put instead
-+AC_DEFUN([LC_PATH_RELEASE],
-+[AC_MSG_CHECKING([if path_release exist])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/dcache.h>
-+ #include <linux/namei.h>
-+],[
-+ path_release(NULL);
-+],[
-+ AC_DEFINE(HAVE_PATH_RELEASE, 1, [path_release exist])
-+ AC_MSG_RESULT([yes])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+# blk_put_queue is replaced in 2.6.25-rc5 by blk_cleanup_queue
-+AC_DEFUN([LC_BLK_CLEANUP_QUEUE],
-+[AC_MSG_CHECKING([if blk_cleanup_queue exists])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/blkdev.h>
-+],[
-+ blk_cleanup_queue(NULL);
-+],[
-+ AC_DEFINE(HAVE_BLK_CLEANUP_QUEUE, 1, [blk_cleanup_queue exists])
-+ AC_MSG_RESULT([yes])
-+],[
-+ AC_MSG_RESULT([no])
-+])
- ])
-
- #
-@@ -1377,6 +1596,20 @@
- LC_UNREGISTER_BLKDEV_RETURN_INT
- LC_KERNEL_SPLICE_READ
- LC_HAVE_EXPORTFS_H
-+ LC_VM_OP_FAULT
-+ LC_REGISTER_SHRINKER
-+
-+ # 2.6.24
-+ LC_BIO_ENDIO_2ARG
-+ LC_FH_TO_DENTRY
-+ LC_PROCFS_DELETED
-+
-+ # 2.6.26
-+ LC_FS_STRUCT_USE_PATH
-+ LC_RCU_LIST_SAFE
-+ LC_PATH_RELEASE
-+ IGET_CALL_IS_PRESENT
-+ LC_BLK_CLEANUP_QUEUE
- ])
-
- #
-@@ -1609,6 +1842,7 @@
- ],[
- AC_MSG_RESULT([no])
- ])
-+
- ],[
- AC_MSG_RESULT([no])
- ])
-diff -urNad lustre~/lustre/include/liblustre.h lustre/lustre/include/liblustre.h
---- lustre~/lustre/include/liblustre.h 2008-12-30 11:23:32.000000000 +0100
-+++ lustre/lustre/include/liblustre.h 2009-03-10 11:41:03.000000000 +0100
-@@ -743,11 +743,13 @@
- struct _cap_struct;
- typedef struct _cap_struct *cap_t;
- typedef int cap_value_t;
-+
- typedef enum {
- CAP_EFFECTIVE=0,
- CAP_PERMITTED=1,
- CAP_INHERITABLE=2
- } cap_flag_t;
-+
- typedef enum {
- CAP_CLEAR=0,
- CAP_SET=1
-@@ -757,11 +759,33 @@
- #define CAP_DAC_READ_SEARCH 2
- #define CAP_FOWNER 3
- #define CAP_FSETID 4
--#define CAP_SYS_ADMIN 21
-+#define CAP_SYS_ADMIN 21
-+#define CAP_SYS_RESOURCE 24
-+
-+#define cap_raise(c, flag) do {} while(0)
-+
-
- cap_t cap_get_proc(void);
- int cap_get_flag(cap_t, cap_value_t, cap_flag_t, cap_flag_value_t *);
-
-+/* XXX should be moved into libcfs */
-+typedef __u32 cfs_cap_t;
-+
-+static inline cfs_kernel_cap_t cfs_curproc_cap_get(void)
-+{
-+ return current->cap_effective;
-+}
-+
-+static inline void cfs_curproc_cap_set(cfs_kernel_cap_t cap)
-+{
-+ current->cap_effective = cap;
-+}
-+
-+static inline cfs_cap_t cfs_cap_convert_from_kernel(cfs_kernel_cap_t data)
-+{
-+ return (cfs_cap_t)data;
-+}
-+
- static inline void libcfs_run_lbug_upcall(char *file, const char *fn,
- const int l){}
-
-diff -urNad lustre~/lustre/include/linux/lustre_compat25.h lustre/lustre/include/linux/lustre_compat25.h
---- lustre~/lustre/include/linux/lustre_compat25.h 2008-12-30 11:23:32.000000000 +0100
-+++ lustre/lustre/include/linux/lustre_compat25.h 2009-03-10 11:41:03.000000000 +0100
-@@ -57,6 +57,28 @@
- #endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,14) */
-
- #ifndef HAVE_SET_FS_PWD
-+
-+#ifdef HAVE_FS_STRUCT_USE_PATH
-+static inline void ll_set_fs_pwd(struct fs_struct *fs, struct vfsmount *mnt,
-+ struct dentry *dentry)
-+{
-+ struct path path;
-+ struct path old_pwd;
-+
-+ path.mnt = mnt;
-+ path.dentry = dentry;
-+ write_lock(&fs->lock);
-+ old_pwd = fs->pwd;
-+ path_get(&path);
-+ fs->pwd = path;
-+ write_unlock(&fs->lock);
-+
-+ if (old_pwd.dentry)
-+ path_put(&old_pwd);
-+}
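-+/* Unlike the pre-2.6.26 variant below, this version takes and drops
-+ * whole-path references via path_get()/path_put() instead of separate
-+ * dget()/mntget() and dput()/mntput() calls. */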
-+
-+#else
-+
- static inline void ll_set_fs_pwd(struct fs_struct *fs, struct vfsmount *mnt,
- struct dentry *dentry)
- {
-@@ -75,6 +97,7 @@
- mntput(old_pwdmnt);
- }
- }
-+#endif
- #else
- #define ll_set_fs_pwd set_fs_pwd
- #endif /* HAVE_SET_FS_PWD */
-@@ -590,5 +613,56 @@
- vfs_rename(old,old_dir,new,new_dir)
- #endif
-
-+#ifdef HAVE_REGISTER_SHRINKER
-+typedef int (*shrinker_t)(int nr_to_scan, gfp_t gfp_mask);
-+
-+static inline
-+struct shrinker *set_shrinker(int seek, shrinker_t func)
-+{
-+ struct shrinker *s;
-+
-+ s = kmalloc(sizeof(*s), GFP_KERNEL);
-+ if (s == NULL)
-+ return (NULL);
-+
-+ s->shrink = func;
-+ s->seeks = seek;
-+
-+ register_shrinker(s);
-+
-+ return s;
-+}
-+
-+static inline
-+void remove_shrinker(struct shrinker *shrinker)
-+{
-+ if (shrinker == NULL)
-+ return;
-+
-+ unregister_shrinker(shrinker);
-+ kfree(shrinker);
-+}
-+#endif
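-+/* sketch of how callers use the wrappers above (my_cache_shrink is a
-+ * hypothetical callback name):
-+ *     struct shrinker *s = set_shrinker(DEFAULT_SEEKS, my_cache_shrink);
-+ *     ...
-+ *     remove_shrinker(s);
-+ */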
-+
-+#ifdef HAVE_BIO_ENDIO_2ARG
-+#define cfs_bio_io_error(a,b) bio_io_error((a))
-+#define cfs_bio_endio(a,b,c) bio_endio((a),(c))
-+#else
-+#define cfs_bio_io_error(a,b) bio_io_error((a),(b))
-+#define cfs_bio_endio(a,b,c) bio_endio((a),(b),(c))
-+#endif
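-+/* callers keep the legacy three-argument form, e.g. lloop.c calls
-+ * cfs_bio_endio(bio, bio->bi_size, ret); on 2.6.24+ kernels the size
-+ * argument is simply dropped */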
-+
-+#ifdef HAVE_FS_STRUCT_USE_PATH
-+#define cfs_fs_pwd(fs) ((fs)->pwd.dentry)
-+#define cfs_fs_mnt(fs) ((fs)->pwd.mnt)
-+#else
-+#define cfs_fs_pwd(fs) ((fs)->pwd)
-+#define cfs_fs_mnt(fs) ((fs)->pwdmnt)
-+#endif
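-+/* cfs_fs_pwd()/cfs_fs_mnt() hide the 2.6.26 fs_struct layout change;
-+ * see the updated callers in lvfs_linux.c, mgc_request.c and
-+ * ptlrpc/service.c further down in this patch */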
-+
-+#ifndef HAVE_RCU_LIST_SAFE
-+#define list_for_each_safe_rcu(a,b,c) list_for_each_rcu(b, c)
-+#endif
-+
- #endif /* __KERNEL__ */
- #endif /* _COMPAT25_H */
-diff -urNad lustre~/lustre/include/linux/lvfs.h lustre/lustre/include/linux/lvfs.h
---- lustre~/lustre/include/linux/lvfs.h 2008-12-30 11:23:32.000000000 +0100
-+++ lustre/lustre/include/linux/lvfs.h 2009-03-10 11:41:03.000000000 +0100
-@@ -64,7 +64,7 @@
- struct upcall_cache_entry *luc_uce;
- __u32 luc_fsuid;
- __u32 luc_fsgid;
-- __u32 luc_cap;
-+ cfs_kernel_cap_t luc_cap;
- __u32 luc_suppgid1;
- __u32 luc_suppgid2;
- __u32 luc_umask;
-diff -urNad lustre~/lustre/include/lprocfs_status.h lustre/lustre/include/lprocfs_status.h
---- lustre~/lustre/include/lprocfs_status.h 2008-12-30 11:23:32.000000000 +0100
-+++ lustre/lustre/include/lprocfs_status.h 2009-03-10 11:41:03.000000000 +0100
-@@ -509,6 +509,8 @@
- #define LPROCFS_EXIT() do { \
- up_read(&_lprocfs_lock); \
- } while(0)
-+
-+#ifdef HAVE_PROCFS_DELETED
- #define LPROCFS_ENTRY_AND_CHECK(dp) do { \
- typecheck(struct proc_dir_entry *, dp); \
- LPROCFS_ENTRY(); \
-@@ -517,6 +519,13 @@
- return -ENODEV; \
- } \
- } while(0)
-+#define LPROCFS_CHECK_DELETED(dp) ((dp)->deleted)
-+#else
-+#define LPROCFS_ENTRY_AND_CHECK(dp) \
-+ LPROCFS_ENTRY();
-+#define LPROCFS_CHECK_DELETED(dp) (0)
-+#endif
-+
- #define LPROCFS_WRITE_ENTRY() do { \
- down_write(&_lprocfs_lock); \
- } while(0)
-diff -urNad lustre~/lustre/include/lustre_log.h lustre/lustre/include/lustre_log.h
---- lustre~/lustre/include/lustre_log.h 2008-12-30 11:23:32.000000000 +0100
-+++ lustre/lustre/include/lustre_log.h 2009-03-10 11:41:03.000000000 +0100
-@@ -238,14 +238,6 @@
- void *llog_proc_cb;
- };
-
--#ifndef __KERNEL__
--
--#define cap_raise(c, flag) do {} while(0)
--
--#define CAP_SYS_RESOURCE 24
--
--#endif /* !__KERNEL__ */
--
- static inline void llog_gen_init(struct llog_ctxt *ctxt)
- {
- struct obd_device *obd = ctxt->loc_exp->exp_obd;
-@@ -349,7 +341,7 @@
- int numcookies, void *buf, int idx)
- {
- struct llog_operations *lop;
-- __u32 cap;
-+ cfs_kernel_cap_t cap;
- int rc, buflen;
- ENTRY;
-
-@@ -366,10 +358,10 @@
- buflen = rec->lrh_len;
- LASSERT(size_round(buflen) == buflen);
-
-- cap = current->cap_effective;
-- cap_raise(current->cap_effective, CAP_SYS_RESOURCE);
-+ cap = current->cap_effective;
-+ cap_raise(current->cap_effective, CAP_SYS_RESOURCE);
- rc = lop->lop_write_rec(handle, rec, logcookies, numcookies, buf, idx);
-- current->cap_effective = cap;
-+ current->cap_effective = cap;
- RETURN(rc);
- }
-
-@@ -465,7 +457,7 @@
- struct llog_logid *logid, char *name)
- {
- struct llog_operations *lop;
-- __u32 cap;
-+ cfs_kernel_cap_t cap;
- int rc;
- ENTRY;
-
-@@ -475,10 +467,10 @@
- if (lop->lop_create == NULL)
- RETURN(-EOPNOTSUPP);
-
-- cap = current->cap_effective;
-+ cap = current->cap_effective;
- cap_raise(current->cap_effective, CAP_SYS_RESOURCE);
- rc = lop->lop_create(ctxt, res, logid, name);
-- current->cap_effective = cap;
-+ current->cap_effective = cap;
- RETURN(rc);
- }
-
-diff -urNad lustre~/lustre/include/lustre_mds.h lustre/lustre/include/lustre_mds.h
---- lustre~/lustre/include/lustre_mds.h 2008-12-30 11:23:32.000000000 +0100
-+++ lustre/lustre/include/lustre_mds.h 2009-03-10 11:41:03.000000000 +0100
-@@ -209,7 +209,7 @@
- struct page *, struct ptlrpc_request **);
- int mdc_create(struct obd_export *exp, struct mdc_op_data *op_data,
- const void *data, int datalen, int mode, __u32 uid, __u32 gid,
-- __u32 cap_effective, __u64 rdev,struct ptlrpc_request **request);
-+ cfs_kernel_cap_t cap_effective, __u64 rdev,struct ptlrpc_request **request);
- int mdc_unlink(struct obd_export *exp, struct mdc_op_data *data,
- struct ptlrpc_request **request);
- int mdc_link(struct obd_export *exp, struct mdc_op_data *data,
-diff -urNad lustre~/lustre/llite/file.c lustre/lustre/llite/file.c
---- lustre~/lustre/llite/file.c 2009-03-10 11:41:02.000000000 +0100
-+++ lustre/lustre/llite/file.c 2009-03-10 11:41:03.000000000 +0100
-@@ -1820,9 +1820,17 @@
- /*
- * Send file content (through pagecache) somewhere with helper
- */
--#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
--static ssize_t ll_file_sendfile(struct file *in_file, loff_t *ppos,size_t count,
-- read_actor_t actor, void *target)
-+/* change based on
-+ * http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commit;h=f0930fffa99e7fe0a0c4b6c7d9a244dc88288c27
-+ */
-+#ifdef HAVE_KERNEL_SPLICE_READ
-+static ssize_t ll_file_splice_read(struct file *in_file, loff_t *ppos,
-+ struct pipe_inode_info *pipe, size_t count,
-+ unsigned int flags)
-+#else
-+static ssize_t ll_file_sendfile(struct file *in_file, loff_t *ppos,
-+ size_t count, read_actor_t actor, void *target)
-+#endif
- {
- struct inode *inode = in_file->f_dentry->d_inode;
- struct ll_inode_info *lli = ll_i2info(inode);
-@@ -1831,8 +1839,7 @@
- struct ll_lock_tree_node *node;
- struct ost_lvb lvb;
- struct ll_ra_read bead;
-- int rc;
-- ssize_t retval;
-+ ssize_t rc;
- __u64 kms;
- ENTRY;
- CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),size="LPSZ",offset=%Ld\n",
-@@ -1848,8 +1855,14 @@
- in_file->f_ra.ra_pages = 0;
-
- /* File with no objects, nothing to lock */
-- if (!lsm)
-- RETURN(generic_file_sendfile(in_file, ppos, count, actor, target));
-+ if (!lsm) {
-+#ifdef HAVE_KERNEL_SPLICE_READ
-+ rc = generic_file_splice_read(in_file, ppos, pipe, count, flags);
-+#else
-+ rc = generic_file_sendfile(in_file, ppos, count, actor, target);
-+#endif
-+ RETURN(rc);
-+ }
-
- node = ll_node_from_inode(inode, *ppos, *ppos + count - 1, LCK_PR);
- if (IS_ERR(node))
-@@ -1889,8 +1902,8 @@
- /* A glimpse is necessary to determine whether we return a
- * short read (B) or some zeroes at the end of the buffer (C) */
- ll_inode_size_unlock(inode, 1);
-- retval = ll_glimpse_size(inode, LDLM_FL_BLOCK_GRANTED);
-- if (retval)
-+ rc = ll_glimpse_size(inode, LDLM_FL_BLOCK_GRANTED);
-+ if (rc)
- goto out;
- } else {
- /* region is within kms and, hence, within real file size (A) */
-@@ -1906,14 +1919,17 @@
- ll_ra_read_in(in_file, &bead);
- /* BUG: 5972 */
- file_accessed(in_file);
-- retval = generic_file_sendfile(in_file, ppos, count, actor, target);
-+#ifdef HAVE_KERNEL_SPLICE_READ
-+ rc = generic_file_splice_read(in_file, ppos, pipe, count, flags);
-+#else
-+ rc = generic_file_sendfile(in_file, ppos, count, actor, target);
-+#endif
- ll_ra_read_ex(in_file, &bead);
-
- out:
- ll_tree_unlock(&tree);
-- RETURN(retval);
-+ RETURN(rc);
- }
--#endif
-
- static int ll_lov_recreate_obj(struct inode *inode, struct file *file,
- unsigned long arg)
-@@ -3179,7 +3195,9 @@
- .release = ll_file_release,
- .mmap = ll_file_mmap,
- .llseek = ll_file_seek,
--#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
-+#ifdef HAVE_KERNEL_SPLICE_READ
-+ .splice_read = ll_file_splice_read,
-+#else
- .sendfile = ll_file_sendfile,
- #endif
- .fsync = ll_fsync,
-@@ -3203,7 +3221,9 @@
- .release = ll_file_release,
- .mmap = ll_file_mmap,
- .llseek = ll_file_seek,
--#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
-+#ifdef HAVE_KERNEL_SPLICE_READ
-+ .splice_read = ll_file_splice_read,
-+#else
- .sendfile = ll_file_sendfile,
- #endif
- .fsync = ll_fsync,
-@@ -3232,7 +3252,9 @@
- .release = ll_file_release,
- .mmap = ll_file_mmap,
- .llseek = ll_file_seek,
--#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
-+#ifdef HAVE_KERNEL_SPLICE_READ
-+ .splice_read = ll_file_splice_read,
-+#else
- .sendfile = ll_file_sendfile,
- #endif
- .fsync = ll_fsync,
-diff -urNad lustre~/lustre/llite/llite_internal.h lustre/lustre/llite/llite_internal.h
---- lustre~/lustre/llite/llite_internal.h 2008-12-30 11:23:32.000000000 +0100
-+++ lustre/lustre/llite/llite_internal.h 2009-03-10 11:41:03.000000000 +0100
-@@ -748,9 +748,6 @@
- /* llite/llite_nfs.c */
- extern struct export_operations lustre_export_operations;
- __u32 get_uuid2int(const char *name, int len);
--struct dentry *ll_fh_to_dentry(struct super_block *sb, __u32 *data, int len,
-- int fhtype, int parent);
--int ll_dentry_to_fh(struct dentry *, __u32 *datap, int *lenp, int need_parent);
-
- /* llite/special.c */
- extern struct inode_operations ll_special_inode_operations;
-diff -urNad lustre~/lustre/llite/llite_lib.c lustre/lustre/llite/llite_lib.c
---- lustre~/lustre/llite/llite_lib.c 2008-12-30 11:23:32.000000000 +0100
-+++ lustre/lustre/llite/llite_lib.c 2009-03-10 11:41:03.000000000 +0100
-@@ -1373,7 +1373,7 @@
- rc = vmtruncate(inode, new_size);
- clear_bit(LLI_F_SRVLOCK, &lli->lli_flags);
- if (rc != 0) {
-- LASSERT(atomic_read(&lli->lli_size_sem.count) <= 0);
-+// LASSERT(atomic_read(&lli->lli_size_sem.count) <= 0);
- ll_inode_size_unlock(inode, 0);
- }
- }
-diff -urNad lustre~/lustre/llite/llite_mmap.c lustre/lustre/llite/llite_mmap.c
---- lustre~/lustre/llite/llite_mmap.c 2008-12-30 11:23:32.000000000 +0100
-+++ lustre/lustre/llite/llite_mmap.c 2009-03-10 11:41:03.000000000 +0100
-@@ -53,9 +53,6 @@
- #include <linux/mm.h>
- #include <linux/pagemap.h>
- #include <linux/smp_lock.h>
--#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
--#include <linux/iobuf.h>
--#endif
-
- #define DEBUG_SUBSYSTEM S_LLITE
-
-@@ -84,8 +81,7 @@
- int lt_get_mmap_locks(struct ll_lock_tree *tree,
- unsigned long addr, size_t count);
-
--struct page *ll_nopage(struct vm_area_struct *vma, unsigned long address,
-- int *type);
-+static struct vm_operations_struct ll_file_vm_ops;
-
- struct ll_lock_tree_node * ll_node_from_inode(struct inode *inode, __u64 start,
- __u64 end, ldlm_mode_t mode)
-@@ -311,7 +307,7 @@
- spin_lock(&mm->page_table_lock);
- for(vma = find_vma(mm, addr);
- vma != NULL && vma->vm_start < (addr + count); vma = vma->vm_next) {
-- if (vma->vm_ops && vma->vm_ops->nopage == ll_nopage &&
-+ if (vma->vm_ops && vma->vm_ops == &ll_file_vm_ops &&
- vma->vm_flags & VM_SHARED) {
- ret = vma;
- break;
-@@ -363,44 +359,30 @@
- }
- RETURN(0);
- }
--/**
-- * Page fault handler.
-- *
-- * \param vma - is virtiual area struct related to page fault
-- * \param address - address when hit fault
-- * \param type - of fault
-- *
-- * \return allocated and filled page for address
-- * \retval NOPAGE_SIGBUS if page not exist on this address
-- * \retval NOPAGE_OOM not have memory for allocate new page
-- */
--struct page *ll_nopage(struct vm_area_struct *vma, unsigned long address,
-- int *type)
-+
-+
-+static int ll_get_extent_lock(struct vm_area_struct *vma, unsigned long pgoff,
-+ int *save_flags, struct lustre_handle *lockh)
- {
- struct file *filp = vma->vm_file;
- struct ll_file_data *fd = LUSTRE_FPRIVATE(filp);
- struct inode *inode = filp->f_dentry->d_inode;
-- struct lustre_handle lockh = { 0 };
- ldlm_policy_data_t policy;
- ldlm_mode_t mode;
-- struct page *page = NULL;
- struct ll_inode_info *lli = ll_i2info(inode);
-- struct lov_stripe_md *lsm;
- struct ost_lvb lvb;
- __u64 kms, old_mtime;
-- unsigned long pgoff, size, rand_read, seq_read;
-- int rc = 0;
-- ENTRY;
-+ unsigned long size;
-
- if (lli->lli_smd == NULL) {
- CERROR("No lsm on fault?\n");
-- RETURN(NOPAGE_SIGBUS);
-+ RETURN(0);
- }
-
- ll_clear_file_contended(inode);
-
- /* start and end the lock on the first and last bytes in the page */
-- policy_from_vma(&policy, vma, address, CFS_PAGE_SIZE);
-+ policy_from_vma(&policy, vma, pgoff, CFS_PAGE_SIZE);
-
- CDEBUG(D_MMAP, "nopage vma %p inode %lu, locking ["LPU64", "LPU64"]\n",
- vma, inode->i_ino, policy.l_extent.start, policy.l_extent.end);
-@@ -408,26 +390,24 @@
- mode = mode_from_vma(vma);
- old_mtime = LTIME_S(inode->i_mtime);
-
-- lsm = lli->lli_smd;
-- rc = ll_extent_lock(fd, inode, lsm, mode, &policy,
-- &lockh, LDLM_FL_CBPENDING | LDLM_FL_NO_LRU);
-- if (rc != 0)
-- RETURN(NOPAGE_SIGBUS);
-+ if(ll_extent_lock(fd, inode, lli->lli_smd, mode, &policy,
-+ lockh, LDLM_FL_CBPENDING | LDLM_FL_NO_LRU))
-+ RETURN(0);
-
- if (vma->vm_flags & VM_EXEC && LTIME_S(inode->i_mtime) != old_mtime)
- CWARN("binary changed. inode %lu\n", inode->i_ino);
-
-- lov_stripe_lock(lsm);
-+ lov_stripe_lock(lli->lli_smd);
- inode_init_lvb(inode, &lvb);
-- obd_merge_lvb(ll_i2obdexp(inode), lsm, &lvb, 1);
-+ obd_merge_lvb(ll_i2obdexp(inode), lli->lli_smd, &lvb, 1);
- kms = lvb.lvb_size;
-
-- pgoff = ((address - vma->vm_start) >> CFS_PAGE_SHIFT) + vma->vm_pgoff;
- size = (kms + CFS_PAGE_SIZE - 1) >> CFS_PAGE_SHIFT;
-
- if (pgoff >= size) {
-- lov_stripe_unlock(lsm);
-+ lov_stripe_unlock(lli->lli_smd);
- ll_glimpse_size(inode, LDLM_FL_BLOCK_GRANTED);
-+ lov_stripe_lock(lli->lli_smd);
- } else {
- /* XXX change inode size without ll_inode_size_lock() held!
- * there is a race condition with truncate path. (see
-@@ -449,29 +429,59 @@
- CDEBUG(D_INODE, "ino=%lu, updating i_size %llu\n",
- inode->i_ino, i_size_read(inode));
- }
-- lov_stripe_unlock(lsm);
- }
-
- /* If mapping is writeable, adjust kms to cover this page,
- * but do not extend kms beyond actual file size.
- * policy.l_extent.end is set to the end of the page by policy_from_vma
- * bug 10919 */
-- lov_stripe_lock(lsm);
- if (mode == LCK_PW)
-- obd_adjust_kms(ll_i2obdexp(inode), lsm,
-+ obd_adjust_kms(ll_i2obdexp(inode), lli->lli_smd,
- min_t(loff_t, policy.l_extent.end + 1,
- i_size_read(inode)), 0);
-- lov_stripe_unlock(lsm);
-+ lov_stripe_unlock(lli->lli_smd);
-
- /* disable VM_SEQ_READ and use VM_RAND_READ to make sure that
- * the kernel will not read other pages not covered by ldlm in
- * filemap_nopage. we do our readahead in ll_readpage.
- */
-- rand_read = vma->vm_flags & VM_RAND_READ;
-- seq_read = vma->vm_flags & VM_SEQ_READ;
-+ *save_flags = vma->vm_flags & (VM_RAND_READ | VM_SEQ_READ);
- vma->vm_flags &= ~ VM_SEQ_READ;
- vma->vm_flags |= VM_RAND_READ;
-
-+ RETURN(1);
-+}
-+
-+static void ll_put_extent_lock(struct vm_area_struct *vma, int save_flags,
-+ struct lustre_handle *lockh)
-+{
-+ struct file *filp = vma->vm_file;
-+ struct ll_file_data *fd = LUSTRE_FPRIVATE(filp);
-+ struct inode *inode = filp->f_dentry->d_inode;
-+ ldlm_mode_t mode;
-+
-+ mode = mode_from_vma(vma);
-+ vma->vm_flags &= ~(VM_RAND_READ | VM_SEQ_READ);
-+ vma->vm_flags |= save_flags;
-+
-+ ll_extent_unlock(fd, inode, ll_i2info(inode)->lli_smd, mode, lockh);
-+}
-+
-+#ifndef HAVE_VM_OP_FAULT
-+struct page *ll_nopage(struct vm_area_struct *vma, unsigned long address,
-+ int *type) {
-+ struct lustre_handle lockh = { 0 };
-+ int save_flags;
-+ unsigned long pgoff;
-+ struct page *page;
-+ ENTRY;
-+
-+ pgoff = ((address - vma->vm_start) >> CFS_PAGE_SHIFT) + vma->vm_pgoff;
-+ if (!ll_get_extent_lock(vma, pgoff, &save_flags, &lockh))
-+ RETURN(NOPAGE_SIGBUS);
-+
- page = filemap_nopage(vma, address, type);
- if (page != NOPAGE_SIGBUS && page != NOPAGE_OOM)
- LL_CDEBUG_PAGE(D_PAGE, page, "got addr %lu type %lx\n", address,
-@@ -480,12 +490,30 @@
- CDEBUG(D_PAGE, "got addr %lu type %lx - SIGBUS\n", address,
- (long)type);
-
-- vma->vm_flags &= ~VM_RAND_READ;
-- vma->vm_flags |= (rand_read | seq_read);
-+ ll_put_extent_lock(vma, save_flags, &lockh);
-
-- ll_extent_unlock(fd, inode, ll_i2info(inode)->lli_smd, mode, &lockh);
- RETURN(page);
- }
-+#else
-+/* New fault() API*/
-+int ll_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
-+{
-+ struct lustre_handle lockh = { 0 };
-+ int save_flags;
-+ int rc;
-+ ENTRY;
-+
-+ if (!ll_get_extent_lock(vma, vmf->pgoff, &save_flags, &lockh))
-+ RETURN(VM_FAULT_SIGBUS);
-+
-+ rc = filemap_fault(vma, vmf);
-+ LL_CDEBUG_PAGE(D_PAGE, vmf->page, "got addr %p type NOPAGE\n",
-+ vmf->virtual_address);
-+ ll_put_extent_lock(vma, save_flags, &lockh);
-+
-+ RETURN(rc);
-+}
-+#endif
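-+/* ll_fault() (or ll_nopage() on older kernels) is wired into
-+ * ll_file_vm_ops below, selected by HAVE_VM_OP_FAULT */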
-
- /* To avoid cancel the locks covering mmapped region for lock cache pressure,
- * we track the mapped vma count by lli_mmap_cnt.
-@@ -551,6 +579,7 @@
- }
- }
-
-+#ifndef HAVE_VM_OP_FAULT
- #ifndef HAVE_FILEMAP_POPULATE
- static int (*filemap_populate)(struct vm_area_struct * area, unsigned long address, unsigned long len, pgprot_t prot, unsigned long pgoff, int nonblock);
- #endif
-@@ -565,6 +594,7 @@
- rc = filemap_populate(area, address, len, prot, pgoff, 1);
- RETURN(rc);
- }
-+#endif
-
- /* return the user space pointer that maps to a file offset via a vma */
- static inline unsigned long file_to_user(struct vm_area_struct *vma, __u64 byte)
-@@ -591,10 +621,14 @@
- }
-
- static struct vm_operations_struct ll_file_vm_ops = {
-- .nopage = ll_nopage,
- .open = ll_vm_open,
- .close = ll_vm_close,
-+#ifdef HAVE_VM_OP_FAULT
-+ .fault = ll_fault,
-+#else
-+ .nopage = ll_nopage,
- .populate = ll_populate,
-+#endif
- };
-
- int ll_file_mmap(struct file * file, struct vm_area_struct * vma)
-@@ -605,8 +639,7 @@
- ll_stats_ops_tally(ll_i2sbi(file->f_dentry->d_inode), LPROC_LL_MAP, 1);
- rc = generic_file_mmap(file, vma);
- if (rc == 0) {
--#if !defined(HAVE_FILEMAP_POPULATE) && \
-- (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
-+#if !defined(HAVE_FILEMAP_POPULATE) && !defined(HAVE_VM_OP_FAULT)
- if (!filemap_populate)
- filemap_populate = vma->vm_ops->populate;
- #endif
-diff -urNad lustre~/lustre/llite/llite_nfs.c lustre/lustre/llite/llite_nfs.c
---- lustre~/lustre/llite/llite_nfs.c 2008-12-30 11:23:32.000000000 +0100
-+++ lustre/lustre/llite/llite_nfs.c 2009-03-10 11:41:03.000000000 +0100
-@@ -57,11 +57,7 @@
- return (key0 << 1);
- }
-
--#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
--static int ll_nfs_test_inode(struct inode *inode, unsigned long ino, void *opaque)
--#else
- static int ll_nfs_test_inode(struct inode *inode, void *opaque)
--#endif
- {
- struct ll_fid *iid = opaque;
-
-@@ -73,12 +69,10 @@
-
- static struct inode * search_inode_for_lustre(struct super_block *sb,
- unsigned long ino,
-- unsigned long generation,
-- int mode)
-+ unsigned long generation)
- {
- struct ptlrpc_request *req = NULL;
- struct ll_sb_info *sbi = ll_s2sbi(sb);
-- struct ll_fid fid;
- unsigned long valid = 0;
- int eadatalen = 0, rc;
- struct inode *inode = NULL;
-@@ -89,17 +83,15 @@
-
- if (inode)
- RETURN(inode);
-- if (S_ISREG(mode)) {
-- rc = ll_get_max_mdsize(sbi, &eadatalen);
-- if (rc)
-- RETURN(ERR_PTR(rc));
-- valid |= OBD_MD_FLEASIZE;
-- }
-- fid.id = (__u64)ino;
-- fid.generation = generation;
-- fid.f_type = mode;
-
-- rc = mdc_getattr(sbi->ll_mdc_exp, &fid, valid, eadatalen, &req);
-+ rc = ll_get_max_mdsize(sbi, &eadatalen);
-+ if (rc)
-+ RETURN(ERR_PTR(rc));
-+
-+ valid |= OBD_MD_FLEASIZE;
-+
-+ /* mds_fid2dentry ignores f_type */
-+ rc = mdc_getattr(sbi->ll_mdc_exp, &iid, valid, eadatalen, &req);
- if (rc) {
- CERROR("failure %d inode %lu\n", rc, ino);
- RETURN(ERR_PTR(rc));
-@@ -115,20 +107,23 @@
- RETURN(inode);
- }
-
--static struct dentry *ll_iget_for_nfs(struct super_block *sb, unsigned long ino,
-- __u32 generation, umode_t mode)
-+extern struct dentry_operations ll_d_ops;
-+
-+#ifdef HAVE_FH_TO_DENTRY
-+static struct inode *ll_iget_for_nfs(struct super_block *sb, __u64 ino,
-+ __u32 generation)
-+#else
-+static struct inode *ll_iget_for_nfs(struct super_block *sb, unsigned long ino,
-+ __u32 generation)
-+#endif
- {
- struct inode *inode;
-- struct dentry *result;
--#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
-- struct list_head *lp;
--#endif
- ENTRY;
-
- if (ino == 0)
- RETURN(ERR_PTR(-ESTALE));
-
-- inode = search_inode_for_lustre(sb, ino, generation, mode);
-+ inode = search_inode_for_lustre(sb, ino, generation);
- if (IS_ERR(inode)) {
- RETURN(ERR_PTR(PTR_ERR(inode)));
- }
-@@ -142,40 +137,24 @@
- iput(inode);
- RETURN(ERR_PTR(-ESTALE));
- }
-+ RETURN(inode);
-+}
-+
-+static struct dentry *ll_nfs_get_dentry(struct super_block *sb, void *data)
-+{
-+ __u32 *inump = (__u32*)data;
-+ struct inode *inode;
-+ struct dentry *result;
-+
-+ inode = ll_iget_for_nfs(sb, inump[0], inump[1]);
-+ if (IS_ERR(inode))
-+ RETURN((struct dentry *)inode);
-
--#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
- result = d_alloc_anon(inode);
- if (!result) {
- iput(inode);
- RETURN(ERR_PTR(-ENOMEM));
- }
--#else
-- /* now to find a dentry.
-- * If possible, get a well-connected one
-- */
-- spin_lock(&dcache_lock);
-- for (lp = inode->i_dentry.next; lp != &inode->i_dentry ; lp=lp->next) {
-- result = list_entry(lp,struct dentry, d_alias);
-- lock_dentry(result);
-- if (!(result->d_flags & DCACHE_DISCONNECTED)) {
-- dget_locked(result);
-- ll_set_dflags(result, DCACHE_REFERENCED);
-- unlock_dentry(result);
-- spin_unlock(&dcache_lock);
-- iput(inode);
-- RETURN(result);
-- }
-- unlock_dentry(result);
-- }
-- spin_unlock(&dcache_lock);
-- result = d_alloc_root(inode);
-- if (result == NULL) {
-- iput(inode);
-- RETURN(ERR_PTR(-ENOMEM));
-- }
-- result->d_flags |= DCACHE_DISCONNECTED;
--
--#endif
- ll_set_dd(result);
-
- lock_dentry(result);
-@@ -190,80 +169,63 @@
- }
-
- RETURN(result);
-+
- }
-
--struct dentry *ll_fh_to_dentry(struct super_block *sb, __u32 *data, int len,
-- int fhtype, int parent)
-+#ifdef HAVE_FH_TO_DENTRY
-+
-+static struct dentry *ll_fh_to_dentry(struct super_block *sb, struct fid *fid,
-+ int fh_len, int fh_type)
- {
-- switch (fhtype) {
-- case 2:
-- if (len < 5)
-- break;
-- if (parent)
-- return ll_iget_for_nfs(sb, data[3], 0, data[4]);
-- case 1:
-- if (len < 3)
-- break;
-- if (parent)
-- break;
-- return ll_iget_for_nfs(sb, data[0], data[1], data[2]);
-- default: break;
-+ struct dentry *result;
-+
-+ result = generic_fh_to_dentry(sb, fid, fh_len, fh_type,
-+ ll_iget_for_nfs);
-+ if(!IS_ERR(result)) {
-+ ll_set_dd(result);
-+ result->d_op = &ll_d_ops;
- }
-- return ERR_PTR(-EINVAL);
-+ RETURN(result);
- }
-
--int ll_dentry_to_fh(struct dentry *dentry, __u32 *datap, int *lenp,
-- int need_parent)
-+static struct dentry *ll_fh_to_parent(struct super_block *sb, struct fid *fid,
-+ int fh_len, int fh_type)
- {
-- if (*lenp < 3)
-- return 255;
-- *datap++ = dentry->d_inode->i_ino;
-- *datap++ = dentry->d_inode->i_generation;
-- *datap++ = (__u32)(S_IFMT & dentry->d_inode->i_mode);
--
-- if (*lenp == 3 || S_ISDIR(dentry->d_inode->i_mode)) {
-- *lenp = 3;
-- return 1;
-- }
-- if (dentry->d_parent) {
-- *datap++ = dentry->d_parent->d_inode->i_ino;
-- *datap++ = (__u32)(S_IFMT & dentry->d_parent->d_inode->i_mode);
-+ struct dentry *result;
-
-- *lenp = 5;
-- return 2;
-+ result = generic_fh_to_parent(sb, fid, fh_len, fh_type,
-+ ll_iget_for_nfs);
-+ if(!IS_ERR(result)) {
-+ ll_set_dd(result);
-+ result->d_op = &ll_d_ops;
- }
-- *lenp = 3;
-- return 1;
-+ RETURN(result);
- }
--
--#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
--struct dentry *ll_get_dentry(struct super_block *sb, void *data)
-+#else
-+static struct dentry *ll_get_dentry(struct super_block *sb, void *data)
- {
-- __u32 *inump = (__u32*)data;
-- return ll_iget_for_nfs(sb, inump[0], inump[1], S_IFREG);
-+ return ll_nfs_get_dentry(sb, data);
- }
-
--struct dentry *ll_get_parent(struct dentry *dchild)
-+#endif
-+
-+static struct dentry *ll_get_parent(struct dentry *dchild)
- {
- struct ptlrpc_request *req = NULL;
- struct inode *dir = dchild->d_inode;
-- struct ll_sb_info *sbi;
- struct dentry *result = NULL;
- struct ll_fid fid;
- struct mds_body *body;
- char dotdot[] = "..";
-+ __u32 idata[2];
- int rc = 0;
- ENTRY;
-
- LASSERT(dir && S_ISDIR(dir->i_mode));
--
-- sbi = ll_s2sbi(dir->i_sb);
-
- fid.id = (__u64)dir->i_ino;
- fid.generation = dir->i_generation;
-- fid.f_type = S_IFDIR;
--
-- rc = mdc_getattr_name(sbi->ll_mdc_exp, &fid, dotdot, strlen(dotdot) + 1,
-+ rc = mdc_getattr_name(ll_s2sbi(dir->i_sb)->ll_mdc_exp, &fid, dotdot, sizeof(dotdot),
- 0, 0, &req);
- if (rc) {
- CERROR("failure %d inode %lu get parent\n", rc, dir->i_ino);
-@@ -273,8 +235,9 @@
-
- LASSERT((body->valid & OBD_MD_FLGENER) && (body->valid & OBD_MD_FLID));
-
-- result = ll_iget_for_nfs(dir->i_sb, body->ino, body->generation, S_IFDIR);
--
-+ idata[0] = body->ino;
-+ idata[1] = body->generation;
-+ result = ll_nfs_get_dentry(dir->i_sb, &idata);
- if (IS_ERR(result))
- rc = PTR_ERR(result);
-
-@@ -285,7 +248,11 @@
- }
-
- struct export_operations lustre_export_operations = {
-- .get_parent = ll_get_parent,
-- .get_dentry = ll_get_dentry,
--};
-+#ifdef HAVE_FH_TO_DENTRY
-+ .fh_to_dentry = ll_fh_to_dentry,
-+ .fh_to_parent = ll_fh_to_parent,
-+#else
-+ .get_dentry = ll_get_dentry,
- #endif
-+ .get_parent = ll_get_parent,
-+};
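-+/* generic_fh_to_dentry()/generic_fh_to_parent() decode the default
-+ * ino/generation file handles and call ll_iget_for_nfs() back with
-+ * (ino, generation); the kernel's default encode_fh is relied on for
-+ * the encoding side */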
-diff -urNad lustre~/lustre/llite/lloop.c lustre/lustre/llite/lloop.c
---- lustre~/lustre/llite/lloop.c 2008-12-30 11:23:32.000000000 +0100
-+++ lustre/lustre/llite/lloop.c 2009-03-10 11:48:26.000000000 +0100
-@@ -312,7 +312,7 @@
- if (atomic_dec_and_test(&lo->lo_pending))
- up(&lo->lo_bh_mutex);
- out:
-- bio_io_error(old_bio, old_bio->bi_size);
-+ cfs_bio_io_error(old_bio, old_bio->bi_size);
- return 0;
- inactive:
- spin_unlock_irq(&lo->lo_lock);
-@@ -334,7 +334,7 @@
- {
- int ret;
- ret = do_bio_filebacked(lo, bio);
-- bio_endio(bio, bio->bi_size, ret);
-+ cfs_bio_endio(bio, bio->bi_size, ret);
- }
-
- /*
-@@ -736,7 +736,11 @@
-
- out_mem4:
- while (i--)
-+#ifndef HAVE_BLK_CLEANUP_QUEUE
- blk_put_queue(loop_dev[i].lo_queue);
-+#else
-+ blk_cleanup_queue(loop_dev[i].lo_queue);
-+#endif
- i = max_loop;
- out_mem3:
- while (i--)
-@@ -758,7 +762,11 @@
- ll_iocontrol_unregister(ll_iocontrol_magic);
- for (i = 0; i < max_loop; i++) {
- del_gendisk(disks[i]);
-+#ifndef HAVE_BLK_CLEANUP_QUEUE
- blk_put_queue(loop_dev[i].lo_queue);
-+#else
-+ blk_cleanup_queue(loop_dev[i].lo_queue);
-+#endif
- put_disk(disks[i]);
- }
- if (ll_unregister_blkdev(lloop_major, "lloop"))
-diff -urNad lustre~/lustre/llite/namei.c lustre/lustre/llite/namei.c
---- lustre~/lustre/llite/namei.c 2008-12-30 11:23:32.000000000 +0100
-+++ lustre/lustre/llite/namei.c 2009-03-10 11:41:03.000000000 +0100
-@@ -901,7 +901,7 @@
-
- err = mdc_create(sbi->ll_mdc_exp, &op_data, tgt, tgt_len,
- mode, current->fsuid, current->fsgid,
-- current->cap_effective, rdev, &request);
-+ cfs_curproc_cap_get(), rdev, &request);
- if (err)
- GOTO(err_exit, err);
-
-diff -urNad lustre~/lustre/llite/rw.c lustre/lustre/llite/rw.c
---- lustre~/lustre/llite/rw.c 2008-12-30 11:23:32.000000000 +0100
-+++ lustre/lustre/llite/rw.c 2009-03-10 11:41:03.000000000 +0100
-@@ -186,7 +186,7 @@
- GOTO(out_unlock, 0);
- }
-
-- LASSERT(atomic_read(&lli->lli_size_sem.count) <= 0);
-+// LASSERT(atomic_read(&lli->lli_size_sem.count) <= 0);
-
- if (!srvlock) {
- struct ost_lvb lvb;
-diff -urNad lustre~/lustre/llite/symlink.c lustre/lustre/llite/symlink.c
---- lustre~/lustre/llite/symlink.c 2008-12-30 11:23:32.000000000 +0100
-+++ lustre/lustre/llite/symlink.c 2009-03-10 11:41:03.000000000 +0100
-@@ -171,8 +171,12 @@
- rc = ll_readlink_internal(inode, &request, &symname);
- up(&lli->lli_size_sem);
- if (rc) {
-+#ifdef HAVE_PATH_RELEASE
- path_release(nd); /* Kernel assumes that ->follow_link()
- releases nameidata on error */
-+#else
-+ path_put(&nd->path);
-+#endif
- GOTO(out, rc);
- }
-
-diff -urNad lustre~/lustre/lvfs/lvfs_linux.c lustre/lustre/lvfs/lvfs_linux.c
---- lustre~/lustre/lvfs/lvfs_linux.c 2008-12-30 11:23:32.000000000 +0100
-+++ lustre/lustre/lvfs/lvfs_linux.c 2009-03-10 11:41:03.000000000 +0100
-@@ -148,10 +148,10 @@
- */
-
- save->fs = get_fs();
-- LASSERT(atomic_read(¤t->fs->pwd->d_count));
-+ LASSERT(atomic_read(&cfs_fs_pwd(current->fs)->d_count));
- LASSERT(atomic_read(&new_ctx->pwd->d_count));
-- save->pwd = dget(current->fs->pwd);
-- save->pwdmnt = mntget(current->fs->pwdmnt);
-+ save->pwd = dget(cfs_fs_pwd(current->fs));
-+ save->pwdmnt = mntget(cfs_fs_mnt(current->fs));
- save->luc.luc_umask = current->fs->umask;
-
- LASSERT(save->pwd);
-@@ -162,11 +162,11 @@
- if (uc) {
- save->luc.luc_fsuid = current->fsuid;
- save->luc.luc_fsgid = current->fsgid;
-- save->luc.luc_cap = current->cap_effective;
-+ save->luc.luc_cap = cfs_curproc_cap_get();
-
- current->fsuid = uc->luc_fsuid;
- current->fsgid = uc->luc_fsgid;
-- current->cap_effective = uc->luc_cap;
-+ cfs_curproc_cap_set(uc->luc_cap);
- push_group_info(save, uc->luc_uce);
- }
- current->fs->umask = 0; /* umask already applied on client */
-@@ -205,10 +205,10 @@
- atomic_read(¤t->fs->pwdmnt->mnt_count));
- */
-
-- LASSERTF(current->fs->pwd == new_ctx->pwd, "%p != %p\n",
-- current->fs->pwd, new_ctx->pwd);
-- LASSERTF(current->fs->pwdmnt == new_ctx->pwdmnt, "%p != %p\n",
-- current->fs->pwdmnt, new_ctx->pwdmnt);
-+ LASSERTF(cfs_fs_pwd(current->fs) == new_ctx->pwd, "%p != %p\n",
-+ cfs_fs_pwd(current->fs), new_ctx->pwd);
-+ LASSERTF(cfs_fs_mnt(current->fs) == new_ctx->pwdmnt, "%p != %p\n",
-+ cfs_fs_mnt(current->fs), new_ctx->pwdmnt);
-
- set_fs(saved->fs);
- ll_set_fs_pwd(current->fs, saved->pwdmnt, saved->pwd);
-@@ -219,7 +219,7 @@
- if (uc) {
- current->fsuid = saved->luc.luc_fsuid;
- current->fsgid = saved->luc.luc_fsgid;
-- current->cap_effective = saved->luc.luc_cap;
-+ cfs_curproc_cap_set(saved->luc.luc_cap);
- pop_group_info(saved, uc->luc_uce);
- }
-
-diff -urNad lustre~/lustre/mdc/mdc_internal.h lustre/lustre/mdc/mdc_internal.h
---- lustre~/lustre/mdc/mdc_internal.h 2008-12-30 11:23:32.000000000 +0100
-+++ lustre/lustre/mdc/mdc_internal.h 2009-03-10 11:41:03.000000000 +0100
-@@ -60,7 +60,7 @@
- void *ea2, int ea2len);
- void mdc_create_pack(struct ptlrpc_request *req, int offset,
- struct mdc_op_data *op_data, const void *data, int datalen,
-- __u32 mode, __u32 uid, __u32 gid, __u32 cap_effective,
-+ __u32 mode, __u32 uid, __u32 gid, cfs_kernel_cap_t cap_effective,
- __u64 rdev);
- void mdc_open_pack(struct ptlrpc_request *req, int offset,
- struct mdc_op_data *op_data, __u32 mode, __u64 rdev,
-diff -urNad lustre~/lustre/mdc/mdc_lib.c lustre/lustre/mdc/mdc_lib.c
---- lustre~/lustre/mdc/mdc_lib.c 2008-12-30 11:23:32.000000000 +0100
-+++ lustre/lustre/mdc/mdc_lib.c 2009-03-10 11:41:03.000000000 +0100
-@@ -58,7 +58,7 @@
- b = lustre_msg_buf(req->rq_reqmsg, offset, sizeof(*b));
- b->fsuid = current->fsuid;
- b->fsgid = current->fsgid;
-- b->capability = current->cap_effective;
-+ b->capability = cfs_cap_convert_from_kernel(cfs_curproc_cap_get());
- b->fid1 = *fid;
- b->size = pg_off; /* !! */
- b->suppgid = -1;
-@@ -71,7 +71,7 @@
-
- b->fsuid = current->fsuid;
- b->fsgid = current->fsgid;
-- b->capability = current->cap_effective;
-+ b->capability = cfs_cap_convert_from_kernel(cfs_curproc_cap_get());
- }
-
- void mdc_pack_req_body(struct ptlrpc_request *req, int offset,
-@@ -90,7 +90,7 @@
- /* packing of MDS records */
- void mdc_create_pack(struct ptlrpc_request *req, int offset,
- struct mdc_op_data *op_data, const void *data, int datalen,
-- __u32 mode, __u32 uid, __u32 gid, __u32 cap_effective,
-+ __u32 mode, __u32 uid, __u32 gid, cfs_kernel_cap_t cap_effective,
- __u64 rdev)
- {
- struct mds_rec_create *rec;
-@@ -100,7 +100,7 @@
- rec->cr_opcode = REINT_CREATE;
- rec->cr_fsuid = uid;
- rec->cr_fsgid = gid;
-- rec->cr_cap = cap_effective;
-+ rec->cr_cap = cfs_cap_convert_from_kernel(cap_effective);
- rec->cr_fid = op_data->fid1;
- memset(&rec->cr_replayfid, 0, sizeof(rec->cr_replayfid));
- rec->cr_mode = mode;
-@@ -168,7 +168,7 @@
- rec->cr_opcode = REINT_OPEN;
- rec->cr_fsuid = current->fsuid;
- rec->cr_fsgid = current->fsgid;
-- rec->cr_cap = current->cap_effective;
-+ rec->cr_cap = cfs_cap_convert_from_kernel(cfs_curproc_cap_get());
- rec->cr_fid = op_data->fid1;
- memset(&rec->cr_replayfid, 0, sizeof(rec->cr_replayfid));
- rec->cr_mode = mode;
-@@ -242,7 +242,7 @@
- rec->sa_opcode = REINT_SETATTR;
- rec->sa_fsuid = current->fsuid;
- rec->sa_fsgid = current->fsgid;
-- rec->sa_cap = current->cap_effective;
-+ rec->sa_cap = cfs_cap_convert_from_kernel(cfs_curproc_cap_get());
- rec->sa_fid = data->fid1;
- rec->sa_suppgid = -1;
-
-@@ -286,7 +286,7 @@
- rec->ul_opcode = REINT_UNLINK;
- rec->ul_fsuid = current->fsuid;
- rec->ul_fsgid = current->fsgid;
-- rec->ul_cap = current->cap_effective;
-+ rec->ul_cap = cfs_cap_convert_from_kernel(cfs_curproc_cap_get());
- rec->ul_mode = data->create_mode;
- rec->ul_suppgid = data->suppgids[0];
- rec->ul_fid1 = data->fid1;
-@@ -309,7 +309,7 @@
- rec->lk_opcode = REINT_LINK;
- rec->lk_fsuid = current->fsuid;
- rec->lk_fsgid = current->fsgid;
-- rec->lk_cap = current->cap_effective;
-+ rec->lk_cap = cfs_cap_convert_from_kernel(cfs_curproc_cap_get());
- rec->lk_suppgid1 = data->suppgids[0];
- rec->lk_suppgid2 = data->suppgids[1];
- rec->lk_fid1 = data->fid1;
-@@ -333,7 +333,7 @@
- rec->rn_opcode = REINT_RENAME;
- rec->rn_fsuid = current->fsuid;
- rec->rn_fsgid = current->fsgid;
-- rec->rn_cap = current->cap_effective;
-+ rec->rn_cap = cfs_cap_convert_from_kernel(cfs_curproc_cap_get());
- rec->rn_suppgid1 = data->suppgids[0];
- rec->rn_suppgid2 = data->suppgids[1];
- rec->rn_fid1 = data->fid1;
-@@ -357,7 +357,7 @@
-
- b->fsuid = current->fsuid;
- b->fsgid = current->fsgid;
-- b->capability = current->cap_effective;
-+ b->capability = cfs_cap_convert_from_kernel(cfs_curproc_cap_get());
- b->valid = valid;
- b->flags = flags | MDS_BFLAG_EXT_FLAGS;
- /* skip MDS_BFLAG_EXT_FLAGS to verify the "client < 1.4.7" case
-diff -urNad lustre~/lustre/mdc/mdc_reint.c lustre/lustre/mdc/mdc_reint.c
---- lustre~/lustre/mdc/mdc_reint.c 2008-12-30 11:23:32.000000000 +0100
-+++ lustre/lustre/mdc/mdc_reint.c 2009-03-10 11:41:03.000000000 +0100
-@@ -176,7 +176,7 @@
-
- int mdc_create(struct obd_export *exp, struct mdc_op_data *op_data,
- const void *data, int datalen, int mode, __u32 uid, __u32 gid,
-- __u32 cap_effective, __u64 rdev, struct ptlrpc_request **request)
-+ cfs_kernel_cap_t cap_effective, __u64 rdev, struct ptlrpc_request **request)
- {
- CFS_LIST_HEAD(cancels);
- struct obd_device *obd = exp->exp_obd;
-diff -urNad lustre~/lustre/mgc/mgc_request.c lustre/lustre/mgc/mgc_request.c
---- lustre~/lustre/mgc/mgc_request.c 2008-12-30 11:23:32.000000000 +0100
-+++ lustre/lustre/mgc/mgc_request.c 2009-03-10 11:41:03.000000000 +0100
-@@ -410,7 +410,7 @@
- obd->obd_lvfs_ctxt.fs = get_ds();
-
- push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
-- dentry = lookup_one_len(MOUNT_CONFIGS_DIR, current->fs->pwd,
-+ dentry = lookup_one_len(MOUNT_CONFIGS_DIR, cfs_fs_pwd(current->fs),
- strlen(MOUNT_CONFIGS_DIR));
- pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
- if (IS_ERR(dentry)) {
-diff -urNad lustre~/lustre/obdclass/linux/linux-module.c lustre/lustre/obdclass/linux/linux-module.c
---- lustre~/lustre/obdclass/linux/linux-module.c 2008-12-30 11:23:32.000000000 +0100
-+++ lustre/lustre/obdclass/linux/linux-module.c 2009-03-10 11:41:03.000000000 +0100
-@@ -418,7 +418,7 @@
- ENTRY;
-
- obd_sysctl_init();
-- proc_lustre_root = proc_mkdir("lustre", proc_root_fs);
-+ proc_lustre_root = proc_mkdir("fs/lustre", NULL);
- if (!proc_lustre_root) {
- printk(KERN_ERR
- "LustreError: error registering /proc/fs/lustre\n");
-diff -urNad lustre~/lustre/obdclass/linux/linux-sysctl.c lustre/lustre/obdclass/linux/linux-sysctl.c
---- lustre~/lustre/obdclass/linux/linux-sysctl.c 2008-12-30 11:23:32.000000000 +0100
-+++ lustre/lustre/obdclass/linux/linux-sysctl.c 2009-03-10 11:41:03.000000000 +0100
-@@ -59,7 +59,9 @@
-
- cfs_sysctl_table_header_t *obd_table_header = NULL;
-
--#define OBD_SYSCTL 300
-+#ifndef HAVE_SYSCTL_UNNUMBERED
-+
-+#define CTL_LUSTRE 300
-
- enum {
- OBD_FAIL_LOC = 1, /* control test failures instrumentation */
-@@ -77,6 +79,23 @@
- OBD_ALLOC_FAIL_RATE, /* memory allocation random failure rate */
- OBD_MAX_DIRTY_PAGES, /* maximum dirty pages */
- };
-+#else
-+#define CTL_LUSTRE CTL_UNNUMBERED
-+#define OBD_FAIL_LOC CTL_UNNUMBERED
-+#define OBD_FAIL_VAL CTL_UNNUMBERED
-+#define OBD_TIMEOUT CTL_UNNUMBERED
-+#define OBD_DUMP_ON_TIMEOUT CTL_UNNUMBERED
-+#define OBD_MEMUSED CTL_UNNUMBERED
-+#define OBD_PAGESUSED CTL_UNNUMBERED
-+#define OBD_MAXMEMUSED CTL_UNNUMBERED
-+#define OBD_MAXPAGESUSED CTL_UNNUMBERED
-+#define OBD_SYNCFILTER CTL_UNNUMBERED
-+#define OBD_LDLM_TIMEOUT CTL_UNNUMBERED
-+#define OBD_DUMP_ON_EVICTION CTL_UNNUMBERED
-+#define OBD_DEBUG_PEER_ON_TIMEOUT CTL_UNNUMBERED
-+#define OBD_ALLOC_FAIL_RATE CTL_UNNUMBERED
-+#define OBD_MAX_DIRTY_PAGES CTL_UNNUMBERED
-+#endif
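-+/* with CTL_UNNUMBERED all ctl_name values above collapse to the same
-+ * constant, so 2.6.24+ kernels look these entries up by procname only
-+ * (the binary sysctl(2) interface no longer reaches them) */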
-
- int LL_PROC_PROTO(proc_fail_loc)
- {
-@@ -123,7 +142,8 @@
- obd_max_dirty_pages = 4 << (20 - CFS_PAGE_SHIFT);
- }
- } else {
-- char buf[21];
-+ char buf[22];
-+ struct ctl_table dummy;
- int len;
-
- len = lprocfs_read_frac_helper(buf, sizeof(buf),
-@@ -132,7 +152,13 @@
- if (len > *lenp)
- len = *lenp;
- buf[len] = '\0';
-- if (copy_to_user(buffer, buf, len))
-+
-+ dummy = *table;
-+ dummy.data = buf;
-+ dummy.maxlen = sizeof(buf);
-+
-+ rc = ll_proc_dostring(&dummy,write,filp,buffer,lenp, ppos);
-+ if (rc)
- return -EFAULT;
- *lenp = len;
- }
-@@ -175,98 +201,107 @@
-
- int LL_PROC_PROTO(proc_memory_alloc)
- {
-- char buf[22];
- int len;
-+ char buf[22];
-+ struct ctl_table dummy;
- DECLARE_LL_PROC_PPOS_DECL;
-
-- if (!*lenp || (*ppos && !write)) {
-+ if (write)
-+ return -EINVAL;
-+
-+ if (!*lenp || *ppos) {
- *lenp = 0;
- return 0;
- }
-- if (write)
-- return -EINVAL;
-
-- len = snprintf(buf, sizeof(buf), LPU64"\n", obd_memory_sum());
-+ dummy = *table;
-+ dummy.data = buf;
-+ dummy.maxlen = sizeof(buf);
-+
-+ len = snprintf(buf, sizeof(buf), LPU64,
-+ obd_memory_sum());
-+
- if (len > *lenp)
- len = *lenp;
- buf[len] = '\0';
-- if (copy_to_user(buffer, buf, len))
-- return -EFAULT;
-- *lenp = len;
-- *ppos += *lenp;
-- return 0;
-+ return ll_proc_dostring(&dummy,write,filp,buffer,lenp, ppos);
- }
-
- int LL_PROC_PROTO(proc_pages_alloc)
- {
-- char buf[22];
- int len;
-+ char buf[22];
-+ struct ctl_table dummy;
- DECLARE_LL_PROC_PPOS_DECL;
-
-- if (!*lenp || (*ppos && !write)) {
-+ if (write)
-+ return -EINVAL;
-+
-+ if (!*lenp || *ppos) {
- *lenp = 0;
- return 0;
- }
-- if (write)
-- return -EINVAL;
-+ dummy = *table;
-+ dummy.data = buf;
-+ dummy.maxlen = sizeof(buf);
-
-- len = snprintf(buf, sizeof(buf), LPU64"\n", obd_pages_sum());
-+ len = snprintf(buf, sizeof(buf), LPU64,
-+ obd_pages_sum());
- if (len > *lenp)
- len = *lenp;
- buf[len] = '\0';
-- if (copy_to_user(buffer, buf, len))
-- return -EFAULT;
-- *lenp = len;
-- *ppos += *lenp;
-- return 0;
-+ return ll_proc_dostring(&dummy,write,filp,buffer,lenp, ppos);
- }
-
- int LL_PROC_PROTO(proc_mem_max)
- {
-- char buf[22];
- int len;
-+ char buf[22];
-+ struct ctl_table dummy;
- DECLARE_LL_PROC_PPOS_DECL;
-
-- if (!*lenp || (*ppos && !write)) {
-- *lenp = 0;
-- return 0;
-- }
- if (write)
- return -EINVAL;
-
-- len = snprintf(buf, sizeof(buf), LPU64"\n", obd_memory_max());
-+ if (!*lenp || *ppos) {
-+ *lenp = 0;
-+ return 0;
-+ }
-+ dummy = *table;
-+ dummy.data = buf;
-+ dummy.maxlen = sizeof(buf);
-+
-+ len = snprintf(buf, sizeof(buf), LPU64,
-+ obd_memory_max());
- if (len > *lenp)
- len = *lenp;
- buf[len] = '\0';
-- if (copy_to_user(buffer, buf, len))
-- return -EFAULT;
-- *lenp = len;
-- *ppos += *lenp;
-- return 0;
-+ return ll_proc_dostring(&dummy,write,filp,buffer,lenp, ppos);
- }
-
- int LL_PROC_PROTO(proc_pages_max)
- {
- char buf[22];
-+ struct ctl_table dummy;
- int len;
- DECLARE_LL_PROC_PPOS_DECL;
-
-- if (!*lenp || (*ppos && !write)) {
-- *lenp = 0;
-- return 0;
-- }
- if (write)
- return -EINVAL;
-
-- len = snprintf(buf, sizeof(buf), LPU64"\n", obd_pages_max());
-+ if (!*lenp || *ppos) {
-+ *lenp = 0;
-+ return 0;
-+ }
-+ dummy = *table;
-+ dummy.data = buf;
-+ dummy.maxlen = sizeof(buf);
-+ len = snprintf(buf, sizeof(buf), LPU64,
-+ obd_pages_max());
- if (len > *lenp)
- len = *lenp;
- buf[len] = '\0';
-- if (copy_to_user(buffer, buf, len))
-- return -EFAULT;
-- *lenp = len;
-- *ppos += *lenp;
-- return 0;
-+ return ll_proc_dostring(&dummy,write,filp,buffer,lenp, ppos);
- }
-
- static cfs_sysctl_table_t obd_table[] = {
-@@ -284,7 +319,8 @@
- .data = &obd_fail_val,
- .maxlen = sizeof(int),
- .mode = 0644,
-- .proc_handler = &proc_dointvec
-+ .proc_handler = &proc_dointvec,
-+ .strategy = &sysctl_intvec,
- },
- {
- .ctl_name = OBD_TIMEOUT,
-@@ -300,7 +336,7 @@
- .data = &obd_debug_peer_on_timeout,
- .maxlen = sizeof(int),
- .mode = 0644,
-- .proc_handler = &proc_dointvec
-+ .proc_handler = &proc_dointvec,
- },
- {
- .ctl_name = OBD_DUMP_ON_TIMEOUT,
-@@ -308,7 +344,7 @@
- .data = &obd_dump_on_timeout,
- .maxlen = sizeof(int),
- .mode = 0644,
-- .proc_handler = &proc_dointvec
-+ .proc_handler = &proc_dointvec,
- },
- {
- .ctl_name = OBD_DUMP_ON_EVICTION,
-@@ -316,7 +352,7 @@
- .data = &obd_dump_on_eviction,
- .maxlen = sizeof(int),
- .mode = 0644,
-- .proc_handler = &proc_dointvec
-+ .proc_handler = &proc_dointvec,
- },
- {
- .ctl_name = OBD_MEMUSED,
-@@ -324,7 +360,8 @@
- .data = NULL,
- .maxlen = 0,
- .mode = 0444,
-- .proc_handler = &proc_memory_alloc
-+ .proc_handler = &proc_memory_alloc,
-+// .strategy = &sysctl_memory_alloc,
- },
- {
- .ctl_name = OBD_PAGESUSED,
-@@ -332,7 +369,8 @@
- .data = NULL,
- .maxlen = 0,
- .mode = 0444,
-- .proc_handler = &proc_pages_alloc
-+ .proc_handler = &proc_pages_alloc,
-+// .strategy = &sysctl_pages_alloc,
- },
- {
- .ctl_name = OBD_MAXMEMUSED,
-@@ -340,7 +378,8 @@
- .data = NULL,
- .maxlen = 0,
- .mode = 0444,
-- .proc_handler = &proc_mem_max
-+ .proc_handler = &proc_mem_max,
-+// .strategy = &sysctl_mem_max,
- },
- {
- .ctl_name = OBD_MAXPAGESUSED,
-@@ -348,7 +387,8 @@
- .data = NULL,
- .maxlen = 0,
- .mode = 0444,
-- .proc_handler = &proc_pages_max
-+ .proc_handler = &proc_pages_max,
-+// .strategy = &sysctl_pages_max,
- },
- {
- .ctl_name = OBD_LDLM_TIMEOUT,
-@@ -380,15 +420,13 @@
- };
-
- static cfs_sysctl_table_t parent_table[] = {
-- {
-- .ctl_name = OBD_SYSCTL,
-- .procname = "lustre",
-- .data = NULL,
-- .maxlen = 0,
-- .mode = 0555,
-- .child = obd_table
-- },
-- {0}
-+ {
-+ .ctl_name = CTL_LUSTRE,
-+ .procname = "lustre",
-+ .mode = 0555,
-+ .child = obd_table
-+ },
-+ { 0 }
- };
-
- void obd_sysctl_init (void)
-diff -urNad lustre~/lustre/obdclass/llog_obd.c lustre/lustre/obdclass/llog_obd.c
---- lustre~/lustre/obdclass/llog_obd.c 2008-12-30 11:23:32.000000000 +0100
-+++ lustre/lustre/obdclass/llog_obd.c 2009-03-10 11:41:03.000000000 +0100
-@@ -203,7 +203,7 @@
- struct lov_stripe_md *lsm, struct llog_cookie *logcookies,
- int numcookies)
- {
-- __u32 cap;
-+ cfs_kernel_cap_t cap;
- int rc;
- ENTRY;
-
-@@ -213,10 +213,10 @@
- }
-
- CTXT_CHECK_OP(ctxt, add, -EOPNOTSUPP);
-- cap = current->cap_effective;
-+ cap = current->cap_effective;
- cap_raise(current->cap_effective, CAP_SYS_RESOURCE);
- rc = CTXTP(ctxt, add)(ctxt, rec, lsm, logcookies, numcookies);
-- current->cap_effective = cap;
-+ current->cap_effective = cap;
- RETURN(rc);
- }
- EXPORT_SYMBOL(llog_add);
-diff -urNad lustre~/lustre/obdclass/lprocfs_status.c lustre/lustre/obdclass/lprocfs_status.c
---- lustre~/lustre/obdclass/lprocfs_status.c 2008-12-30 11:23:32.000000000 +0100
-+++ lustre/lustre/obdclass/lprocfs_status.c 2009-03-10 11:41:03.000000000 +0100
-@@ -132,6 +132,8 @@
- proc->read_proc = read_proc;
- proc->write_proc = write_proc;
- proc->data = data;
-+ proc->owner = THIS_MODULE;
-+
- return 0;
- }
-
-@@ -151,7 +153,7 @@
-
- LPROCFS_ENTRY();
- OBD_FAIL_TIMEOUT(OBD_FAIL_LPROC_REMOVE, 10);
-- if (!dp->deleted && dp->read_proc)
-+ if (!LPROCFS_CHECK_DELETED(dp) && dp->read_proc)
- rc = dp->read_proc(page, &start, *ppos, PAGE_SIZE,
- &eof, dp->data);
- LPROCFS_EXIT();
-@@ -190,7 +192,7 @@
- int rc = -EIO;
-
- LPROCFS_ENTRY();
-- if (!dp->deleted && dp->write_proc)
-+ if (!LPROCFS_CHECK_DELETED(dp) && dp->write_proc)
- rc = dp->write_proc(f, buf, size, dp->data);
- LPROCFS_EXIT();
- return rc;
-diff -urNad lustre~/lustre/ptlrpc/service.c lustre/lustre/ptlrpc/service.c
---- lustre~/lustre/ptlrpc/service.c 2008-12-30 11:23:32.000000000 +0100
-+++ lustre/lustre/ptlrpc/service.c 2009-03-10 11:41:03.000000000 +0100
-@@ -1268,7 +1268,7 @@
- cfs_daemonize(name);
- exit_fs(cfs_current());
- current->fs = fs;
-- ll_set_fs_pwd(current->fs, init_task.fs->pwdmnt, init_task.fs->pwd);
-+ ll_set_fs_pwd(current->fs, cfs_fs_mnt(init_task.fs), cfs_fs_pwd(init_task.fs));
- }
-
- static void
-diff -urNad lustre~/lustre/quota/quotacheck_test.c lustre/lustre/quota/quotacheck_test.c
---- lustre~/lustre/quota/quotacheck_test.c 2008-12-30 11:23:32.000000000 +0100
-+++ lustre/lustre/quota/quotacheck_test.c 2009-03-10 11:41:03.000000000 +0100
-@@ -97,7 +97,14 @@
- if (ext3_test_bit(index, bitmap_bh->b_data)) {
- CERROR("i: %d, ino: %lu\n", index, ino);
- ll_sleep(1);
-+#ifdef HAVE_IGET_CALL
- inode = iget(sb, ino);
-+#else
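-+ /* iget_locked() may return a not-yet-initialised inode flagged
-+ * I_NEW; unlock_new_inode() is needed to make it usable */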
-+ inode = iget_locked(sb, ino);
-+ if (inode && (inode->i_state & I_NEW)) {
-+ unlock_new_inode(inode);
-+ }
-+#endif
- }
-
- return inode;
diff --git a/debian/patches/patchless_support_2.6.26.dpatch b/debian/patches/patchless_support_2.6.26.dpatch
deleted file mode 100755
index 0d5c413..0000000
--- a/debian/patches/patchless_support_2.6.26.dpatch
+++ /dev/null
@@ -1,16962 +0,0 @@
-#! /bin/sh /usr/share/dpatch/dpatch-run
-## posix_acl.patch by Patrick Winnertz <winnie at debian.org>
-##
-## All lines beginning with `## DP:' are a description of the patch.
-## DP: Patch which will enable 2.6.26 patchless support for lustre, taken from #14250
-
- at DPATCH@
-diff -urNad lustre~/lnet/autoconf/lustre-lnet.m4 lustre/lnet/autoconf/lustre-lnet.m4
---- lustre~/lnet/autoconf/lustre-lnet.m4 2009-03-12 10:32:27.000000000 +0100
-+++ lustre/lnet/autoconf/lustre-lnet.m4 2009-03-13 09:45:02.000000000 +0100
-@@ -1362,6 +1362,22 @@
- ])
- ])
-
-+# 2.6.27 adds a second argument to sock_map_fd
-+AC_DEFUN([LN_SOCK_MAP_FD_2ARG],
-+[AC_MSG_CHECKING([sock_map_fd has a second argument])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/net.h>
-+],[
-+ sock_map_fd(NULL, 0);
-+],[
-+ AC_MSG_RESULT(yes)
-+ AC_DEFINE(HAVE_SOCK_MAP_FD_2ARG, 1,
-+ [sock_map_fd has a second argument])
-+],[
-+ AC_MSG_RESULT(NO)
-+])
-+])
-+
- #
- # LN_PROG_LINUX
- #
-@@ -1410,6 +1426,8 @@
- LN_SCATTERLIST_SETPAGE
- # 2.6.26
- LN_SEM_COUNT
-+# 2.6.27
-+LN_SOCK_MAP_FD_2ARG
- ])
-
- #
-diff -urNad lustre~/lnet/libcfs/linux/linux-prim.c lustre/lnet/libcfs/linux/linux-prim.c
---- lustre~/lnet/libcfs/linux/linux-prim.c 2008-08-07 11:51:06.000000000 +0200
-+++ lustre/lnet/libcfs/linux/linux-prim.c 2009-03-13 09:45:02.000000000 +0100
-@@ -49,7 +49,7 @@
- void cfs_enter_debugger(void)
- {
- #if defined(CONFIG_KGDB)
-- BREAKPOINT();
-+// BREAKPOINT();
- #elif defined(__arch_um__)
- asm("int $3");
- #else
-diff -urNad lustre~/lnet/libcfs/linux/linux-tcpip.c lustre/lnet/libcfs/linux/linux-tcpip.c
---- lustre~/lnet/libcfs/linux/linux-tcpip.c 2008-08-07 11:51:07.000000000 +0200
-+++ lustre/lnet/libcfs/linux/linux-tcpip.c 2009-03-13 09:45:02.000000000 +0100
-@@ -63,7 +63,11 @@
- return rc;
- }
-
-+#ifdef HAVE_SOCK_MAP_FD_2ARG
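-+ /* the second argument added in 2.6.27 carries fd flags such as
-+ * O_CLOEXEC; passing 0 keeps the old behaviour */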
-+ fd = sock_map_fd(sock,0);
-+#else
- fd = sock_map_fd(sock);
-+#endif
- if (fd < 0) {
- rc = fd;
- sock_release(sock);
-diff -urNad lustre~/lnet/lnet/api-ni.c lustre/lnet/lnet/api-ni.c
---- lustre~/lnet/lnet/api-ni.c 2009-03-12 10:21:27.000000000 +0100
-+++ lustre/lnet/lnet/api-ni.c 2009-03-13 09:45:02.000000000 +0100
-@@ -1032,7 +1032,7 @@
- #ifdef __KERNEL__
- if (lnd == NULL) {
- LNET_MUTEX_UP(&the_lnet.ln_lnd_mutex);
-- rc = request_module(libcfs_lnd2modname(lnd_type));
-+ rc = request_module("%s", libcfs_lnd2modname(lnd_type));
- LNET_MUTEX_DOWN(&the_lnet.ln_lnd_mutex);
-
- lnd = lnet_find_lnd_by_type(lnd_type);
-diff -urNad lustre~/lustre/autoconf/lustre-core.m4 lustre/lustre/autoconf/lustre-core.m4
---- lustre~/lustre/autoconf/lustre-core.m4 2009-03-12 10:32:27.000000000 +0100
-+++ lustre/lustre/autoconf/lustre-core.m4 2009-03-13 09:45:02.000000000 +0100
-@@ -1106,15 +1106,20 @@
- AC_DEFUN([LC_PAGE_CHECKED],
- [AC_MSG_CHECKING([kernel has PageChecked and SetPageChecked])
- LB_LINUX_TRY_COMPILE([
-- #include <linux/mm.h>
-- #include <linux/page-flags.h>
-+ #include <linux/autoconf.h>
-+#ifdef HAVE_LINUX_MMTYPES_H
-+ #include <linux/mm_types.h>
-+#endif
-+ #include <linux/page-flags.h>
- ],[
-- #ifndef PageChecked
-- #error PageChecked not defined in kernel
-- #endif
-- #ifndef SetPageChecked
-- #error SetPageChecked not defined in kernel
-- #endif
-+ struct page *p;
-+
-+ /* before 2.6.26 this was a define */
-+ #ifndef PageChecked
-+ /* 2.6.26 use function instead of define for it */
-+ SetPageChecked(p);
-+ PageChecked(p);
-+ #endif
- ],[
- AC_MSG_RESULT(yes)
- AC_DEFINE(HAVE_PAGE_CHECKED, 1,
-@@ -1232,6 +1237,9 @@
- ])
- ])
-
-+# 2.6.18
-+
-+
- # 2.6.23 have return type 'void' for unregister_blkdev
- AC_DEFUN([LC_UNREGISTER_BLKDEV_RETURN_INT],
- [AC_MSG_CHECKING([if unregister_blkdev return int])
-@@ -1249,6 +1257,25 @@
- ])
-
- # 2.6.23 change .sendfile to .splice_read
-+# RHEL4 (-92 kernel) has both the .sendfile and .splice_read APIs
-+AC_DEFUN([LC_KERNEL_SENDFILE],
-+[AC_MSG_CHECKING([if kernel has .sendfile])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/fs.h>
-+],[
-+ struct file_operations file;
-+
-+ file.sendfile = NULL;
-+], [
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_KERNEL_SENDFILE, 1,
-+ [kernel has .sendfile])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+# 2.6.23 change .sendfile to .splice_read
- AC_DEFUN([LC_KERNEL_SPLICE_READ],
- [AC_MSG_CHECKING([if kernel has .splice_read])
- LB_LINUX_TRY_COMPILE([
-@@ -1268,11 +1295,240 @@
-
- # 2.6.23 extract nfs export related data into exportfs.h
- AC_DEFUN([LC_HAVE_EXPORTFS_H],
--[
--tmpfl="$CFLAGS"
--CFLAGS="$CFLAGS -I$LINUX_OBJ/include"
--AC_CHECK_HEADERS([linux/exportfs.h])
--CFLAGS="$tmpfl"
-+[LB_CHECK_FILE([$LINUX/include/linux/exportfs.h], [
-+ AC_DEFINE(HAVE_LINUX_EXPORTFS_H, 1,
-+ [kernel has include/exportfs.h])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+# 2.6.23 has a new page fault handling API
-+AC_DEFUN([LC_VM_OP_FAULT],
-+[AC_MSG_CHECKING([if kernel has .fault in vm_operation_struct])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/mm.h>
-+],[
-+ struct vm_operations_struct op;
-+
-+ op.fault = NULL;
-+], [
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_VM_OP_FAULT, 1,
-+ [if kernel has .fault in vm_operation_struct])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+#2.6.23 has new shrinker API
-+AC_DEFUN([LC_REGISTER_SHRINKER],
-+[AC_MSG_CHECKING([if kernel has register_shrinker])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/mm.h>
-+],[
-+ register_shrinker(NULL);
-+], [
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_REGISTER_SHRINKER, 1,
-+ [if kernel has register_shrinker])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+# 2.6.24 has bio_endio with 2 args
-+AC_DEFUN([LC_BIO_ENDIO_2ARG],
-+[AC_MSG_CHECKING([if kernel has bio_endio with 2 args])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/bio.h>
-+],[
-+ bio_endio(NULL, 0);
-+], [
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_BIO_ENDIO_2ARG, 1,
-+ [if kernel has bio_endio with 2 args])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+# 2.6.24 has new members in the export_operations struct.
-+AC_DEFUN([LC_FH_TO_DENTRY],
-+[AC_MSG_CHECKING([if kernel has .fh_to_dentry member in export_operations struct])
-+LB_LINUX_TRY_COMPILE([
-+#ifdef HAVE_LINUX_EXPORTFS_H
-+ #include <linux/exportfs.h>
-+#else
-+ #include <linux/fs.h>
-+#endif
-+],[
-+ struct export_operations exp;
-+
-+ exp.fh_to_dentry = NULL;
-+], [
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_FH_TO_DENTRY, 1,
-+ [kernel has .fh_to_dentry member in export_operations struct])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+# 2.6.24 need linux/mm_types.h included
-+AC_DEFUN([LC_HAVE_MMTYPES_H],
-+[LB_CHECK_FILE([$LINUX/include/linux/mm_types.h], [
-+ AC_DEFINE(HAVE_LINUX_MMTYPES_H, 1,
-+ [kernel has include/mm_types.h])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+# 2.6.24 removed the long-lived 'deleted' member from the procfs entry struct
-+AC_DEFUN([LC_PROCFS_DELETED],
-+[AC_MSG_CHECKING([if kernel has deleted member in procfs entry struct])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/proc_fs.h>
-+],[
-+ struct proc_dir_entry pde;
-+
-+ pde.deleted = NULL;
-+], [
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_PROCFS_DELETED, 1,
-+ [kernel has deleted member in procfs entry struct])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+# 2.6.25 changed the define to an inline function
-+AC_DEFUN([LC_MAPPING_CAP_WRITEBACK_DIRTY],
-+[AC_MSG_CHECKING([if kernel has mapping_cap_writeback_dirty])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/backing-dev.h>
-+],[
-+ #ifndef mapping_cap_writeback_dirty
-+ mapping_cap_writeback_dirty(NULL);
-+ #endif
-+],[
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_MAPPING_CAP_WRITEBACK_DIRTY, 1,
-+        [kernel has mapping_cap_writeback_dirty])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+
-+
-+# 2.6.26 does not export set_fs_pwd and changes the pwd parameter in fs_struct
-+AC_DEFUN([LC_FS_STRUCT_USE_PATH],
-+[AC_MSG_CHECKING([fs_struct use path structure])
-+LB_LINUX_TRY_COMPILE([
-+ #include <asm/atomic.h>
-+ #include <linux/spinlock.h>
-+ #include <linux/fs_struct.h>
-+],[
-+ struct path path;
-+ struct fs_struct fs;
-+
-+ fs.pwd = path;
-+], [
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_FS_STRUCT_USE_PATH, 1,
-+ [fs_struct use path structure])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+# 2.6.26 remove path_release and use path_put instead
-+AC_DEFUN([LC_PATH_RELEASE],
-+[AC_MSG_CHECKING([if path_release exist])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/dcache.h>
-+ #include <linux/namei.h>
-+],[
-+ path_release(NULL);
-+],[
-+ AC_DEFINE(HAVE_PATH_RELEASE, 1, [path_release exist])
-+ AC_MSG_RESULT([yes])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+#2.6.27
-+AC_DEFUN([LC_INODE_PERMISION_2ARGS],
-+[AC_MSG_CHECKING([if inode_operations->permission has two args])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/fs.h>
-+],[
-+ struct inode *inode;
-+
-+ inode->i_op->permission(NULL,0);
-+],[
-+ AC_DEFINE(HAVE_INODE_PERMISION_2ARGS, 1,
-+        [inode_operations->permission has two args])
-+ AC_MSG_RESULT([yes])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+# 2.6.27 has file_remove_suid instead of remove_suid
-+AC_DEFUN([LC_FILE_REMOVE_SUID],
-+[AC_MSG_CHECKING([if kernel has file_remove_suid])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/fs.h>
-+],[
-+ file_remove_suid(NULL);
-+],[
-+ AC_DEFINE(HAVE_FILE_REMOVE_SUID, 1,
-+        [kernel has file_remove_suid])
-+ AC_MSG_RESULT([yes])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+# 2.6.27 has a new page locking API
-+AC_DEFUN([LC_TRYLOCKPAGE],
-+[AC_MSG_CHECKING([if kernel uses trylock_page for page locking])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/pagemap.h>
-+],[
-+ trylock_page(NULL);
-+],[
-+ AC_DEFINE(HAVE_TRYLOCK_PAGE, 1,
-+        [kernel uses trylock_page for page locking])
-+ AC_MSG_RESULT([yes])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+# 2.6.27 and some older kernels have mapping->tree_lock as a spinlock
-+AC_DEFUN([LC_RW_TREE_LOCK],
-+[AC_MSG_CHECKING([mapping->tree_lock is rw_lock])
-+tmp_flags="$EXTRA_KCFLAGS"
-+EXTRA_KCFLAGS="-Werror"
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/fs.h>
-+],[
-+ struct address_space *map = NULL;
-+
-+ write_lock_irq(&map->tree_lock);
-+],[
-+ AC_MSG_RESULT(yes)
-+ AC_DEFINE(HAVE_RW_TREE_LOCK, 1,
-+ [mapping->tree_lock is rw_lock])
-+],[
-+ AC_MSG_RESULT(no)
-+])
-+EXTRA_KCFLAGS="$tmp_flags"
- ])
-
- #
-@@ -1372,8 +1628,31 @@
- LC_FS_RENAME_DOES_D_MOVE
- # 2.6.23
- LC_UNREGISTER_BLKDEV_RETURN_INT
-+ LC_KERNEL_SENDFILE
- LC_KERNEL_SPLICE_READ
- LC_HAVE_EXPORTFS_H
-+ LC_VM_OP_FAULT
-+ LC_REGISTER_SHRINKER
-+
-+ #2.6.25
-+ LC_MAPPING_CAP_WRITEBACK_DIRTY
-+
-+ # 2.6.24
-+ LC_HAVE_MMTYPES_H
-+ LC_BIO_ENDIO_2ARG
-+ LC_FH_TO_DENTRY
-+ LC_PROCFS_DELETED
-+
-+ # 2.6.26
-+ LC_FS_STRUCT_USE_PATH
-+ LC_RCU_LIST_SAFE
-+ LC_PATH_RELEASE
-+
-+ # 2.6.27
-+ LC_INODE_PERMISION_2ARGS
-+ LC_FILE_REMOVE_SUID
-+ LC_TRYLOCKPAGE
-+ LC_RW_TREE_LOCK
- ])
-
- #
-@@ -1606,6 +1885,7 @@
- ],[
- AC_MSG_RESULT([no])
- ])
-+
- ],[
- AC_MSG_RESULT([no])
- ])
-diff -urNad lustre~/lustre/autoconf/lustre-core.m4.orig lustre/lustre/autoconf/lustre-core.m4.orig
---- lustre~/lustre/autoconf/lustre-core.m4.orig 1970-01-01 00:00:00.000000000 +0000
-+++ lustre/lustre/autoconf/lustre-core.m4.orig 2009-03-13 09:45:02.000000000 +0100
-@@ -0,0 +1,2075 @@
-+#* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-+#* vim:expandtab:shiftwidth=8:tabstop=8:
-+#
-+# LC_CONFIG_SRCDIR
-+#
-+# Wrapper for AC_CONFIG_SUBDIR
-+#
-+AC_DEFUN([LC_CONFIG_SRCDIR],
-+[AC_CONFIG_SRCDIR([lustre/obdclass/obdo.c])
-+])
-+
-+#
-+# LC_PATH_DEFAULTS
-+#
-+# lustre specific paths
-+#
-+AC_DEFUN([LC_PATH_DEFAULTS],
-+[# ptlrpc kernel build requires this
-+LUSTRE="$PWD/lustre"
-+AC_SUBST(LUSTRE)
-+
-+# mount.lustre
-+rootsbindir='/sbin'
-+AC_SUBST(rootsbindir)
-+
-+demodir='$(docdir)/demo'
-+AC_SUBST(demodir)
-+
-+pkgexampledir='${pkgdatadir}/examples'
-+AC_SUBST(pkgexampledir)
-+])
-+
-+#
-+# LC_TARGET_SUPPORTED
-+#
-+# is the target os supported?
-+#
-+AC_DEFUN([LC_TARGET_SUPPORTED],
-+[case $target_os in
-+ linux* | darwin*)
-+$1
-+ ;;
-+ *)
-+$2
-+ ;;
-+esac
-+])
-+
-+#
-+# LC_CONFIG_EXT3
-+#
-+# that ext3 is enabled in the kernel
-+#
-+AC_DEFUN([LC_CONFIG_EXT3],
-+[LB_LINUX_CONFIG([EXT3_FS],[],[
-+ LB_LINUX_CONFIG([EXT3_FS_MODULE],[],[$2])
-+])
-+LB_LINUX_CONFIG([EXT3_FS_XATTR],[$1],[$3])
-+])
-+
-+#
-+# LC_FSHOOKS
-+#
-+# If we have (and can build) fshooks.h
-+#
-+AC_DEFUN([LC_FSHOOKS],
-+[LB_CHECK_FILE([$LINUX/include/linux/fshooks.h],[
-+ AC_MSG_CHECKING([if fshooks.h can be compiled])
-+ LB_LINUX_TRY_COMPILE([
-+ #include <linux/fshooks.h>
-+ ],[],[
-+ AC_MSG_RESULT([yes])
-+ ],[
-+ AC_MSG_RESULT([no])
-+ AC_MSG_WARN([You might have better luck with gcc 3.3.x.])
-+ AC_MSG_WARN([You can set CC=gcc33 before running configure.])
-+ AC_MSG_ERROR([Your compiler cannot build fshooks.h.])
-+ ])
-+$1
-+],[
-+$2
-+])
-+])
-+
-+#
-+# LC_STRUCT_KIOBUF
-+#
-+# rh 2.4.18 has iobuf->dovary, but other kernels do not
-+#
-+AC_DEFUN([LC_STRUCT_KIOBUF],
-+[AC_MSG_CHECKING([if struct kiobuf has a dovary field])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/iobuf.h>
-+],[
-+ struct kiobuf iobuf;
-+ iobuf.dovary = 1;
-+],[
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_KIOBUF_DOVARY, 1, [struct kiobuf has a dovary field])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+#
-+# LC_FUNC_COND_RESCHED
-+#
-+# cond_resched() was introduced in 2.4.20
-+#
-+AC_DEFUN([LC_FUNC_COND_RESCHED],
-+[AC_MSG_CHECKING([if kernel offers cond_resched])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/sched.h>
-+],[
-+ cond_resched();
-+],[
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_COND_RESCHED, 1, [cond_resched found])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+#
-+# LC_FUNC_ZAP_PAGE_RANGE
-+#
-+# if zap_page_range() takes a vma arg
-+#
-+AC_DEFUN([LC_FUNC_ZAP_PAGE_RANGE],
-+[AC_MSG_CHECKING([if zap_page_range takes a vma parameter])
-+ZAP_PAGE_RANGE_VMA="`grep -c 'zap_page_range.*struct vm_area_struct' $LINUX/include/linux/mm.h`"
-+if test "$ZAP_PAGE_RANGE_VMA" != 0 ; then
-+ AC_DEFINE(ZAP_PAGE_RANGE_VMA, 1, [zap_page_range with vma parameter])
-+ AC_MSG_RESULT([yes])
-+else
-+ AC_MSG_RESULT([no])
-+fi
-+])
-+
-+#
-+# LC_FUNC_PDE
-+#
-+# if proc_fs.h defines PDE()
-+#
-+AC_DEFUN([LC_FUNC_PDE],
-+[AC_MSG_CHECKING([if kernel defines PDE])
-+HAVE_PDE="`grep -c 'proc_dir_entry..PDE' $LINUX/include/linux/proc_fs.h`"
-+if test "$HAVE_PDE" != 0 ; then
-+ AC_DEFINE(HAVE_PDE, 1, [the kernel defines PDE])
-+ AC_MSG_RESULT([yes])
-+else
-+ AC_MSG_RESULT([no])
-+fi
-+])
-+
-+#
-+# LC_FUNC_FILEMAP_FDATAWRITE
-+#
-+# if filemap_fdatawrite() exists
-+#
-+AC_DEFUN([LC_FUNC_FILEMAP_FDATAWRITE],
-+[AC_MSG_CHECKING([whether filemap_fdatawrite() is defined])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/fs.h>
-+],[
-+ int (*foo)(struct address_space *)= filemap_fdatawrite;
-+],[
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_FILEMAP_FDATAWRITE, 1, [filemap_fdatawrite() found])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+#
-+# LC_FUNC_DIRECT_IO
-+#
-+# if direct_IO takes a struct file argument
-+#
-+AC_DEFUN([LC_FUNC_DIRECT_IO],
-+[AC_MSG_CHECKING([if kernel passes struct file to direct_IO])
-+HAVE_DIO_FILE="`grep -c 'direct_IO.*struct file' $LINUX/include/linux/fs.h`"
-+if test "$HAVE_DIO_FILE" != 0 ; then
-+ AC_DEFINE(HAVE_DIO_FILE, 1, [the kernel passes struct file to direct_IO])
-+ AC_MSG_RESULT(yes)
-+else
-+ AC_MSG_RESULT(no)
-+fi
-+])
-+
-+#
-+# LC_HEADER_MM_INLINE
-+#
-+# RHEL kernels define page_count in mm_inline.h
-+#
-+AC_DEFUN([LC_HEADER_MM_INLINE],
-+[AC_MSG_CHECKING([if kernel has mm_inline.h header])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/mm_inline.h>
-+],[
-+ #ifndef page_count
-+ #error mm_inline.h does not define page_count
-+ #endif
-+],[
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_MM_INLINE, 1, [mm_inline found])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+#
-+# LC_STRUCT_INODE
-+#
-+# if inode->i_alloc_sem exists
-+#
-+AC_DEFUN([LC_STRUCT_INODE],
-+[AC_MSG_CHECKING([if struct inode has i_alloc_sem])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/fs.h>
-+ #include <linux/version.h>
-+],[
-+ struct inode i;
-+ return (char *)&i.i_alloc_sem - (char *)&i;
-+],[
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_I_ALLOC_SEM, 1, [struct inode has i_alloc_sem])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+#
-+# LC_FUNC_REGISTER_CACHE
-+#
-+# if register_cache() is defined by kernel
-+#
-+AC_DEFUN([LC_FUNC_REGISTER_CACHE],
-+[AC_MSG_CHECKING([if kernel defines register_cache()])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/list.h>
-+ #include <linux/cache_def.h>
-+],[
-+ struct cache_definition cache;
-+],[
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_REGISTER_CACHE, 1, [register_cache found])
-+ AC_MSG_CHECKING([if kernel expects return from cache shrink function])
-+ HAVE_CACHE_RETURN_INT="`grep -c 'int.*shrink' $LINUX/include/linux/cache_def.h`"
-+ if test "$HAVE_CACHE_RETURN_INT" != 0 ; then
-+ AC_DEFINE(HAVE_CACHE_RETURN_INT, 1, [kernel expects return from shrink_cache])
-+ AC_MSG_RESULT(yes)
-+ else
-+ AC_MSG_RESULT(no)
-+ fi
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+#
-+# LC_FUNC_GRAB_CACHE_PAGE_NOWAIT_GFP
-+#
-+# check for our patched grab_cache_page_nowait_gfp() function
-+#
-+AC_DEFUN([LC_FUNC_GRAB_CACHE_PAGE_NOWAIT_GFP],
-+[AC_MSG_CHECKING([if kernel defines grab_cache_page_nowait_gfp()])
-+HAVE_GCPN_GFP="`grep -c 'grab_cache_page_nowait_gfp' $LINUX/include/linux/pagemap.h`"
-+if test "$HAVE_GCPN_GFP" != 0 ; then
-+ AC_DEFINE(HAVE_GRAB_CACHE_PAGE_NOWAIT_GFP, 1,
-+ [kernel has grab_cache_page_nowait_gfp()])
-+ AC_MSG_RESULT(yes)
-+else
-+ AC_MSG_RESULT(no)
-+fi
-+])
-+
-+#
-+# LC_FUNC_DEV_SET_RDONLY
-+#
-+# check for the old-style dev_set_rdonly which took an extra "devno" param
-+# and can only set a single device to discard writes at one time
-+#
-+AC_DEFUN([LC_FUNC_DEV_SET_RDONLY],
-+[AC_MSG_CHECKING([if kernel has new dev_set_rdonly])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/fs.h>
-+],[
-+ #ifndef HAVE_CLEAR_RDONLY_ON_PUT
-+ #error needs to be patched by lustre kernel patches from Lustre version 1.4.3 or above.
-+ #endif
-+],[
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_DEV_SET_RDONLY, 1, [kernel has new dev_set_rdonly])
-+],[
-+        AC_MSG_RESULT([no, Linux kernel source needs to be patched by lustre
-+kernel patches from Lustre version 1.4.3 or above.])
-+])
-+])
-+
-+#
-+# LC_CONFIG_BACKINGFS
-+#
-+# setup, check the backing filesystem
-+#
-+AC_DEFUN([LC_CONFIG_BACKINGFS],
-+[
-+BACKINGFS="ldiskfs"
-+
-+if test x$with_ldiskfs = xno ; then
-+ BACKINGFS="ext3"
-+
-+ if test x$linux25$enable_server = xyesyes ; then
-+ AC_MSG_ERROR([ldiskfs is required for 2.6-based servers.])
-+ fi
-+
-+ # --- Check that ext3 and ext3 xattr are enabled in the kernel
-+ LC_CONFIG_EXT3([],[
-+ AC_MSG_ERROR([Lustre requires that ext3 is enabled in the kernel])
-+ ],[
-+ AC_MSG_WARN([Lustre requires that extended attributes for ext3 are enabled in the kernel])
-+ AC_MSG_WARN([This build may fail.])
-+ ])
-+else
-+ # ldiskfs is enabled
-+ LB_DEFINE_LDISKFS_OPTIONS
-+fi #ldiskfs
-+
-+AC_MSG_CHECKING([which backing filesystem to use])
-+AC_MSG_RESULT([$BACKINGFS])
-+AC_SUBST(BACKINGFS)
-+])
-+
-+#
-+# LC_CONFIG_PINGER
-+#
-+# the pinger is temporary, until we have the recovery node in place
-+#
-+AC_DEFUN([LC_CONFIG_PINGER],
-+[AC_MSG_CHECKING([whether to enable pinger support])
-+AC_ARG_ENABLE([pinger],
-+ AC_HELP_STRING([--disable-pinger],
-+ [disable recovery pinger support]),
-+ [],[enable_pinger='yes'])
-+AC_MSG_RESULT([$enable_pinger])
-+if test x$enable_pinger != xno ; then
-+ AC_DEFINE(ENABLE_PINGER, 1, Use the Pinger)
-+fi
-+])
-+
-+#
-+# LC_CONFIG_CHECKSUM
-+#
-+# do checksum of bulk data between client and OST
-+#
-+AC_DEFUN([LC_CONFIG_CHECKSUM],
-+[AC_MSG_CHECKING([whether to enable data checksum support])
-+AC_ARG_ENABLE([checksum],
-+ AC_HELP_STRING([--disable-checksum],
-+ [disable data checksum support]),
-+ [],[enable_checksum='yes'])
-+AC_MSG_RESULT([$enable_checksum])
-+if test x$enable_checksum != xno ; then
-+ AC_DEFINE(ENABLE_CHECKSUM, 1, do data checksums)
-+fi
-+])
-+
-+#
-+# LC_CONFIG_HEALTH_CHECK_WRITE
-+#
-+# Turn on the actual write to the disk
-+#
-+AC_DEFUN([LC_CONFIG_HEALTH_CHECK_WRITE],
-+[AC_MSG_CHECKING([whether to enable a write with the health check])
-+AC_ARG_ENABLE([health-write],
-+ AC_HELP_STRING([--enable-health-write],
-+ [enable disk writes when doing health check]),
-+ [],[enable_health_write='no'])
-+AC_MSG_RESULT([$enable_health_write])
-+if test x$enable_health_write == xyes ; then
-+ AC_DEFINE(USE_HEALTH_CHECK_WRITE, 1, Write when Checking Health)
-+fi
-+])
-+
-+#
-+# LC_CONFIG_LIBLUSTRE_RECOVERY
-+#
-+AC_DEFUN([LC_CONFIG_LIBLUSTRE_RECOVERY],
-+[AC_MSG_CHECKING([whether to enable liblustre recovery support])
-+AC_ARG_ENABLE([liblustre-recovery],
-+ AC_HELP_STRING([--disable-liblustre-recovery],
-+ [disable liblustre recovery support]),
-+ [],[enable_liblustre_recovery='yes'])
-+AC_MSG_RESULT([$enable_liblustre_recovery])
-+if test x$enable_liblustre_recovery != xno ; then
-+ AC_DEFINE(ENABLE_LIBLUSTRE_RECOVERY, 1, Liblustre Can Recover)
-+fi
-+])
-+
-+#
-+# LC_CONFIG_OBD_BUFFER_SIZE
-+#
-+# the maximum buffer size of lctl ioctls
-+#
-+AC_DEFUN([LC_CONFIG_OBD_BUFFER_SIZE],
-+[AC_MSG_CHECKING([maximum OBD ioctl size])
-+AC_ARG_WITH([obd-buffer-size],
-+ AC_HELP_STRING([--with-obd-buffer-size=[size]],
-+ [set lctl ioctl maximum bytes (default=8192)]),
-+ [
-+ OBD_BUFFER_SIZE=$with_obd_buffer_size
-+ ],[
-+ OBD_BUFFER_SIZE=8192
-+ ])
-+AC_MSG_RESULT([$OBD_BUFFER_SIZE bytes])
-+AC_DEFINE_UNQUOTED(OBD_MAX_IOCTL_BUFFER, $OBD_BUFFER_SIZE, [IOCTL Buffer Size])
-+])
-+
-+#
-+# LC_STRUCT_STATFS
-+#
-+# AIX does not have statfs.f_namelen
-+#
-+AC_DEFUN([LC_STRUCT_STATFS],
-+[AC_MSG_CHECKING([if struct statfs has a f_namelen field])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/vfs.h>
-+],[
-+ struct statfs sfs;
-+ sfs.f_namelen = 1;
-+],[
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_STATFS_NAMELEN, 1, [struct statfs has a namelen field])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+#
-+# LC_READLINK_SSIZE_T
-+#
-+AC_DEFUN([LC_READLINK_SSIZE_T],
-+[AC_MSG_CHECKING([if readlink returns ssize_t])
-+AC_TRY_COMPILE([
-+ #include <unistd.h>
-+],[
-+ ssize_t readlink(const char *, char *, size_t);
-+],[
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_POSIX_1003_READLINK, 1, [readlink returns ssize_t])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+AC_DEFUN([LC_FUNC_PAGE_MAPPED],
-+[AC_MSG_CHECKING([if kernel offers page_mapped])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/mm.h>
-+],[
-+ page_mapped(NULL);
-+],[
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_PAGE_MAPPED, 1, [page_mapped found])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+AC_DEFUN([LC_STRUCT_FILE_OPS_UNLOCKED_IOCTL],
-+[AC_MSG_CHECKING([if struct file_operations has an unlocked_ioctl field])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/fs.h>
-+],[
-+ struct file_operations fops;
-+ &fops.unlocked_ioctl;
-+],[
-+ AC_MSG_RESULT([yes])
-+        AC_DEFINE(HAVE_UNLOCKED_IOCTL, 1, [struct file_operations has an unlocked_ioctl field])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+AC_DEFUN([LC_FILEMAP_POPULATE],
-+[AC_MSG_CHECKING([for exported filemap_populate])
-+LB_LINUX_TRY_COMPILE([
-+ #include <asm/page.h>
-+ #include <linux/mm.h>
-+],[
-+ filemap_populate(NULL, 0, 0, __pgprot(0), 0, 0);
-+],[
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_FILEMAP_POPULATE, 1, [Kernel exports filemap_populate])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+AC_DEFUN([LC_D_ADD_UNIQUE],
-+[AC_MSG_CHECKING([for d_add_unique])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/dcache.h>
-+],[
-+ d_add_unique(NULL, NULL);
-+],[
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_D_ADD_UNIQUE, 1, [Kernel has d_add_unique])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+AC_DEFUN([LC_BIT_SPINLOCK_H],
-+[LB_CHECK_FILE([$LINUX/include/linux/bit_spinlock.h],[
-+ AC_MSG_CHECKING([if bit_spinlock.h can be compiled])
-+ LB_LINUX_TRY_COMPILE([
-+ #include <asm/processor.h>
-+ #include <linux/spinlock.h>
-+ #include <linux/bit_spinlock.h>
-+ ],[],[
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_BIT_SPINLOCK_H, 1, [Kernel has bit_spinlock.h])
-+ ],[
-+ AC_MSG_RESULT([no])
-+ ])
-+],
-+[])
-+])
-+
-+#
-+# LC_POSIX_ACL_XATTR
-+#
-+# If we have xattr_acl.h
-+#
-+AC_DEFUN([LC_XATTR_ACL],
-+[LB_CHECK_FILE([$LINUX/include/linux/xattr_acl.h],[
-+ AC_MSG_CHECKING([if xattr_acl.h can be compiled])
-+ LB_LINUX_TRY_COMPILE([
-+ #include <linux/xattr_acl.h>
-+ ],[],[
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_XATTR_ACL, 1, [Kernel has xattr_acl])
-+ ],[
-+ AC_MSG_RESULT([no])
-+ ])
-+],
-+[])
-+])
-+
-+#
-+# LC_LINUX_FIEMAP_H
-+#
-+# If we have fiemap.h
-+# after 2.6.27, fiemap.h lives in include/linux
-+#
-+AC_DEFUN([LC_LINUX_FIEMAP_H],
-+[LB_CHECK_FILE([$LINUX/include/linux/fiemap.h],[
-+ AC_MSG_CHECKING([if fiemap.h can be compiled])
-+ LB_LINUX_TRY_COMPILE([
-+ #include <linux/fiemap.h>
-+ ],[],[
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_LINUX_FIEMAP_H, 1, [Kernel has fiemap.h])
-+ ],[
-+ AC_MSG_RESULT([no])
-+ ])
-+],
-+[])
-+])
-+
-+
-+AC_DEFUN([LC_STRUCT_INTENT_FILE],
-+[AC_MSG_CHECKING([if struct open_intent has a file field])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/fs.h>
-+ #include <linux/namei.h>
-+],[
-+ struct open_intent intent;
-+ &intent.file;
-+],[
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_FILE_IN_STRUCT_INTENT, 1, [struct open_intent has a file field])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+
-+AC_DEFUN([LC_POSIX_ACL_XATTR_H],
-+[LB_CHECK_FILE([$LINUX/include/linux/posix_acl_xattr.h],[
-+ AC_MSG_CHECKING([if linux/posix_acl_xattr.h can be compiled])
-+ LB_LINUX_TRY_COMPILE([
-+ #include <linux/posix_acl_xattr.h>
-+ ],[],[
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_LINUX_POSIX_ACL_XATTR_H, 1, [linux/posix_acl_xattr.h found])
-+
-+ ],[
-+ AC_MSG_RESULT([no])
-+ ])
-+$1
-+],[
-+AC_MSG_RESULT([no])
-+])
-+])
-+
-+#
-+# LC_EXPORT___IGET
-+# starting from 2.6.19 linux kernel exports __iget()
-+#
-+AC_DEFUN([LC_EXPORT___IGET],
-+[LB_CHECK_SYMBOL_EXPORT([__iget],
-+[fs/inode.c],[
-+ AC_DEFINE(HAVE_EXPORT___IGET, 1, [kernel exports __iget])
-+],[
-+])
-+])
-+
-+
-+AC_DEFUN([LC_LUSTRE_VERSION_H],
-+[LB_CHECK_FILE([$LINUX/include/linux/lustre_version.h],[
-+ rm -f "$LUSTRE/include/linux/lustre_version.h"
-+],[
-+ touch "$LUSTRE/include/linux/lustre_version.h"
-+ if test x$enable_server = xyes ; then
-+ AC_MSG_WARN([Unpatched kernel detected.])
-+ AC_MSG_WARN([Lustre servers cannot be built with an unpatched kernel;])
-+ AC_MSG_WARN([disabling server build])
-+ enable_server='no'
-+ fi
-+])
-+])
-+
-+AC_DEFUN([LC_FUNC_SET_FS_PWD],
-+[LB_CHECK_SYMBOL_EXPORT([set_fs_pwd],
-+[fs/namespace.c],[
-+ AC_DEFINE(HAVE_SET_FS_PWD, 1, [set_fs_pwd is exported])
-+],[
-+])
-+])
-+
-+#
-+# check for FS_RENAME_DOES_D_MOVE flag
-+#
-+AC_DEFUN([LC_FS_RENAME_DOES_D_MOVE],
-+[AC_MSG_CHECKING([if kernel has FS_RENAME_DOES_D_MOVE flag])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/fs.h>
-+],[
-+ int v = FS_RENAME_DOES_D_MOVE;
-+],[
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_FS_RENAME_DOES_D_MOVE, 1, [kernel has FS_RENAME_DOES_D_MOVE flag])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+#
-+# LC_FUNC_MS_FLOCK_LOCK
-+#
-+# SLES9 kernel has MS_FLOCK_LOCK sb flag
-+#
-+AC_DEFUN([LC_FUNC_MS_FLOCK_LOCK],
-+[AC_MSG_CHECKING([if kernel has MS_FLOCK_LOCK sb flag])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/fs.h>
-+],[
-+ int flags = MS_FLOCK_LOCK;
-+],[
-+ AC_DEFINE(HAVE_MS_FLOCK_LOCK, 1,
-+ [kernel has MS_FLOCK_LOCK flag])
-+ AC_MSG_RESULT([yes])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+#
-+# LC_FUNC_HAVE_CAN_SLEEP_ARG
-+#
-+# SLES9 kernel has third arg can_sleep
-+# in fs/locks.c: flock_lock_file_wait()
-+#
-+AC_DEFUN([LC_FUNC_HAVE_CAN_SLEEP_ARG],
-+[AC_MSG_CHECKING([if kernel has third arg can_sleep in fs/locks.c: flock_lock_file_wait()])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/fs.h>
-+],[
-+ int cansleep;
-+ struct file *file;
-+ struct file_lock *file_lock;
-+ flock_lock_file_wait(file, file_lock, cansleep);
-+],[
-+ AC_DEFINE(HAVE_CAN_SLEEP_ARG, 1,
-+ [kernel has third arg can_sleep in fs/locks.c: flock_lock_file_wait()])
-+ AC_MSG_RESULT([yes])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+#
-+# LC_FUNC_F_OP_FLOCK
-+#
-+# rhel4.2 kernel has f_op->flock field
-+#
-+AC_DEFUN([LC_FUNC_F_OP_FLOCK],
-+[AC_MSG_CHECKING([if struct file_operations has flock field])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/fs.h>
-+],[
-+ struct file_operations ll_file_operations_flock;
-+ ll_file_operations_flock.flock = NULL;
-+],[
-+ AC_DEFINE(HAVE_F_OP_FLOCK, 1,
-+ [struct file_operations has flock field])
-+ AC_MSG_RESULT([yes])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+#
-+# LC_FUNC_MS_FLOCK_LOCK
-+#
-+# SLES9 kernel has MS_FLOCK_LOCK sb flag
-+#
-+AC_DEFUN([LC_FUNC_MS_FLOCK_LOCK],
-+[AC_MSG_CHECKING([if kernel has MS_FLOCK_LOCK sb flag])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/fs.h>
-+],[
-+ int flags = MS_FLOCK_LOCK;
-+],[
-+ AC_DEFINE(HAVE_MS_FLOCK_LOCK, 1,
-+ [kernel has MS_FLOCK_LOCK flag])
-+ AC_MSG_RESULT([yes])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+#
-+# LC_FUNC_HAVE_CAN_SLEEP_ARG
-+#
-+# SLES9 kernel has third arg can_sleep
-+# in fs/locks.c: flock_lock_file_wait()
-+#
-+AC_DEFUN([LC_FUNC_HAVE_CAN_SLEEP_ARG],
-+[AC_MSG_CHECKING([if kernel has third arg can_sleep in fs/locks.c: flock_lock_file_wait()])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/fs.h>
-+],[
-+ int cansleep;
-+ struct file *file;
-+ struct file_lock *file_lock;
-+ flock_lock_file_wait(file, file_lock, cansleep);
-+],[
-+ AC_DEFINE(HAVE_CAN_SLEEP_ARG, 1,
-+ [kernel has third arg can_sleep in fs/locks.c: flock_lock_file_wait()])
-+ AC_MSG_RESULT([yes])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+#
-+# LC_TASK_PPTR
-+#
-+# task struct has p_pptr instead of parent
-+#
-+AC_DEFUN([LC_TASK_PPTR],
-+[AC_MSG_CHECKING([if task_struct has p_pptr])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/sched.h>
-+],[
-+ struct task_struct *p;
-+
-+ p = p->p_pptr;
-+],[
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_TASK_PPTR, 1, [task p_pptr found])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+#
-+# LC_FUNC_F_OP_FLOCK
-+#
-+# rhel4.2 kernel has f_op->flock field
-+#
-+AC_DEFUN([LC_FUNC_F_OP_FLOCK],
-+[AC_MSG_CHECKING([if struct file_operations has flock field])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/fs.h>
-+],[
-+ struct file_operations ll_file_operations_flock;
-+ ll_file_operations_flock.flock = NULL;
-+],[
-+ AC_DEFINE(HAVE_F_OP_FLOCK, 1,
-+ [struct file_operations has flock field])
-+ AC_MSG_RESULT([yes])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+# LC_INODE_I_MUTEX
-+# after 2.6.15 the inode has i_mutex instead of i_sem
-+AC_DEFUN([LC_INODE_I_MUTEX],
-+[AC_MSG_CHECKING([if inode has i_mutex])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/mutex.h>
-+ #include <linux/fs.h>
-+ #undef i_mutex
-+],[
-+ struct inode i;
-+
-+ mutex_unlock(&i.i_mutex);
-+],[
-+ AC_MSG_RESULT(yes)
-+ AC_DEFINE(HAVE_INODE_I_MUTEX, 1,
-+        [after 2.6.15 the inode has i_mutex instead of i_sem])
-+],[
-+ AC_MSG_RESULT(no)
-+])
-+])
-+
-+# LC_DQUOTOFF_MUTEX
-+# after 2.6.17 quota uses a mutex instead of a semaphore
-+AC_DEFUN([LC_DQUOTOFF_MUTEX],
-+[AC_MSG_CHECKING([if kernel uses dqonoff_mutex])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/mutex.h>
-+ #include <linux/fs.h>
-+ #include <linux/quota.h>
-+],[
-+ struct quota_info dq;
-+
-+ mutex_unlock(&dq.dqonoff_mutex);
-+],[
-+ AC_MSG_RESULT(yes)
-+ AC_DEFINE(HAVE_DQUOTOFF_MUTEX, 1,
-+        [after 2.6.17 quota uses a mutex instead of a semaphore])
-+],[
-+ AC_MSG_RESULT(no)
-+])
-+])
-+
-+#
-+# LC_STATFS_DENTRY_PARAM
-+# starting from 2.6.18 linux kernel uses dentry instead of
-+# super_block for first vfs_statfs argument
-+#
-+AC_DEFUN([LC_STATFS_DENTRY_PARAM],
-+[AC_MSG_CHECKING([first vfs_statfs parameter is dentry])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/fs.h>
-+],[
-+ int vfs_statfs(struct dentry *, struct kstatfs *);
-+],[
-+ AC_DEFINE(HAVE_STATFS_DENTRY_PARAM, 1,
-+ [first parameter of vfs_statfs is dentry])
-+ AC_MSG_RESULT([yes])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+#
-+# LC_VFS_KERN_MOUNT
-+# starting from 2.6.18 the kernel does not export do_kern_mount
-+# and vfs_kern_mount should be used instead.
-+#
-+AC_DEFUN([LC_VFS_KERN_MOUNT],
-+[AC_MSG_CHECKING([if vfs_kern_mount exists in the kernel])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/mount.h>
-+],[
-+ vfs_kern_mount(NULL, 0, NULL, NULL);
-+],[
-+ AC_DEFINE(HAVE_VFS_KERN_MOUNT, 1,
-+        [vfs_kern_mount exists in the kernel])
-+ AC_MSG_RESULT([yes])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+#
-+# LC_INVALIDATEPAGE_RETURN_INT
-+# more 2.6 api changes. return type for the invalidatepage
-+# address_space_operation is 'void' in new kernels but 'int' in old
-+#
-+AC_DEFUN([LC_INVALIDATEPAGE_RETURN_INT],
-+[AC_MSG_CHECKING([if invalidatepage returns int])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/buffer_head.h>
-+],[
-+ int rc = block_invalidatepage(NULL, 0);
-+],[
-+ AC_MSG_RESULT(yes)
-+ AC_DEFINE(HAVE_INVALIDATEPAGE_RETURN_INT, 1,
-+ [Define if return type of invalidatepage should be int])
-+],[
-+ AC_MSG_RESULT(no)
-+])
-+])
-+
-+# LC_UMOUNTBEGIN_HAS_VFSMOUNT
-+# more 2.6 API changes. 2.6.18 umount_begin has different parameters
-+AC_DEFUN([LC_UMOUNTBEGIN_HAS_VFSMOUNT],
-+[AC_MSG_CHECKING([if umount_begin needs vfsmount parameter instead of super_block])
-+tmp_flags="$EXTRA_KCFLAGS"
-+EXTRA_KCFLAGS="-Werror"
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/fs.h>
-+
-+ struct vfsmount;
-+ static void cfg_umount_begin (struct vfsmount *v, int flags)
-+ {
-+ ;
-+ }
-+
-+ static struct super_operations cfg_super_operations = {
-+ .umount_begin = cfg_umount_begin,
-+ };
-+],[
-+ cfg_super_operations.umount_begin(NULL,0);
-+],[
-+ AC_MSG_RESULT(yes)
-+ AC_DEFINE(HAVE_UMOUNTBEGIN_VFSMOUNT, 1,
-+ [Define umount_begin need second argument])
-+],[
-+ AC_MSG_RESULT(no)
-+])
-+EXTRA_KCFLAGS="$tmp_flags"
-+])
-+
-+# 2.6.19 API changes
-+# the inode doesn't have the i_blksize field
-+AC_DEFUN([LC_INODE_BLKSIZE],
-+[AC_MSG_CHECKING([inode has i_blksize field])
-+LB_LINUX_TRY_COMPILE([
-+#include <linux/fs.h>
-+],[
-+ struct inode i;
-+ i.i_blksize = 0;
-+],[
-+ AC_MSG_RESULT(yes)
-+ AC_DEFINE(HAVE_INODE_BLKSIZE, 1,
-+ [struct inode has i_blksize field])
-+],[
-+ AC_MSG_RESULT(no)
-+])
-+])
-+
-+# LC_VFS_READDIR_U64_INO
-+# 2.6.19 uses u64 for the inode number instead of inode_t
-+AC_DEFUN([LC_VFS_READDIR_U64_INO],
-+[AC_MSG_CHECKING([if vfs_readdir needs a 64-bit inode number])
-+tmp_flags="$EXTRA_KCFLAGS"
-+EXTRA_KCFLAGS="-Werror"
-+LB_LINUX_TRY_COMPILE([
-+#include <linux/fs.h>
-+ int fillonedir(void * __buf, const char * name, int namlen, loff_t offset,
-+ u64 ino, unsigned int d_type)
-+ {
-+ return 0;
-+ }
-+],[
-+ filldir_t filter;
-+
-+ filter = fillonedir;
-+ return 1;
-+],[
-+ AC_MSG_RESULT(yes)
-+ AC_DEFINE(HAVE_VFS_READDIR_U64_INO, 1,
-+        [if vfs_readdir needs a 64-bit inode number])
-+],[
-+ AC_MSG_RESULT(no)
-+])
-+EXTRA_KCFLAGS="$tmp_flags"
-+])
-+
-+# LC_FILE_WRITEV
-+# 2.6.19 replaced writev with aio_write
-+AC_DEFUN([LC_FILE_WRITEV],
-+[AC_MSG_CHECKING([writev in fops])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/fs.h>
-+],[
-+ struct file_operations *fops = NULL;
-+ fops->writev = NULL;
-+],[
-+ AC_MSG_RESULT(yes)
-+ AC_DEFINE(HAVE_FILE_WRITEV, 1,
-+ [use fops->writev])
-+],[
-+ AC_MSG_RESULT(no)
-+])
-+])
-+
-+# LC_GENERIC_FILE_READ
-+# 2.6.19 replaced readv with aio_read
-+AC_DEFUN([LC_FILE_READV],
-+[AC_MSG_CHECKING([readv in fops])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/fs.h>
-+],[
-+ struct file_operations *fops = NULL;
-+ fops->readv = NULL;
-+],[
-+ AC_MSG_RESULT(yes)
-+ AC_DEFINE(HAVE_FILE_READV, 1,
-+ [use fops->readv])
-+],[
-+ AC_MSG_RESULT(no)
-+])
-+])
-+
-+# LC_NR_PAGECACHE
-+# 2.6.18 doesn't export nr_pagecache
-+AC_DEFUN([LC_NR_PAGECACHE],
-+[AC_MSG_CHECKING([if kernel exports nr_pagecache])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/pagemap.h>
-+],[
-+ return atomic_read(&nr_pagecache);
-+],[
-+ AC_MSG_RESULT(yes)
-+ AC_DEFINE(HAVE_NR_PAGECACHE, 1,
-+        [kernel exports nr_pagecache])
-+],[
-+ AC_MSG_RESULT(no)
-+])
-+])
-+
-+# LC_CANCEL_DIRTY_PAGE
-+# 2.6.20 introduces cancel_dirty_page instead of
-+# clear_page_dirty.
-+AC_DEFUN([LC_CANCEL_DIRTY_PAGE],
-+[AC_MSG_CHECKING([kernel has cancel_dirty_page])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/mm.h>
-+ #include <linux/page-flags.h>
-+],[
-+ cancel_dirty_page(NULL, 0);
-+],[
-+ AC_MSG_RESULT(yes)
-+ AC_DEFINE(HAVE_CANCEL_DIRTY_PAGE, 1,
-+ [kernel has cancel_dirty_page instead of clear_page_dirty])
-+],[
-+ AC_MSG_RESULT(no)
-+])
-+])
-+
-+#
-+# LC_PAGE_CONSTANT
-+#
-+# In order to support the raid5 zerocopy patch, we have to patch the kernel to
-+# make it support constant pages, meaning pages that won't be modified during
-+# the IO.
-+#
-+AC_DEFUN([LC_PAGE_CONSTANT],
-+[AC_MSG_CHECKING([if kernel has PageConstant defined])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/mm.h>
-+ #include <linux/page-flags.h>
-+],[
-+ #ifndef PG_constant
-+ #error "Have no raid5 zcopy patch"
-+ #endif
-+],[
-+ AC_MSG_RESULT(yes)
-+        AC_DEFINE(HAVE_PAGE_CONSTANT, 1, [kernel has PageConstant support])
-+],[
-+        AC_MSG_RESULT(no)
-+])
-+])
-+
-+# the RHEL5 FS-cache patch renames the PG_checked flag
-+# to PG_fs_misc
-+AC_DEFUN([LC_PG_FS_MISC],
-+[AC_MSG_CHECKING([kernel has PG_fs_misc])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/mm.h>
-+ #include <linux/page-flags.h>
-+],[
-+ #ifndef PG_fs_misc
-+ #error PG_fs_misc not defined in kernel
-+ #endif
-+],[
-+ AC_MSG_RESULT(yes)
-+ AC_DEFINE(HAVE_PG_FS_MISC, 1,
-+        [kernel has PG_fs_misc])
-+],[
-+ AC_MSG_RESULT(no)
-+])
-+])
-+
-+# RHEL5 PageChecked and SetPageChecked defined
-+AC_DEFUN([LC_PAGE_CHECKED],
-+[AC_MSG_CHECKING([kernel has PageChecked and SetPageChecked])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/autoconf.h>
-+#ifdef HAVE_LINUX_MMTYPES_H
-+ #include <linux/mm_types.h>
-+#endif
-+ #include <linux/page-flags.h>
-+],[
-+ struct page *p;
-+
-+        /* before 2.6.26 this was a define */
-+ #ifndef PageChecked
-+        /* 2.6.26 uses a function instead of a define */
-+ SetPageChecked(p);
-+ PageChecked(p);
-+ #endif
-+],[
-+ AC_MSG_RESULT(yes)
-+ AC_DEFINE(HAVE_PAGE_CHECKED, 1,
-+ [does kernel have PageChecked and SetPageChecked])
-+],[
-+ AC_MSG_RESULT(no)
-+])
-+])
-+
-+AC_DEFUN([LC_EXPORT_TRUNCATE_COMPLETE],
-+[LB_CHECK_SYMBOL_EXPORT([truncate_complete_page],
-+[mm/truncate.c],[
-+AC_DEFINE(HAVE_TRUNCATE_COMPLETE_PAGE, 1,
-+ [kernel export truncate_complete_page])
-+],[
-+])
-+])
-+
-+AC_DEFUN([LC_EXPORT_D_REHASH_COND],
-+[LB_CHECK_SYMBOL_EXPORT([d_rehash_cond],
-+[fs/dcache.c],[
-+AC_DEFINE(HAVE_D_REHASH_COND, 1,
-+ [d_rehash_cond is exported by the kernel])
-+],[
-+])
-+])
-+
-+AC_DEFUN([LC_EXPORT___D_REHASH],
-+[LB_CHECK_SYMBOL_EXPORT([__d_rehash],
-+[fs/dcache.c],[
-+AC_DEFINE(HAVE___D_REHASH, 1,
-+ [__d_rehash is exported by the kernel])
-+],[
-+])
-+])
-+
-+AC_DEFUN([LC_EXPORT_D_MOVE_LOCKED],
-+[LB_CHECK_SYMBOL_EXPORT([d_move_locked],
-+[fs/dcache.c],[
-+AC_DEFINE(HAVE_D_MOVE_LOCKED, 1,
-+ [d_move_locked is exported by the kernel])
-+],[
-+])
-+])
-+
-+AC_DEFUN([LC_EXPORT___D_MOVE],
-+[LB_CHECK_SYMBOL_EXPORT([__d_move],
-+[fs/dcache.c],[
-+AC_DEFINE(HAVE___D_MOVE, 1,
-+ [__d_move is exported by the kernel])
-+],[
-+])
-+])
-+
-+# The actual symbol exported varies among architectures, so we need
-+# to check many symbols (but only in the current architecture.) No
-+# matter what symbol is exported, the kernel #defines node_to_cpumask
-+# to the appropriate function and that's what we use.
-+AC_DEFUN([LC_EXPORT_NODE_TO_CPUMASK],
-+ [LB_CHECK_SYMBOL_EXPORT([node_to_cpumask],
-+ [arch/$LINUX_ARCH/mm/numa.c],
-+ [AC_DEFINE(HAVE_NODE_TO_CPUMASK, 1,
-+ [node_to_cpumask is exported by
-+ the kernel])]) # x86_64
-+ LB_CHECK_SYMBOL_EXPORT([node_to_cpu_mask],
-+ [arch/$LINUX_ARCH/kernel/smpboot.c],
-+ [AC_DEFINE(HAVE_NODE_TO_CPUMASK, 1,
-+ [node_to_cpumask is exported by
-+ the kernel])]) # ia64
-+ LB_CHECK_SYMBOL_EXPORT([node_2_cpu_mask],
-+ [arch/$LINUX_ARCH/kernel/smpboot.c],
-+ [AC_DEFINE(HAVE_NODE_TO_CPUMASK, 1,
-+ [node_to_cpumask is exported by
-+ the kernel])]) # i386
-+ ])
-+
-+#
-+# LC_VFS_INTENT_PATCHES
-+#
-+# check if the kernel has the VFS intent patches
-+AC_DEFUN([LC_VFS_INTENT_PATCHES],
-+[AC_MSG_CHECKING([if the kernel has the VFS intent patches])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/fs.h>
-+ #include <linux/namei.h>
-+],[
-+ struct nameidata nd;
-+ struct lookup_intent *it;
-+
-+ it = &nd.intent;
-+ intent_init(it, IT_OPEN);
-+ it->d.lustre.it_disposition = 0;
-+ it->d.lustre.it_data = NULL;
-+],[
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_VFS_INTENT_PATCHES, 1, [VFS intent patches are applied])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+# 2.6.22 lost second parameter for invalidate_bdev
-+AC_DEFUN([LC_INVALIDATE_BDEV_2ARG],
-+[AC_MSG_CHECKING([if invalidate_bdev has second argument])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/buffer_head.h>
-+],[
-+ invalidate_bdev(NULL,0);
-+],[
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_INVALIDATE_BDEV_2ARG, 1,
-+ [invalidate_bdev has second argument])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+# 2.6.18
-+
-+
-+# 2.6.23 has return type 'void' for unregister_blkdev
-+AC_DEFUN([LC_UNREGISTER_BLKDEV_RETURN_INT],
-+[AC_MSG_CHECKING([if unregister_blkdev returns int])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/fs.h>
-+],[
-+ int i = unregister_blkdev(0,NULL);
-+],[
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_UNREGISTER_BLKDEV_RETURN_INT, 1,
-+        [unregister_blkdev returns int])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+# 2.6.23 change .sendfile to .splice_read
-+# RHEL4 (-92 kernel) has both the .sendfile and .splice_read APIs
-+AC_DEFUN([LC_KERNEL_SENDFILE],
-+[AC_MSG_CHECKING([if kernel has .sendfile])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/fs.h>
-+],[
-+ struct file_operations file;
-+
-+ file.sendfile = NULL;
-+], [
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_KERNEL_SENDFILE, 1,
-+ [kernel has .sendfile])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+# 2.6.23 change .sendfile to .splice_read
-+AC_DEFUN([LC_KERNEL_SPLICE_READ],
-+[AC_MSG_CHECKING([if kernel has .splice_read])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/fs.h>
-+],[
-+ struct file_operations file;
-+
-+ file.splice_read = NULL;
-+], [
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_KERNEL_SPLICE_READ, 1,
-+        [kernel has .splice_read])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+# 2.6.23 extract nfs export related data into exportfs.h
-+AC_DEFUN([LC_HAVE_EXPORTFS_H],
-+[LB_CHECK_FILE([$LINUX/include/linux/exportfs.h], [
-+ AC_DEFINE(HAVE_LINUX_EXPORTFS_H, 1,
-+        [kernel has include/linux/exportfs.h])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+# 2.6.23 have new page fault handling API
-+AC_DEFUN([LC_VM_OP_FAULT],
-+[AC_MSG_CHECKING([if kernel has .fault in vm_operation_struct])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/mm.h>
-+],[
-+ struct vm_operations_struct op;
-+
-+ op.fault = NULL;
-+], [
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_VM_OP_FAULT, 1,
-+ [if kernel has .fault in vm_operation_struct])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+#2.6.23 has new shrinker API
-+AC_DEFUN([LC_REGISTER_SHRINKER],
-+[AC_MSG_CHECKING([if kernel has register_shrinker])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/mm.h>
-+],[
-+ register_shrinker(NULL);
-+], [
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_REGISTER_SHRINKER, 1,
-+ [if kernel has register_shrinker])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+# 2.6.24 has bio_endio with 2 args
-+AC_DEFUN([LC_BIO_ENDIO_2ARG],
-+[AC_MSG_CHECKING([if kernel has bio_endio with 2 args])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/bio.h>
-+],[
-+ bio_endio(NULL, 0);
-+], [
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_BIO_ENDIO_2ARG, 1,
-+ [if kernel has bio_endio with 2 args])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+# 2.6.24 has new members in the export_operations struct.
-+AC_DEFUN([LC_FH_TO_DENTRY],
-+[AC_MSG_CHECKING([if kernel has .fh_to_dentry member in export_operations struct])
-+LB_LINUX_TRY_COMPILE([
-+#ifdef HAVE_LINUX_EXPORTFS_H
-+ #include <linux/exportfs.h>
-+#else
-+ #include <linux/fs.h>
-+#endif
-+],[
-+ struct export_operations exp;
-+
-+ exp.fh_to_dentry = NULL;
-+], [
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_FH_TO_DENTRY, 1,
-+ [kernel has .fh_to_dentry member in export_operations struct])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+# 2.6.24 need linux/mm_types.h included
-+AC_DEFUN([LC_HAVE_MMTYPES_H],
-+[LB_CHECK_FILE([$LINUX/include/linux/mm_types.h], [
-+ AC_DEFINE(HAVE_LINUX_MMTYPES_H, 1,
-+ [kernel has include/mm_types.h])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+# 2.6.24 removed the long-lived 'deleted' member from the procfs entry struct
-+AC_DEFUN([LC_PROCFS_DELETED],
-+[AC_MSG_CHECKING([if kernel has deleted member in procfs entry struct])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/proc_fs.h>
-+],[
-+ struct proc_dir_entry pde;
-+
-+ pde.deleted = NULL;
-+], [
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_PROCFS_DELETED, 1,
-+ [kernel has deleted member in procfs entry struct])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+# 2.6.25 changed the define to an inline function
-+AC_DEFUN([LC_MAPPING_CAP_WRITEBACK_DIRTY],
-+[AC_MSG_CHECKING([if kernel has mapping_cap_writeback_dirty])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/backing-dev.h>
-+],[
-+ #ifndef mapping_cap_writeback_dirty
-+ mapping_cap_writeback_dirty(NULL);
-+ #endif
-+],[
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_MAPPING_CAP_WRITEBACK_DIRTY, 1,
-+        [kernel has mapping_cap_writeback_dirty])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+
-+
-+# 2.6.26 does not export set_fs_pwd and changes the pwd parameter in fs_struct
-+AC_DEFUN([LC_FS_STRUCT_USE_PATH],
-+[AC_MSG_CHECKING([fs_struct use path structure])
-+LB_LINUX_TRY_COMPILE([
-+ #include <asm/atomic.h>
-+ #include <linux/spinlock.h>
-+ #include <linux/fs_struct.h>
-+],[
-+ struct path path;
-+ struct fs_struct fs;
-+
-+ fs.pwd = path;
-+], [
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(HAVE_FS_STRUCT_USE_PATH, 1,
-+ [fs_struct use path structure])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+# 2.6.26 remove path_release and use path_put instead
-+AC_DEFUN([LC_PATH_RELEASE],
-+[AC_MSG_CHECKING([if path_release exist])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/dcache.h>
-+ #include <linux/namei.h>
-+],[
-+ path_release(NULL);
-+],[
-+ AC_DEFINE(HAVE_PATH_RELEASE, 1, [path_release exist])
-+ AC_MSG_RESULT([yes])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+#2.6.27
-+AC_DEFUN([LC_INODE_PERMISION_2ARGS],
-+[AC_MSG_CHECKING([if inode_operations->permission has two args])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/fs.h>
-+],[
-+ struct inode *inode;
-+
-+ inode->i_op->permission(NULL,0);
-+],[
-+ AC_DEFINE(HAVE_INODE_PERMISION_2ARGS, 1,
-+        [inode_operations->permission has two args])
-+ AC_MSG_RESULT([yes])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+# 2.6.27 has file_remove_suid instead of remove_suid
-+AC_DEFUN([LC_FILE_REMOVE_SUID],
-+[AC_MSG_CHECKING([if kernel has file_remove_suid])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/fs.h>
-+],[
-+ file_remove_suid(NULL);
-+],[
-+ AC_DEFINE(HAVE_FILE_REMOVE_SUID, 1,
-+        [kernel has file_remove_suid])
-+ AC_MSG_RESULT([yes])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+# 2.6.27 has a new page locking API
-+AC_DEFUN([LC_TRYLOCKPAGE],
-+[AC_MSG_CHECKING([if kernel uses trylock_page for page locking])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/pagemap.h>
-+],[
-+ trylock_page(NULL);
-+],[
-+ AC_DEFINE(HAVE_TRYLOCK_PAGE, 1,
-+        [kernel uses trylock_page for page locking])
-+ AC_MSG_RESULT([yes])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+#
-+# LC_PROG_LINUX
-+#
-+# Lustre linux kernel checks
-+#
-+AC_DEFUN([LC_PROG_LINUX],
-+ [LC_LUSTRE_VERSION_H
-+ if test x$enable_server = xyes ; then
-+ LC_CONFIG_BACKINGFS
-+ fi
-+ LC_CONFIG_PINGER
-+ LC_CONFIG_CHECKSUM
-+ LC_CONFIG_LIBLUSTRE_RECOVERY
-+ LC_CONFIG_HEALTH_CHECK_WRITE
-+ LC_CONFIG_LRU_RESIZE
-+ LC_CONFIG_ADAPTIVE_TIMEOUTS
-+ LC_QUOTA_MODULE
-+
-+ LC_TASK_PPTR
-+ # RHEL4 patches
-+ LC_EXPORT_TRUNCATE_COMPLETE
-+ LC_EXPORT_D_REHASH_COND
-+ LC_EXPORT___D_REHASH
-+ LC_EXPORT_D_MOVE_LOCKED
-+ LC_EXPORT___D_MOVE
-+ LC_EXPORT_NODE_TO_CPUMASK
-+
-+ LC_STRUCT_KIOBUF
-+ LC_FUNC_COND_RESCHED
-+ LC_FUNC_ZAP_PAGE_RANGE
-+ LC_FUNC_PDE
-+ LC_FUNC_DIRECT_IO
-+ LC_HEADER_MM_INLINE
-+ LC_STRUCT_INODE
-+ LC_FUNC_REGISTER_CACHE
-+ LC_FUNC_GRAB_CACHE_PAGE_NOWAIT_GFP
-+ LC_FUNC_DEV_SET_RDONLY
-+ LC_FUNC_FILEMAP_FDATAWRITE
-+ LC_STRUCT_STATFS
-+ LC_FUNC_PAGE_MAPPED
-+ LC_STRUCT_FILE_OPS_UNLOCKED_IOCTL
-+ LC_FILEMAP_POPULATE
-+ LC_D_ADD_UNIQUE
-+ LC_BIT_SPINLOCK_H
-+ LC_XATTR_ACL
-+ LC_STRUCT_INTENT_FILE
-+ LC_POSIX_ACL_XATTR_H
-+ LC_EXPORT___IGET
-+ LC_FUNC_SET_FS_PWD
-+ LC_FUNC_MS_FLOCK_LOCK
-+ LC_FUNC_HAVE_CAN_SLEEP_ARG
-+ LC_FUNC_F_OP_FLOCK
-+ LC_QUOTA_READ
-+ LC_COOKIE_FOLLOW_LINK
-+ LC_FUNC_RCU
-+ LC_QUOTA64
-+
-+ # does the kernel have VFS intent patches?
-+ LC_VFS_INTENT_PATCHES
-+
-+ # 2.6.15
-+ LC_INODE_I_MUTEX
-+
-+ # 2.6.16
-+ LC_SECURITY_PLUG # for SLES10 SP2
-+
-+ # 2.6.17
-+ LC_DQUOTOFF_MUTEX
-+
-+ # 2.6.18
-+ LC_NR_PAGECACHE
-+ LC_STATFS_DENTRY_PARAM
-+ LC_VFS_KERN_MOUNT
-+ LC_INVALIDATEPAGE_RETURN_INT
-+ LC_UMOUNTBEGIN_HAS_VFSMOUNT
-+
-+ #2.6.18 + RHEL5 (fc6)
-+ LC_PG_FS_MISC
-+ LC_PAGE_CHECKED
-+
-+ # 2.6.19
-+ LC_INODE_BLKSIZE
-+ LC_VFS_READDIR_U64_INO
-+ LC_FILE_WRITEV
-+ LC_FILE_READV
-+
-+ # 2.6.20
-+ LC_CANCEL_DIRTY_PAGE
-+
-+ # raid5-zerocopy patch
-+ LC_PAGE_CONSTANT
-+
-+ # 2.6.22
-+ LC_INVALIDATE_BDEV_2ARG
-+ LC_FS_RENAME_DOES_D_MOVE
-+ # 2.6.23
-+ LC_UNREGISTER_BLKDEV_RETURN_INT
-+ LC_KERNEL_SENDFILE
-+ LC_KERNEL_SPLICE_READ
-+ LC_HAVE_EXPORTFS_H
-+ LC_VM_OP_FAULT
-+ LC_REGISTER_SHRINKER
-+
-+ #2.6.25
-+ LC_MAPPING_CAP_WRITEBACK_DIRTY
-+
-+ # 2.6.24
-+ LC_HAVE_MMTYPES_H
-+ LC_BIO_ENDIO_2ARG
-+ LC_FH_TO_DENTRY
-+ LC_PROCFS_DELETED
-+
-+ # 2.6.26
-+ LC_FS_STRUCT_USE_PATH
-+ LC_RCU_LIST_SAFE
-+ LC_PATH_RELEASE
-+
-+ # 2.6.27
-+ LC_INODE_PERMISION_2ARGS
-+ LC_FILE_REMOVE_SUID
-+ LC_TRYLOCKPAGE
-+])
-+
-+#
-+# LC_CONFIG_CLIENT_SERVER
-+#
-+# Build client/server sides of Lustre
-+#
-+AC_DEFUN([LC_CONFIG_CLIENT_SERVER],
-+[AC_MSG_CHECKING([whether to build Lustre server support])
-+AC_ARG_ENABLE([server],
-+ AC_HELP_STRING([--disable-server],
-+ [disable Lustre server support]),
-+ [],[enable_server='yes'])
-+AC_MSG_RESULT([$enable_server])
-+
-+AC_MSG_CHECKING([whether to build Lustre client support])
-+AC_ARG_ENABLE([client],
-+ AC_HELP_STRING([--disable-client],
-+ [disable Lustre client support]),
-+ [],[enable_client='yes'])
-+AC_MSG_RESULT([$enable_client])])
-+
-+#
-+# LC_CONFIG_LIBLUSTRE
-+#
-+# whether to build liblustre
-+#
-+AC_DEFUN([LC_CONFIG_LIBLUSTRE],
-+[AC_MSG_CHECKING([whether to build Lustre library])
-+AC_ARG_ENABLE([liblustre],
-+ AC_HELP_STRING([--disable-liblustre],
-+ [disable building of Lustre library]),
-+ [],[enable_liblustre=$with_sysio])
-+AC_MSG_RESULT([$enable_liblustre])
-+# only build sysio if liblustre is built
-+with_sysio="$enable_liblustre"
-+
-+AC_MSG_CHECKING([whether to build liblustre tests])
-+AC_ARG_ENABLE([liblustre-tests],
-+ AC_HELP_STRING([--enable-liblustre-tests],
-+ [enable liblustre tests, if --disable-tests is used]),
-+ [],[enable_liblustre_tests=$enable_tests])
-+if test x$enable_liblustre != xyes ; then
-+ enable_liblustre_tests='no'
-+fi
-+AC_MSG_RESULT([$enable_liblustre_tests])
-+
-+AC_MSG_CHECKING([whether to enable liblustre acl])
-+AC_ARG_ENABLE([liblustre-acl],
-+ AC_HELP_STRING([--disable-liblustre-acl],
-+ [disable ACL support for liblustre]),
-+ [],[enable_liblustre_acl=yes])
-+AC_MSG_RESULT([$enable_liblustre_acl])
-+if test x$enable_liblustre_acl = xyes ; then
-+ AC_DEFINE(LIBLUSTRE_POSIX_ACL, 1, Liblustre Support ACL-enabled MDS)
-+fi
-+
-+#
-+# --enable-mpitest
-+#
-+AC_ARG_ENABLE(mpitests,
-+        AC_HELP_STRING([--enable-mpitests=yes|no|mpich directory],
-+ [include mpi tests]),
-+ [
-+ enable_mpitests=yes
-+ case $enableval in
-+ yes)
-+ MPI_ROOT=/opt/mpich
-+ LDFLAGS="$LDFLAGS -L$MPI_ROOT/ch-p4/lib -L$MPI_ROOT/ch-p4/lib64"
-+ CFLAGS="$CFLAGS -I$MPI_ROOT/include"
-+ ;;
-+ no)
-+ enable_mpitests=no
-+ ;;
-+ [[\\/$]]* | ?:[[\\/]]* )
-+ MPI_ROOT=$enableval
-+ LDFLAGS="$LDFLAGS -L$with_mpi/lib"
-+ CFLAGS="$CFLAGS -I$MPI_ROOT/include"
-+ ;;
-+ *)
-+ AC_MSG_ERROR([expected absolute directory name for --enable-mpitests or yes or no])
-+ ;;
-+ esac
-+ ],
-+ [
-+ MPI_ROOT=/opt/mpich
-+ LDFLAGS="$LDFLAGS -L$MPI_ROOT/ch-p4/lib -L$MPI_ROOT/ch-p4/lib64"
-+ CFLAGS="$CFLAGS -I$MPI_ROOT/include"
-+ enable_mpitests=yes
-+ ]
-+)
-+AC_SUBST(MPI_ROOT)
-+
-+if test x$enable_mpitests != xno; then
-+        AC_MSG_CHECKING([whether mpitests can be built])
-+ AC_CHECK_FILE([$MPI_ROOT/include/mpi.h],
-+ [AC_CHECK_LIB([mpich],[MPI_Start],[enable_mpitests=yes],[enable_mpitests=no])],
-+ [enable_mpitests=no])
-+fi
-+AC_MSG_RESULT([$enable_mpitests])
-+
-+
-+AC_MSG_NOTICE([Enabling Lustre configure options for libsysio])
-+ac_configure_args="$ac_configure_args --with-lustre-hack --with-sockets"
-+
-+LC_CONFIG_PINGER
-+LC_CONFIG_LIBLUSTRE_RECOVERY
-+])
-+
-+AC_DEFUN([LC_CONFIG_LRU_RESIZE],
-+[AC_MSG_CHECKING([whether to enable lru self-adjusting])
-+AC_ARG_ENABLE([lru_resize],
-+ AC_HELP_STRING([--enable-lru-resize],
-+ [enable lru resize support]),
-+ [],[enable_lru_resize='yes'])
-+AC_MSG_RESULT([$enable_lru_resize])
-+if test x$enable_lru_resize != xno; then
-+ AC_DEFINE(HAVE_LRU_RESIZE_SUPPORT, 1, [Enable lru resize support])
-+fi
-+])
-+
-+AC_DEFUN([LC_CONFIG_ADAPTIVE_TIMEOUTS],
-+[AC_MSG_CHECKING([whether to enable ptlrpc adaptive timeouts support])
-+AC_ARG_ENABLE([adaptive_timeouts],
-+ AC_HELP_STRING([--enable-adaptive-timeouts],
-+ [enable ptlrpc adaptive timeouts support]),
-+ [],[enable_adaptive_timeouts='no'])
-+AC_MSG_RESULT([$enable_adaptive_timeouts])
-+if test x$enable_adaptive_timeouts == xyes; then
-+ AC_DEFINE(HAVE_AT_SUPPORT, 1, [Enable adaptive timeouts support])
-+fi
-+])
-+
-+#
-+# LC_CONFIG_QUOTA
-+#
-+# whether to enable quota support global control
-+#
-+AC_DEFUN([LC_CONFIG_QUOTA],
-+[AC_ARG_ENABLE([quota],
-+ AC_HELP_STRING([--enable-quota],
-+ [enable quota support]),
-+ [],[enable_quota='yes'])
-+])
-+
-+# whether to enable quota support(kernel modules)
-+AC_DEFUN([LC_QUOTA_MODULE],
-+[if test x$enable_quota != xno; then
-+ LB_LINUX_CONFIG([QUOTA],[
-+ enable_quota_module='yes'
-+ AC_DEFINE(HAVE_QUOTA_SUPPORT, 1, [Enable quota support])
-+ ],[
-+ enable_quota_module='no'
-+        AC_MSG_WARN([quota is not enabled because the kernel lacks quota support])
-+ ])
-+fi
-+])
-+
-+AC_DEFUN([LC_QUOTA],
-+[#check global
-+LC_CONFIG_QUOTA
-+#check for utils
-+AC_CHECK_HEADER(sys/quota.h,
-+ [AC_DEFINE(HAVE_SYS_QUOTA_H, 1, [Define to 1 if you have <sys/quota.h>.])],
-+        [AC_MSG_ERROR([could not find <sys/quota.h> on your system])])
-+])
-+
-+AC_DEFUN([LC_QUOTA_READ],
-+[AC_MSG_CHECKING([if kernel supports quota_read])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/fs.h>
-+],[
-+ struct super_operations sp;
-+ void *i = (void *)sp.quota_read;
-+],[
-+ AC_MSG_RESULT([yes])
-+ AC_DEFINE(KERNEL_SUPPORTS_QUOTA_READ, 1, [quota_read found])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+#
-+# LC_COOKIE_FOLLOW_LINK
-+#
-+# kernel 2.6.13+ ->follow_link returns a cookie
-+#
-+
-+AC_DEFUN([LC_COOKIE_FOLLOW_LINK],
-+[AC_MSG_CHECKING([if inode_operations->follow_link returns a cookie])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/fs.h>
-+ #include <linux/namei.h>
-+],[
-+ struct dentry dentry;
-+ struct nameidata nd;
-+
-+ dentry.d_inode->i_op->put_link(&dentry, &nd, NULL);
-+],[
-+ AC_DEFINE(HAVE_COOKIE_FOLLOW_LINK, 1, [inode_operations->follow_link returns a cookie])
-+ AC_MSG_RESULT([yes])
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+#
-+# LC_FUNC_RCU
-+#
-+# kernels prior to 2.6.0(?) have no RCU support; in kernel 2.6.5 (SUSE),
-+# call_rcu takes three parameters.
-+#
-+AC_DEFUN([LC_FUNC_RCU],
-+[AC_MSG_CHECKING([if kernel has RCU support])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/rcupdate.h>
-+],[],[
-+ AC_DEFINE(HAVE_RCU, 1, [have RCU defined])
-+ AC_MSG_RESULT([yes])
-+
-+ AC_MSG_CHECKING([if call_rcu takes three parameters])
-+ LB_LINUX_TRY_COMPILE([
-+ #include <linux/rcupdate.h>
-+ ],[
-+ struct rcu_head rh;
-+ call_rcu(&rh, (void (*)(struct rcu_head *))1, NULL);
-+ ],[
-+ AC_DEFINE(HAVE_CALL_RCU_PARAM, 1, [call_rcu takes three parameters])
-+ AC_MSG_RESULT([yes])
-+ ],[
-+ AC_MSG_RESULT([no])
-+ ])
-+
-+],[
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+#
-+# LC_QUOTA64
-+# linux kernel may have 64-bit limits support
-+#
-+AC_DEFUN([LC_QUOTA64],
-+[AC_MSG_CHECKING([if kernel has 64-bit quota limits support])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/kernel.h>
-+ #include <linux/fs.h>
-+ #include <linux/quotaio_v2.h>
-+ int versions[] = V2_INITQVERSIONS_R1;
-+ struct v2_disk_dqblk_r1 dqblk_r1;
-+],[],[
-+ AC_DEFINE(HAVE_QUOTA64, 1, [have quota64])
-+ AC_MSG_RESULT([yes])
-+
-+],[
-+ AC_MSG_WARN([4 TB (or larger) block quota limits can only be used with OSTs not larger than 4 TB.])
-+ AC_MSG_WARN([Continuing with limited quota support.])
-+ AC_MSG_WARN([quotacheck is needed for filesystems with recent quota versions.])
-+ AC_MSG_RESULT([no])
-+])
-+])
-+
-+# LC_SECURITY_PLUG # for SLES10 SP2
-+# check security plug in sles10 sp2 kernel
-+AC_DEFUN([LC_SECURITY_PLUG],
-+[AC_MSG_CHECKING([if kernel has security plug support])
-+LB_LINUX_TRY_COMPILE([
-+ #include <linux/fs.h>
-+],[
-+ struct dentry *dentry;
-+ struct vfsmount *mnt;
-+ struct iattr *iattr;
-+
-+ notify_change(dentry, mnt, iattr);
-+],[
-+ AC_MSG_RESULT(yes)
-+ AC_DEFINE(HAVE_SECURITY_PLUG, 1,
-+        [SLES10 SP2 uses an extra parameter in vfs calls])
-+],[
-+ AC_MSG_RESULT(no)
-+])
-+])
-+
-+#
-+# LC_CONFIGURE
-+#
-+# other configure checks
-+#
-+AC_DEFUN([LC_CONFIGURE],
-+[LC_CONFIG_OBD_BUFFER_SIZE
-+
-+# include/liblustre.h
-+AC_CHECK_HEADERS([asm/page.h sys/user.h sys/vfs.h stdint.h blkid/blkid.h])
-+
-+# liblustre/llite_lib.h
-+AC_CHECK_HEADERS([xtio.h file.h])
-+
-+# liblustre/dir.c
-+AC_CHECK_HEADERS([linux/types.h sys/types.h linux/unistd.h unistd.h])
-+
-+# liblustre/lutil.c
-+AC_CHECK_HEADERS([netinet/in.h arpa/inet.h catamount/data.h])
-+AC_CHECK_FUNCS([inet_ntoa])
-+
-+# libsysio/src/readlink.c
-+LC_READLINK_SSIZE_T
-+
-+# lvfs/prng.c - depends on linux/types.h from liblustre/dir.c
-+AC_CHECK_HEADERS([linux/random.h], [], [],
-+ [#ifdef HAVE_LINUX_TYPES_H
-+ # include <linux/types.h>
-+ #endif
-+ ])
-+
-+# utils/llverfs.c
-+AC_CHECK_HEADERS([ext2fs/ext2fs.h])
-+
-+# check for -lz support
-+ZLIB=""
-+AC_CHECK_LIB([z],
-+ [adler32],
-+ [AC_CHECK_HEADERS([zlib.h],
-+ [ZLIB="-lz"
-+ AC_DEFINE([HAVE_ADLER], 1,
-+        [support adler32 checksum type])],
-+ [AC_MSG_WARN([No zlib-devel package found,
-+ unable to use adler32 checksum])])],
-+ [AC_MSG_WARN([No zlib package found, unable to use adler32 checksum])]
-+)
-+AC_SUBST(ZLIB)
-+
-+# Super safe df
-+AC_ARG_ENABLE([mindf],
-+ AC_HELP_STRING([--enable-mindf],
-+ [Make statfs report the minimum available space on any single OST instead of the sum of free space on all OSTs]),
-+ [],[])
-+if test "$enable_mindf" = "yes" ; then
-+ AC_DEFINE([MIN_DF], 1, [Report minimum OST free space])
-+fi
-+
-+AC_ARG_ENABLE([fail_alloc],
-+ AC_HELP_STRING([--disable-fail-alloc],
-+        [disable random allocation failures]),
-+ [],[enable_fail_alloc=yes])
-+AC_MSG_CHECKING([whether to enable random memory allocation failures])
-+AC_MSG_RESULT([$enable_fail_alloc])
-+if test x$enable_fail_alloc != xno ; then
-+        AC_DEFINE([RANDOM_FAIL_ALLOC], 1, [enable random allocation failures])
-+fi
-+
-+])
-+
-+#
-+# LC_CONDITIONALS
-+#
-+# AM_CONDITIONALS for lustre
-+#
-+AC_DEFUN([LC_CONDITIONALS],
-+[AM_CONDITIONAL(LIBLUSTRE, test x$enable_liblustre = xyes)
-+AM_CONDITIONAL(USE_QUILT, test x$QUILT != xno)
-+AM_CONDITIONAL(LIBLUSTRE_TESTS, test x$enable_liblustre_tests = xyes)
-+AM_CONDITIONAL(MPITESTS, test x$enable_mpitests = xyes, Build MPI Tests)
-+AM_CONDITIONAL(CLIENT, test x$enable_client = xyes)
-+AM_CONDITIONAL(SERVER, test x$enable_server = xyes)
-+AM_CONDITIONAL(QUOTA, test x$enable_quota_module = xyes)
-+AM_CONDITIONAL(BLKID, test x$ac_cv_header_blkid_blkid_h = xyes)
-+AM_CONDITIONAL(EXT2FS_DEVEL, test x$ac_cv_header_ext2fs_ext2fs_h = xyes)
-+AM_CONDITIONAL(LIBPTHREAD, test x$enable_libpthread = xyes)
-+])
-+
-+#
-+# LC_CONFIG_FILES
-+#
-+# files that should be generated with AC_OUTPUT
-+#
-+AC_DEFUN([LC_CONFIG_FILES],
-+[AC_CONFIG_FILES([
-+lustre/Makefile
-+lustre/autoMakefile
-+lustre/autoconf/Makefile
-+lustre/contrib/Makefile
-+lustre/doc/Makefile
-+lustre/include/Makefile
-+lustre/include/lustre_ver.h
-+lustre/include/linux/Makefile
-+lustre/include/lustre/Makefile
-+lustre/kernel_patches/targets/2.6-suse.target
-+lustre/kernel_patches/targets/2.6-vanilla.target
-+lustre/kernel_patches/targets/2.6-rhel4.target
-+lustre/kernel_patches/targets/2.6-rhel5.target
-+lustre/kernel_patches/targets/2.6-fc5.target
-+lustre/kernel_patches/targets/2.6-patchless.target
-+lustre/kernel_patches/targets/2.6-sles10.target
-+lustre/kernel_patches/targets/hp_pnnl-2.4.target
-+lustre/kernel_patches/targets/rh-2.4.target
-+lustre/kernel_patches/targets/rhel-2.4.target
-+lustre/kernel_patches/targets/suse-2.4.21-2.target
-+lustre/kernel_patches/targets/sles-2.4.target
-+lustre/ldlm/Makefile
-+lustre/liblustre/Makefile
-+lustre/liblustre/tests/Makefile
-+lustre/llite/Makefile
-+lustre/llite/autoMakefile
-+lustre/lov/Makefile
-+lustre/lov/autoMakefile
-+lustre/lvfs/Makefile
-+lustre/lvfs/autoMakefile
-+lustre/mdc/Makefile
-+lustre/mdc/autoMakefile
-+lustre/mds/Makefile
-+lustre/mds/autoMakefile
-+lustre/obdclass/Makefile
-+lustre/obdclass/autoMakefile
-+lustre/obdclass/linux/Makefile
-+lustre/obdecho/Makefile
-+lustre/obdecho/autoMakefile
-+lustre/obdfilter/Makefile
-+lustre/obdfilter/autoMakefile
-+lustre/osc/Makefile
-+lustre/osc/autoMakefile
-+lustre/ost/Makefile
-+lustre/ost/autoMakefile
-+lustre/mgc/Makefile
-+lustre/mgc/autoMakefile
-+lustre/mgs/Makefile
-+lustre/mgs/autoMakefile
-+lustre/ptlrpc/Makefile
-+lustre/ptlrpc/autoMakefile
-+lustre/quota/Makefile
-+lustre/quota/autoMakefile
-+lustre/scripts/Makefile
-+lustre/scripts/version_tag.pl
-+lustre/tests/Makefile
-+lustre/utils/Makefile
-+])
-+case $lb_target_os in
-+ darwin)
-+ AC_CONFIG_FILES([ lustre/obdclass/darwin/Makefile ])
-+ ;;
-+esac
-+
-+])
-diff -urNad lustre~/lustre/include/linux/lustre_compat25.h lustre/lustre/include/linux/lustre_compat25.h
---- lustre~/lustre/include/linux/lustre_compat25.h 2009-03-12 10:33:45.000000000 +0100
-+++ lustre/lustre/include/linux/lustre_compat25.h 2009-03-13 09:45:02.000000000 +0100
-@@ -57,6 +57,28 @@
- #endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,14) */
-
- #ifndef HAVE_SET_FS_PWD
-+
-+#ifdef HAVE_FS_STRUCT_USE_PATH
-+static inline void ll_set_fs_pwd(struct fs_struct *fs, struct vfsmount *mnt,
-+ struct dentry *dentry)
-+{
-+ struct path path;
-+ struct path old_pwd;
-+
-+ path.mnt = mnt;
-+ path.dentry = dentry;
-+ write_lock(&fs->lock);
-+ old_pwd = fs->pwd;
-+ path_get(&path);
-+ fs->pwd = path;
-+ write_unlock(&fs->lock);
-+
-+ if (old_pwd.dentry)
-+ path_put(&old_pwd);
-+}
-+
-+#else
-+
- static inline void ll_set_fs_pwd(struct fs_struct *fs, struct vfsmount *mnt,
- struct dentry *dentry)
- {
-@@ -75,6 +97,7 @@
- mntput(old_pwdmnt);
- }
- }
-+#endif
- #else
- #define ll_set_fs_pwd set_fs_pwd
- #endif /* HAVE_SET_FS_PWD */
-@@ -151,7 +174,12 @@
- #endif
-
- /* XXX our code should be using the 2.6 calls, not the other way around */
-+#ifndef HAVE_TRYLOCK_PAGE
- #define TryLockPage(page) TestSetPageLocked(page)
-+#else
-+#define TryLockPage(page) (!trylock_page(page))
-+#endif
-+
- #define Page_Uptodate(page) PageUptodate(page)
- #define ll_redirty_page(page) set_page_dirty(page)
-
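As an aside (illustrative sketch only, not part of the patch): the mapping above has to negate trylock_page(), because trylock_page() returns non-zero when the lock was acquired, while the old TestSetPageLocked() convention kept by TryLockPage() is that non-zero means the page was already locked. A hypothetical helper spelling this out:

static inline int try_lock_page_sketch(struct page *page)
{
#ifdef HAVE_TRYLOCK_PAGE
        return !trylock_page(page);      /* trylock_page(): non-zero == lock taken */
#else
        return TestSetPageLocked(page);  /* non-zero == page was already locked */
#endif
}

In both branches a return value of 0 means the caller now holds the page lock.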
-@@ -364,8 +392,17 @@
- #define LL_RENAME_DOES_D_MOVE FS_ODD_RENAME
- #endif
-
-+#ifdef HAVE_FILE_REMOVE_SUID
-+#define ll_remove_suid(file, mnt) file_remove_suid(file)
-+#else
-+ #ifdef HAVE_SECURITY_PLUG
-+ #define ll_remove_suid(file,mnt) remove_suid(file->f_dentry,mnt)
-+ #else
-+ #define ll_remove_suid(file,mnt) remove_suid(file->f_dentry)
-+ #endif
-+#endif
-+
- #ifdef HAVE_SECURITY_PLUG
--#define ll_remove_suid(inode,mnt) remove_suid(inode,mnt)
- #define ll_vfs_rmdir(dir,entry,mnt) vfs_rmdir(dir,entry,mnt)
- #define ll_vfs_mkdir(inode,dir,mnt,mode) vfs_mkdir(inode,dir,mnt,mode)
- #define ll_vfs_link(old,mnt,dir,new,mnt1) vfs_link(old,mnt,dir,new,mnt1)
-@@ -377,7 +414,6 @@
- #define ll_vfs_rename(old,old_dir,mnt,new,new_dir,mnt1) \
- vfs_rename(old,old_dir,mnt,new,new_dir,mnt1)
- #else
--#define ll_remove_suid(inode,mnt) remove_suid(inode)
- #define ll_vfs_rmdir(dir,entry,mnt) vfs_rmdir(dir,entry)
- #define ll_vfs_mkdir(inode,dir,mnt,mode) vfs_mkdir(inode,dir,mode)
- #define ll_vfs_link(old,mnt,dir,new,mnt1) vfs_link(old,dir,new)
-@@ -388,6 +424,57 @@
- vfs_rename(old,old_dir,new,new_dir)
- #endif
-
-+#ifdef HAVE_REGISTER_SHRINKER
-+typedef int (*shrinker_t)(int nr_to_scan, gfp_t gfp_mask);
-+
-+static inline
-+struct shrinker *set_shrinker(int seek, shrinker_t func)
-+{
-+ struct shrinker *s;
-+
-+ s = kmalloc(sizeof(*s), GFP_KERNEL);
-+ if (s == NULL)
-+ return (NULL);
-+
-+ s->shrink = func;
-+ s->seeks = seek;
-+
-+ register_shrinker(s);
-+
-+ return s;
-+}
-+
-+static inline
-+void remove_shrinker(struct shrinker *shrinker)
-+{
-+ if (shrinker == NULL)
-+ return;
-+
-+ unregister_shrinker(shrinker);
-+ kfree(shrinker);
-+}
-+#endif
-+
-+#ifdef HAVE_BIO_ENDIO_2ARG
-+#define cfs_bio_io_error(a,b) bio_io_error((a))
-+#define cfs_bio_endio(a,b,c) bio_endio((a),(c))
-+#else
-+#define cfs_bio_io_error(a,b) bio_io_error((a),(b))
-+#define cfs_bio_endio(a,b,c) bio_endio((a),(b),(c))
-+#endif
-+
-+#ifdef HAVE_FS_STRUCT_USE_PATH
-+#define cfs_fs_pwd(fs) ((fs)->pwd.dentry)
-+#define cfs_fs_mnt(fs) ((fs)->pwd.mnt)
-+#else
-+#define cfs_fs_pwd(fs) ((fs)->pwd)
-+#define cfs_fs_mnt(fs) ((fs)->pwdmnt)
-+#endif
-+
-+#ifndef list_for_each_safe_rcu
-+#define list_for_each_safe_rcu(a,b,c) list_for_each_rcu(a, c)
-+#endif
-+
- #ifndef abs
- static inline int abs(int x)
- {
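A hedged usage sketch for the set_shrinker()/remove_shrinker() emulation added above: on kernels that only provide register_shrinker(), the wrapper allocates a struct shrinker, fills in ->shrink and ->seeks, and registers it, and remove_shrinker() unregisters and frees it. The callback and variable names below are invented for illustration:

static int example_cache_shrink(int nr_to_scan, gfp_t gfp_mask)
{
        /* free up to nr_to_scan cached objects, return how many remain */
        return 0;
}

static struct shrinker *example_shrinker;

static int example_init(void)
{
        example_shrinker = set_shrinker(DEFAULT_SEEKS, example_cache_shrink);
        return example_shrinker != NULL ? 0 : -ENOMEM;
}

static void example_exit(void)
{
        remove_shrinker(example_shrinker);
}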
-diff -urNad lustre~/lustre/include/linux/lustre_compat25.h.orig lustre/lustre/include/linux/lustre_compat25.h.orig
---- lustre~/lustre/include/linux/lustre_compat25.h.orig 1970-01-01 00:00:00.000000000 +0000
-+++ lustre/lustre/include/linux/lustre_compat25.h.orig 2009-03-13 09:45:03.000000000 +0100
-@@ -0,0 +1,411 @@
-+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-+ * vim:expandtab:shiftwidth=8:tabstop=8:
-+ *
-+ * GPL HEADER START
-+ *
-+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
-+ *
-+ * This program is free software; you can redistribute it and/or modify
-+ * it under the terms of the GNU General Public License version 2 only,
-+ * as published by the Free Software Foundation.
-+ *
-+ * This program is distributed in the hope that it will be useful, but
-+ * WITHOUT ANY WARRANTY; without even the implied warranty of
-+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-+ * General Public License version 2 for more details (a copy is included
-+ * in the LICENSE file that accompanied this code).
-+ *
-+ * You should have received a copy of the GNU General Public License
-+ * version 2 along with this program; If not, see
-+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
-+ *
-+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
-+ * CA 95054 USA or visit www.sun.com if you need additional information or
-+ * have any questions.
-+ *
-+ * GPL HEADER END
-+ */
-+/*
-+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved
-+ * Use is subject to license terms.
-+ */
-+/*
-+ * This file is part of Lustre, http://www.lustre.org/
-+ * Lustre is a trademark of Sun Microsystems, Inc.
-+ */
-+
-+#ifndef _LINUX_COMPAT25_H
-+#define _LINUX_COMPAT25_H
-+
-+#ifdef __KERNEL__
-+
-+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,5)
-+#error sorry, lustre requires at least 2.6.5
-+#endif
-+
-+#include <libcfs/linux/portals_compat25.h>
-+
-+#include <linux/lustre_patchless_compat.h>
-+
-+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,14)
-+struct ll_iattr_struct {
-+ struct iattr iattr;
-+ unsigned int ia_attr_flags;
-+};
-+#else
-+#define ll_iattr_struct iattr
-+#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,14) */
-+
-+#ifndef HAVE_SET_FS_PWD
-+static inline void ll_set_fs_pwd(struct fs_struct *fs, struct vfsmount *mnt,
-+ struct dentry *dentry)
-+{
-+ struct dentry *old_pwd;
-+ struct vfsmount *old_pwdmnt;
-+
-+ write_lock(&fs->lock);
-+ old_pwd = fs->pwd;
-+ old_pwdmnt = fs->pwdmnt;
-+ fs->pwdmnt = mntget(mnt);
-+ fs->pwd = dget(dentry);
-+ write_unlock(&fs->lock);
-+
-+ if (old_pwd) {
-+ dput(old_pwd);
-+ mntput(old_pwdmnt);
-+ }
-+}
-+#else
-+#define ll_set_fs_pwd set_fs_pwd
-+#endif /* HAVE_SET_FS_PWD */
-+
-+#ifdef HAVE_INODE_I_MUTEX
-+#define UNLOCK_INODE_MUTEX(inode) do {mutex_unlock(&(inode)->i_mutex); } while(0)
-+#define LOCK_INODE_MUTEX(inode) do {mutex_lock(&(inode)->i_mutex); } while(0)
-+#define TRYLOCK_INODE_MUTEX(inode) mutex_trylock(&(inode)->i_mutex)
-+#else
-+#define UNLOCK_INODE_MUTEX(inode) do {up(&(inode)->i_sem); } while(0)
-+#define LOCK_INODE_MUTEX(inode) do {down(&(inode)->i_sem); } while(0)
-+#define TRYLOCK_INODE_MUTEX(inode) (!down_trylock(&(inode)->i_sem))
-+#endif /* HAVE_INODE_I_MUTEX */
-+
-+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,15)
-+#define d_child d_u.d_child
-+#define d_rcu d_u.d_rcu
-+#endif
-+
-+#ifdef HAVE_DQUOTOFF_MUTEX
-+#define UNLOCK_DQONOFF_MUTEX(dqopt) do {mutex_unlock(&(dqopt)->dqonoff_mutex); } while(0)
-+#define LOCK_DQONOFF_MUTEX(dqopt) do {mutex_lock(&(dqopt)->dqonoff_mutex); } while(0)
-+#else
-+#define UNLOCK_DQONOFF_MUTEX(dqopt) do {up(&(dqopt)->dqonoff_sem); } while(0)
-+#define LOCK_DQONOFF_MUTEX(dqopt) do {down(&(dqopt)->dqonoff_sem); } while(0)
-+#endif /* HAVE_DQUOTOFF_MUTEX */
-+
-+
-+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,4)
-+#define NGROUPS_SMALL NGROUPS
-+#define NGROUPS_PER_BLOCK ((int)(EXEC_PAGESIZE / sizeof(gid_t)))
-+
-+struct group_info {
-+ int ngroups;
-+ atomic_t usage;
-+ gid_t small_block[NGROUPS_SMALL];
-+ int nblocks;
-+ gid_t *blocks[0];
-+};
-+#define current_ngroups current->ngroups
-+#define current_groups current->groups
-+
-+struct group_info *groups_alloc(int gidsetsize);
-+void groups_free(struct group_info *ginfo);
-+#else /* >= 2.6.4 */
-+
-+#define current_ngroups current->group_info->ngroups
-+#define current_groups current->group_info->small_block
-+
-+#endif /* LINUX_VERSION_CODE < KERNEL_VERSION(2,6,4) */
-+
-+#ifndef page_private
-+#define page_private(page) ((page)->private)
-+#define set_page_private(page, v) ((page)->private = (v))
-+#endif
-+
-+#ifndef HAVE_GFP_T
-+#define gfp_t int
-+#endif
-+
-+#define lock_dentry(___dentry) spin_lock(&(___dentry)->d_lock)
-+#define unlock_dentry(___dentry) spin_unlock(&(___dentry)->d_lock)
-+
-+#define ll_kernel_locked() kernel_locked()
-+
-+/*
-+ * OBD need working random driver, thus all our
-+ * initialization routines must be called after device
-+ * driver initialization
-+ */
-+#ifndef MODULE
-+#undef module_init
-+#define module_init(a) late_initcall(a)
-+#endif
-+
-+/* XXX our code should be using the 2.6 calls, not the other way around */
-+#define TryLockPage(page) TestSetPageLocked(page)
-+#define Page_Uptodate(page) PageUptodate(page)
-+#define ll_redirty_page(page) set_page_dirty(page)
-+
-+#define KDEVT_INIT(val) (val)
-+
-+#define LTIME_S(time) (time.tv_sec)
-+#define ll_path_lookup path_lookup
-+#define ll_permission(inode,mask,nd) permission(inode,mask,nd)
-+
-+#define ll_pgcache_lock(mapping) spin_lock(&mapping->page_lock)
-+#define ll_pgcache_unlock(mapping) spin_unlock(&mapping->page_lock)
-+#define ll_call_writepage(inode, page) \
-+ (inode)->i_mapping->a_ops->writepage(page, NULL)
-+#define ll_invalidate_inode_pages(inode) \
-+ invalidate_inode_pages((inode)->i_mapping)
-+#define ll_truncate_complete_page(page) \
-+ truncate_complete_page(page->mapping, page)
-+
-+#define ll_vfs_create(a,b,c,d) vfs_create(a,b,c,d)
-+#define ll_dev_t dev_t
-+#define kdev_t dev_t
-+#define to_kdev_t(dev) (dev)
-+#define kdev_t_to_nr(dev) (dev)
-+#define val_to_kdev(dev) (dev)
-+#define ILOOKUP(sb, ino, test, data) ilookup5(sb, ino, test, data);
-+
-+#include <linux/writeback.h>
-+
-+static inline int cleanup_group_info(void)
-+{
-+ struct group_info *ginfo;
-+
-+ ginfo = groups_alloc(0);
-+ if (!ginfo)
-+ return -ENOMEM;
-+
-+ set_current_groups(ginfo);
-+ put_group_info(ginfo);
-+
-+ return 0;
-+}
-+
-+#define __set_page_ll_data(page, llap) \
-+ do { \
-+ page_cache_get(page); \
-+ SetPagePrivate(page); \
-+ set_page_private(page, (unsigned long)llap); \
-+ } while (0)
-+#define __clear_page_ll_data(page) \
-+ do { \
-+ ClearPagePrivate(page); \
-+ set_page_private(page, 0); \
-+ page_cache_release(page); \
-+ } while(0)
-+
-+#define kiobuf bio
-+
-+#include <linux/proc_fs.h>
-+
-+#if !defined(HAVE_D_REHASH_COND) && defined(HAVE___D_REHASH)
-+#define d_rehash_cond(dentry, lock) __d_rehash(dentry, lock)
-+extern void __d_rehash(struct dentry *dentry, int lock);
-+#endif
-+
-+#if !defined(HAVE_D_MOVE_LOCKED) && defined(HAVE___D_MOVE)
-+#define d_move_locked(dentry, target) __d_move(dentry, target)
-+extern void __d_move(struct dentry *dentry, struct dentry *target);
-+#endif
-+
-+#ifdef HAVE_CAN_SLEEP_ARG
-+#define ll_flock_lock_file_wait(file, lock, can_sleep) \
-+ flock_lock_file_wait(file, lock, can_sleep)
-+#else
-+#define ll_flock_lock_file_wait(file, lock, can_sleep) \
-+ flock_lock_file_wait(file, lock)
-+#endif
-+
-+#define CheckWriteback(page, cmd) \
-+ ((!PageWriteback(page) && (cmd & OBD_BRW_READ)) || \
-+ (PageWriteback(page) && (cmd & OBD_BRW_WRITE)))
-+
-+
-+#ifdef HAVE_PAGE_LIST
-+static inline int mapping_has_pages(struct address_space *mapping)
-+{
-+ int rc = 1;
-+
-+ ll_pgcache_lock(mapping);
-+ if (list_empty(&mapping->dirty_pages) &&
-+ list_empty(&mapping->clean_pages) &&
-+ list_empty(&mapping->locked_pages)) {
-+ rc = 0;
-+ }
-+ ll_pgcache_unlock(mapping);
-+
-+ return rc;
-+}
-+#else
-+static inline int mapping_has_pages(struct address_space *mapping)
-+{
-+ return mapping->nrpages > 0;
-+}
-+#endif
-+
-+#ifdef HAVE_KIOBUF_KIO_BLOCKS
-+#define KIOBUF_GET_BLOCKS(k) ((k)->kio_blocks)
-+#else
-+#define KIOBUF_GET_BLOCKS(k) ((k)->blocks)
-+#endif
-+
-+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,7))
-+#define ll_set_dflags(dentry, flags) do { dentry->d_vfs_flags |= flags; } while(0)
-+#define ll_vfs_symlink(dir, dentry, mnt, path, mode) \
-+ vfs_symlink(dir, dentry, path)
-+#else
-+#define ll_set_dflags(dentry, flags) do { \
-+ spin_lock(&dentry->d_lock); \
-+ dentry->d_flags |= flags; \
-+ spin_unlock(&dentry->d_lock); \
-+ } while(0)
-+#ifdef HAVE_SECURITY_PLUG
-+#define ll_vfs_symlink(dir, dentry, mnt, path, mode) \
-+ vfs_symlink(dir, dentry, mnt, path, mode)
-+#else
-+#define ll_vfs_symlink(dir, dentry, mnt, path, mode) \
-+ vfs_symlink(dir, dentry, path, mode)
-+#endif
-+#endif
-+
-+#ifndef container_of
-+#define container_of(ptr, type, member) ({ \
-+ const typeof( ((type *)0)->member ) *__mptr = (ptr); \
-+ (type *)( (char *)__mptr - offsetof(type,member) );})
-+#endif
-+
-+#ifdef HAVE_I_ALLOC_SEM
-+#define UP_WRITE_I_ALLOC_SEM(i) do { up_write(&(i)->i_alloc_sem); } while (0)
-+#define DOWN_WRITE_I_ALLOC_SEM(i) do { down_write(&(i)->i_alloc_sem); } while(0)
-+#define LASSERT_I_ALLOC_SEM_WRITE_LOCKED(i) LASSERT(down_read_trylock(&(i)->i_alloc_sem) == 0)
-+
-+#define UP_READ_I_ALLOC_SEM(i) do { up_read(&(i)->i_alloc_sem); } while (0)
-+#define DOWN_READ_I_ALLOC_SEM(i) do { down_read(&(i)->i_alloc_sem); } while (0)
-+#define LASSERT_I_ALLOC_SEM_READ_LOCKED(i) LASSERT(down_write_trylock(&(i)->i_alloc_sem) == 0)
-+#else
-+#define UP_READ_I_ALLOC_SEM(i) do { } while (0)
-+#define DOWN_READ_I_ALLOC_SEM(i) do { } while (0)
-+#define LASSERT_I_ALLOC_SEM_READ_LOCKED(i) do { } while (0)
-+
-+#define UP_WRITE_I_ALLOC_SEM(i) do { } while (0)
-+#define DOWN_WRITE_I_ALLOC_SEM(i) do { } while (0)
-+#define LASSERT_I_ALLOC_SEM_WRITE_LOCKED(i) do { } while (0)
-+#endif
-+
-+#ifndef HAVE_GRAB_CACHE_PAGE_NOWAIT_GFP
-+#define grab_cache_page_nowait_gfp(x, y, z) grab_cache_page_nowait((x), (y))
-+#endif
-+
-+#ifndef HAVE_FILEMAP_FDATAWRITE
-+#define filemap_fdatawrite(mapping) filemap_fdatasync(mapping)
-+#endif
-+
-+#ifdef HAVE_VFS_KERN_MOUNT
-+static inline
-+struct vfsmount *
-+ll_kern_mount(const char *fstype, int flags, const char *name, void *data)
-+{
-+ struct file_system_type *type = get_fs_type(fstype);
-+ struct vfsmount *mnt;
-+ if (!type)
-+ return ERR_PTR(-ENODEV);
-+ mnt = vfs_kern_mount(type, flags, name, data);
-+ module_put(type->owner);
-+ return mnt;
-+}
-+#else
-+#define ll_kern_mount(fstype, flags, name, data) do_kern_mount((fstype), (flags), (name), (data))
-+#endif
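For illustration only (the filesystem type and device name are placeholders), the ll_kern_mount() wrapper above would be used like this; it looks up the filesystem type, mounts it, and drops the module reference taken by get_fs_type():

        struct vfsmount *mnt;

        mnt = ll_kern_mount("ldiskfs", 0, "/dev/sda1", NULL);
        if (IS_ERR(mnt))
                return PTR_ERR(mnt);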
-+
-+#ifdef HAVE_STATFS_DENTRY_PARAM
-+#define ll_do_statfs(sb, sfs) (sb)->s_op->statfs((sb)->s_root, (sfs))
-+#else
-+#define ll_do_statfs(sb, sfs) (sb)->s_op->statfs((sb), (sfs))
-+#endif
-+
-+/* task_struct */
-+#ifndef HAVE_TASK_PPTR
-+#define p_pptr parent
-+#endif
-+
-+#ifdef HAVE_UNREGISTER_BLKDEV_RETURN_INT
-+#define ll_unregister_blkdev(a,b) unregister_blkdev((a),(b))
-+#else
-+static inline
-+int ll_unregister_blkdev(unsigned int dev, const char *name)
-+{
-+ unregister_blkdev(dev, name);
-+ return 0;
-+}
-+#endif
-+
-+#ifdef HAVE_INVALIDATE_BDEV_2ARG
-+#define ll_invalidate_bdev(a,b) invalidate_bdev((a),(b))
-+#else
-+#define ll_invalidate_bdev(a,b) invalidate_bdev((a))
-+#endif
-+
-+#ifdef HAVE_FS_RENAME_DOES_D_MOVE
-+#define LL_RENAME_DOES_D_MOVE FS_RENAME_DOES_D_MOVE
-+#else
-+#define LL_RENAME_DOES_D_MOVE FS_ODD_RENAME
-+#endif
-+
-+#ifdef HAVE_SECURITY_PLUG
-+#define ll_remove_suid(inode,mnt) remove_suid(inode,mnt)
-+#define ll_vfs_rmdir(dir,entry,mnt) vfs_rmdir(dir,entry,mnt)
-+#define ll_vfs_mkdir(inode,dir,mnt,mode) vfs_mkdir(inode,dir,mnt,mode)
-+#define ll_vfs_link(old,mnt,dir,new,mnt1) vfs_link(old,mnt,dir,new,mnt1)
-+#define ll_vfs_unlink(inode,entry,mnt) vfs_unlink(inode,entry,mnt)
-+#define ll_vfs_mknod(dir,entry,mnt,mode,dev) \
-+ vfs_mknod(dir,entry,mnt,mode,dev)
-+#define ll_security_inode_unlink(dir,entry,mnt) \
-+ security_inode_unlink(dir,entry,mnt)
-+#define ll_vfs_rename(old,old_dir,mnt,new,new_dir,mnt1) \
-+ vfs_rename(old,old_dir,mnt,new,new_dir,mnt1)
-+#else
-+#define ll_remove_suid(inode,mnt) remove_suid(inode)
-+#define ll_vfs_rmdir(dir,entry,mnt) vfs_rmdir(dir,entry)
-+#define ll_vfs_mkdir(inode,dir,mnt,mode) vfs_mkdir(inode,dir,mode)
-+#define ll_vfs_link(old,mnt,dir,new,mnt1) vfs_link(old,dir,new)
-+#define ll_vfs_unlink(inode,entry,mnt) vfs_unlink(inode,entry)
-+#define ll_vfs_mknod(dir,entry,mnt,mode,dev) vfs_mknod(dir,entry,mode,dev)
-+#define ll_security_inode_unlink(dir,entry,mnt) security_inode_unlink(dir,entry)
-+#define ll_vfs_rename(old,old_dir,mnt,new,new_dir,mnt1) \
-+ vfs_rename(old,old_dir,new,new_dir)
-+#endif
-+
-+#ifndef abs
-+static inline int abs(int x)
-+{
-+ return (x < 0) ? -x : x;
-+}
-+#endif
-+
-+#ifndef labs
-+static inline long labs(long x)
-+{
-+ return (x < 0) ? -x : x;
-+}
-+#endif
-+
-+/* Using kernel fls(). Userspace will use one defined in user-bitops.h. */
-+#ifndef __fls
-+#define __fls fls
-+#endif
-+
-+#endif /* __KERNEL__ */
-+#endif /* _COMPAT25_H */
-diff -urNad lustre~/lustre/include/linux/lustre_lib.h lustre/lustre/include/linux/lustre_lib.h
---- lustre~/lustre/include/linux/lustre_lib.h 2008-08-07 11:52:06.000000000 +0200
-+++ lustre/lustre/include/linux/lustre_lib.h 2009-03-13 09:45:03.000000000 +0100
-@@ -49,7 +49,6 @@
- # include <string.h>
- # include <sys/types.h>
- #else
--# include <asm/semaphore.h>
- # include <linux/rwsem.h>
- # include <linux/sched.h>
- # include <linux/signal.h>
-diff -urNad lustre~/lustre/include/linux/lustre_patchless_compat.h lustre/lustre/include/linux/lustre_patchless_compat.h
---- lustre~/lustre/include/linux/lustre_patchless_compat.h 2008-08-07 11:52:10.000000000 +0200
-+++ lustre/lustre/include/linux/lustre_patchless_compat.h 2009-03-13 09:45:03.000000000 +0100
-@@ -52,7 +52,7 @@
-
- BUG_ON(!PageLocked(page));
-
--#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,15))
-+#ifdef HAVE_RW_TREE_LOCK
- write_lock_irq(&mapping->tree_lock);
- #else
- spin_lock_irq(&mapping->tree_lock);
-@@ -65,7 +65,7 @@
- #else
- __dec_zone_page_state(page, NR_FILE_PAGES);
- #endif
--#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,15))
-+#ifdef HAVE_RW_TREE_LOCK
- write_unlock_irq(&mapping->tree_lock);
- #else
- spin_unlock_irq(&mapping->tree_lock);
-diff -urNad lustre~/lustre/include/lprocfs_status.h lustre/lustre/include/lprocfs_status.h
---- lustre~/lustre/include/lprocfs_status.h 2009-03-12 10:32:27.000000000 +0100
-+++ lustre/lustre/include/lprocfs_status.h 2009-03-13 09:45:03.000000000 +0100
-@@ -521,6 +521,8 @@
- #define LPROCFS_EXIT() do { \
- up_read(&_lprocfs_lock); \
- } while(0)
-+
-+#ifdef HAVE_PROCFS_DELETED
- #define LPROCFS_ENTRY_AND_CHECK(dp) do { \
- typecheck(struct proc_dir_entry *, dp); \
- LPROCFS_ENTRY(); \
-@@ -529,6 +531,14 @@
- return -ENODEV; \
- } \
- } while(0)
-+#define LPROCFS_CHECK_DELETED(dp) ((dp)->deleted)
-+#else
-+
-+#define LPROCFS_ENTRY_AND_CHECK(dp) \
-+ LPROCFS_ENTRY();
-+#define LPROCFS_CHECK_DELETED(dp) (0)
-+#endif
-+
- #define LPROCFS_WRITE_ENTRY() do { \
- down_write(&_lprocfs_lock); \
- } while(0)
-@@ -536,6 +546,7 @@
- up_write(&_lprocfs_lock); \
- } while(0)
-
-+
- /* You must use these macros when you want to refer to
- * the import in a client obd_device for a lprocfs entry */
- #define LPROCFS_CLIMP_CHECK(obd) do { \
-diff -urNad lustre~/lustre/include/lprocfs_status.h.orig lustre/lustre/include/lprocfs_status.h.orig
---- lustre~/lustre/include/lprocfs_status.h.orig 1970-01-01 00:00:00.000000000 +0000
-+++ lustre/lustre/include/lprocfs_status.h.orig 2009-03-13 09:45:03.000000000 +0100
-@@ -0,0 +1,817 @@
-+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-+ * vim:expandtab:shiftwidth=8:tabstop=8:
-+ *
-+ * GPL HEADER START
-+ *
-+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
-+ *
-+ * This program is free software; you can redistribute it and/or modify
-+ * it under the terms of the GNU General Public License version 2 only,
-+ * as published by the Free Software Foundation.
-+ *
-+ * This program is distributed in the hope that it will be useful, but
-+ * WITHOUT ANY WARRANTY; without even the implied warranty of
-+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-+ * General Public License version 2 for more details (a copy is included
-+ * in the LICENSE file that accompanied this code).
-+ *
-+ * You should have received a copy of the GNU General Public License
-+ * version 2 along with this program; If not, see
-+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
-+ *
-+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
-+ * CA 95054 USA or visit www.sun.com if you need additional information or
-+ * have any questions.
-+ *
-+ * GPL HEADER END
-+ */
-+/*
-+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved
-+ * Use is subject to license terms.
-+ */
-+/*
-+ * This file is part of Lustre, http://www.lustre.org/
-+ * Lustre is a trademark of Sun Microsystems, Inc.
-+ *
-+ * lustre/include/lprocfs_status.h
-+ *
-+ * Top level header file for LProc SNMP
-+ *
-+ * Author: Hariharan Thantry thantry at users.sourceforge.net
-+ */
-+#ifndef _LPROCFS_SNMP_H
-+#define _LPROCFS_SNMP_H
-+
-+#include <lustre/lustre_idl.h>
-+#if defined(__linux__)
-+#include <linux/lprocfs_status.h>
-+#elif defined(__APPLE__)
-+#include <darwin/lprocfs_status.h>
-+#elif defined(__WINNT__)
-+#include <winnt/lprocfs_status.h>
-+#else
-+#error Unsupported operating system.
-+#endif
-+
-+#undef LPROCFS
-+#if (defined(__KERNEL__) && defined(CONFIG_PROC_FS))
-+# define LPROCFS
-+#endif
-+
-+struct lprocfs_vars {
-+ const char *name;
-+ cfs_read_proc_t *read_fptr;
-+ cfs_write_proc_t *write_fptr;
-+ void *data;
-+ struct file_operations *fops;
-+ /**
-+ * /proc file mode.
-+ */
-+ mode_t proc_mode;
-+};
-+
-+struct lprocfs_static_vars {
-+ struct lprocfs_vars *module_vars;
-+ struct lprocfs_vars *obd_vars;
-+};
-+
-+/* if we find more consumers this could be generalized */
-+#define OBD_HIST_MAX 32
-+struct obd_histogram {
-+ spinlock_t oh_lock;
-+ unsigned long oh_buckets[OBD_HIST_MAX];
-+};
-+
-+enum {
-+ BRW_R_PAGES = 0,
-+ BRW_W_PAGES,
-+ BRW_R_RPC_HIST,
-+ BRW_W_RPC_HIST,
-+ BRW_R_IO_TIME,
-+ BRW_W_IO_TIME,
-+ BRW_R_DISCONT_PAGES,
-+ BRW_W_DISCONT_PAGES,
-+ BRW_R_DISCONT_BLOCKS,
-+ BRW_W_DISCONT_BLOCKS,
-+ BRW_R_DISK_IOSIZE,
-+ BRW_W_DISK_IOSIZE,
-+ BRW_R_DIO_FRAGS,
-+ BRW_W_DIO_FRAGS,
-+ BRW_LAST,
-+};
-+
-+struct brw_stats {
-+ struct obd_histogram hist[BRW_LAST];
-+};
-+
-+
-+/* An lprocfs counter can be configured using the enum bit masks below.
-+ *
-+ * LPROCFS_CNTR_EXTERNALLOCK indicates that an external lock already
-+ * protects this counter from concurrent updates. If not specified,
-+ * lprocfs uses an internal per-counter lock variable. External locks are
-+ * not used to protect counter increments, but are used to protect
-+ * counter readout and resets.
-+ *
-+ * LPROCFS_CNTR_AVGMINMAX indicates a multi-valued counter
-+ * (i.e. the counter can be incremented by more than "1"). When specified,
-+ * the counter maintains min, max and sum in addition to a simple
-+ * invocation count. This allows averages to be computed.
-+ * If not specified, the counter is an increment-by-1 counter.
-+ * min, max, sum, etc. are not maintained.
-+ *
-+ * LPROCFS_CNTR_STDDEV indicates that the counter should track sum of
-+ * squares (for multi-valued counter samples only). This allows
-+ * external computation of standard deviation, but involves a 64-bit
-+ * multiply per counter increment.
-+ */
-+
-+enum {
-+ LPROCFS_CNTR_EXTERNALLOCK = 0x0001,
-+ LPROCFS_CNTR_AVGMINMAX = 0x0002,
-+ LPROCFS_CNTR_STDDEV = 0x0004,
-+
-+ /* counter data type */
-+ LPROCFS_TYPE_REGS = 0x0100,
-+ LPROCFS_TYPE_BYTES = 0x0200,
-+ LPROCFS_TYPE_PAGES = 0x0400,
-+ LPROCFS_TYPE_CYCLE = 0x0800,
-+};
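A brief, hedged example of how the flags described above are combined when registering a counter (the index symbol is invented; lprocfs_counter_init() is declared later in this header):

        lprocfs_counter_init(stats, EXAMPLE_READ_BYTES_IDX,
                             LPROCFS_CNTR_AVGMINMAX | LPROCFS_TYPE_BYTES,
                             "read_bytes", "bytes");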
-+
-+struct lprocfs_atomic {
-+ atomic_t la_entry;
-+ atomic_t la_exit;
-+};
-+
-+#define LC_MIN_INIT ((~(__u64)0) >> 1)
-+
-+struct lprocfs_counter {
-+ struct lprocfs_atomic lc_cntl; /* may need to move to per set */
-+ unsigned int lc_config;
-+ __s64 lc_count;
-+ __s64 lc_sum;
-+ __s64 lc_min;
-+ __s64 lc_max;
-+ __s64 lc_sumsquare;
-+ const char *lc_name; /* must be static */
-+ const char *lc_units; /* must be static */
-+};
-+
-+struct lprocfs_percpu {
-+ struct lprocfs_counter lp_cntr[0];
-+};
-+
-+#define LPROCFS_GET_NUM_CPU 0x0001
-+#define LPROCFS_GET_SMP_ID 0x0002
-+
-+enum lprocfs_stats_flags {
-+ LPROCFS_STATS_FLAG_PERCPU = 0x0000, /* per cpu counter */
-+ LPROCFS_STATS_FLAG_NOPERCPU = 0x0001, /* stats have no percpu
-+ * area and need locking */
-+};
-+
-+enum lprocfs_fields_flags {
-+ LPROCFS_FIELDS_FLAGS_CONFIG = 0x0001,
-+ LPROCFS_FIELDS_FLAGS_SUM = 0x0002,
-+ LPROCFS_FIELDS_FLAGS_MIN = 0x0003,
-+ LPROCFS_FIELDS_FLAGS_MAX = 0x0004,
-+ LPROCFS_FIELDS_FLAGS_AVG = 0x0005,
-+ LPROCFS_FIELDS_FLAGS_SUMSQUARE = 0x0006,
-+ LPROCFS_FIELDS_FLAGS_COUNT = 0x0007,
-+};
-+
-+struct lprocfs_stats {
-+ unsigned int ls_num; /* # of counters */
-+ int ls_flags; /* See LPROCFS_STATS_FLAG_* */
-+ spinlock_t ls_lock; /* Lock used only when there are
-+ * no percpu stats areas */
-+ struct lprocfs_percpu *ls_percpu[0];
-+};
-+
-+static inline int opcode_offset(__u32 opc) {
-+ if (opc < OST_LAST_OPC) {
-+ /* OST opcode */
-+ return (opc - OST_FIRST_OPC);
-+ } else if (opc < MDS_LAST_OPC) {
-+ /* MDS opcode */
-+ return (opc - MDS_FIRST_OPC +
-+ (OST_LAST_OPC - OST_FIRST_OPC));
-+ } else if (opc < LDLM_LAST_OPC) {
-+ /* LDLM Opcode */
-+ return (opc - LDLM_FIRST_OPC +
-+ (MDS_LAST_OPC - MDS_FIRST_OPC) +
-+ (OST_LAST_OPC - OST_FIRST_OPC));
-+ } else if (opc < MGS_LAST_OPC) {
-+ /* MGS Opcode */
-+ return (opc - MGS_FIRST_OPC +
-+ (LDLM_LAST_OPC - LDLM_FIRST_OPC) +
-+ (MDS_LAST_OPC - MDS_FIRST_OPC) +
-+ (OST_LAST_OPC - OST_FIRST_OPC));
-+ } else if (opc < OBD_LAST_OPC) {
-+ /* OBD Ping */
-+ return (opc - OBD_FIRST_OPC +
-+ (MGS_LAST_OPC - MGS_FIRST_OPC) +
-+ (LDLM_LAST_OPC - LDLM_FIRST_OPC) +
-+ (MDS_LAST_OPC - MDS_FIRST_OPC) +
-+ (OST_LAST_OPC - OST_FIRST_OPC));
-+ } else if (opc < LLOG_LAST_OPC) {
-+ /* LLOG Opcode */
-+ return (opc - LLOG_FIRST_OPC +
-+ (OBD_LAST_OPC - OBD_FIRST_OPC) +
-+ (MGS_LAST_OPC - MGS_FIRST_OPC) +
-+ (LDLM_LAST_OPC - LDLM_FIRST_OPC) +
-+ (MDS_LAST_OPC - MDS_FIRST_OPC) +
-+ (OST_LAST_OPC - OST_FIRST_OPC));
-+ } else if (opc < QUOTA_LAST_OPC) {
-+ /* LQUOTA Opcode */
-+ return (opc - QUOTA_FIRST_OPC +
-+ (LLOG_LAST_OPC - LLOG_FIRST_OPC) +
-+ (OBD_LAST_OPC - OBD_FIRST_OPC) +
-+ (MGS_LAST_OPC - MGS_FIRST_OPC) +
-+ (LDLM_LAST_OPC - LDLM_FIRST_OPC) +
-+ (MDS_LAST_OPC - MDS_FIRST_OPC) +
-+ (OST_LAST_OPC - OST_FIRST_OPC));
-+ } else {
-+ /* Unknown Opcode */
-+ return -1;
-+ }
-+}
-+
-+#define LUSTRE_MAX_OPCODES ((OST_LAST_OPC - OST_FIRST_OPC) + \
-+ (MDS_LAST_OPC - MDS_FIRST_OPC) + \
-+ (LDLM_LAST_OPC - LDLM_FIRST_OPC) + \
-+ (MGS_LAST_OPC - MGS_FIRST_OPC) + \
-+ (OBD_LAST_OPC - OBD_FIRST_OPC) + \
-+ (LLOG_LAST_OPC - LLOG_FIRST_OPC) + \
-+ (QUOTA_LAST_OPC - QUOTA_FIRST_OPC))
-+
-+#define EXTRA_MAX_OPCODES ((PTLRPC_LAST_CNTR - PTLRPC_FIRST_CNTR) + \
-+ (EXTRA_LAST_OPC - EXTRA_FIRST_OPC))
-+
-+enum {
-+ PTLRPC_REQWAIT_CNTR = 0,
-+ PTLRPC_REQQDEPTH_CNTR,
-+ PTLRPC_REQACTIVE_CNTR,
-+ PTLRPC_TIMEOUT,
-+ PTLRPC_REQBUF_AVAIL_CNTR,
-+ PTLRPC_LAST_CNTR
-+};
-+
-+#define PTLRPC_FIRST_CNTR PTLRPC_REQWAIT_CNTR
-+
-+enum {
-+ LDLM_GLIMPSE_ENQUEUE = 0,
-+ LDLM_PLAIN_ENQUEUE,
-+ LDLM_EXTENT_ENQUEUE,
-+ LDLM_FLOCK_ENQUEUE,
-+ LDLM_IBITS_ENQUEUE,
-+ MDS_REINT_SETATTR,
-+ MDS_REINT_CREATE,
-+ MDS_REINT_LINK,
-+ MDS_REINT_UNLINK,
-+ MDS_REINT_RENAME,
-+ MDS_REINT_OPEN,
-+ BRW_READ_BYTES,
-+ BRW_WRITE_BYTES,
-+ EXTRA_LAST_OPC
-+};
-+
-+#define EXTRA_FIRST_OPC LDLM_GLIMPSE_ENQUEUE
-+/* class_obd.c */
-+extern cfs_proc_dir_entry_t *proc_lustre_root;
-+
-+struct obd_device;
-+struct file;
-+struct obd_histogram;
-+
-+/* Days / hours / mins / seconds format */
-+struct dhms {
-+ int d,h,m,s;
-+};
-+static inline void s2dhms(struct dhms *ts, time_t secs)
-+{
-+ ts->d = secs / 86400;
-+ secs = secs % 86400;
-+ ts->h = secs / 3600;
-+ secs = secs % 3600;
-+ ts->m = secs / 60;
-+ ts->s = secs % 60;
-+}
-+#define DHMS_FMT "%dd%dh%02dm%02ds"
-+#define DHMS_VARS(x) (x)->d, (x)->h, (x)->m, (x)->s
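For example (sketch only), 93784 seconds is 1 day, 2 hours, 3 minutes and 4 seconds, so it prints as 1d2h03m04s:

        struct dhms ts;

        s2dhms(&ts, 93784);
        CDEBUG(D_INFO, "age: "DHMS_FMT"\n", DHMS_VARS(&ts));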
-+
-+
-+#ifdef LPROCFS
-+
-+static inline int lprocfs_stats_lock(struct lprocfs_stats *stats, int type)
-+{
-+ int rc = 0;
-+
-+ if (stats->ls_flags & LPROCFS_STATS_FLAG_NOPERCPU) {
-+ if (type & LPROCFS_GET_NUM_CPU)
-+ rc = 1;
-+ if (type & LPROCFS_GET_SMP_ID)
-+ rc = 0;
-+ spin_lock(&stats->ls_lock);
-+ } else {
-+ if (type & LPROCFS_GET_NUM_CPU)
-+ rc = num_possible_cpus();
-+ if (type & LPROCFS_GET_SMP_ID)
-+ rc = smp_processor_id();
-+ }
-+ return rc;
-+}
-+
-+static inline void lprocfs_stats_unlock(struct lprocfs_stats *stats)
-+{
-+ if (stats->ls_flags & LPROCFS_STATS_FLAG_NOPERCPU)
-+ spin_unlock(&stats->ls_lock);
-+}
-+
-+/* Two optimized LPROCFS counter increment functions are provided:
-+ * lprocfs_counter_incr(cntr) - optimized for by-one counters
-+ * lprocfs_counter_add(cntr, value) - use for multi-valued counters
-+ * Counter data layout allows config flag, counter lock and the
-+ * count itself to reside within a single cache line.
-+ */
-+
-+extern void lprocfs_counter_add(struct lprocfs_stats *stats, int idx,
-+ long amount);
-+extern void lprocfs_counter_sub(struct lprocfs_stats *stats, int idx,
-+ long amount);
-+
-+#define lprocfs_counter_incr(stats, idx) \
-+ lprocfs_counter_add(stats, idx, 1)
-+#define lprocfs_counter_decr(stats, idx) \
-+ lprocfs_counter_sub(stats, idx, 1)
-+
-+extern __s64 lprocfs_read_helper(struct lprocfs_counter *lc,
-+ enum lprocfs_fields_flags field);
-+
-+static inline __u64 lprocfs_stats_collector(struct lprocfs_stats *stats,
-+ int idx,
-+ enum lprocfs_fields_flags field)
-+{
-+ __u64 ret = 0;
-+ int i;
-+
-+ LASSERT(stats != NULL);
-+ for (i = 0; i < num_possible_cpus(); i++)
-+ ret += lprocfs_read_helper(&(stats->ls_percpu[i]->lp_cntr[idx]),
-+ field);
-+ return ret;
-+}
-+
-+extern struct lprocfs_stats *lprocfs_alloc_stats(unsigned int num,
-+ enum lprocfs_stats_flags flags);
-+extern void lprocfs_clear_stats(struct lprocfs_stats *stats);
-+extern void lprocfs_free_stats(struct lprocfs_stats **stats);
-+extern void lprocfs_init_ops_stats(int num_private_stats,
-+ struct lprocfs_stats *stats);
-+extern void lprocfs_init_ldlm_stats(struct lprocfs_stats *ldlm_stats);
-+extern int lprocfs_alloc_obd_stats(struct obd_device *obddev,
-+ unsigned int num_private_stats);
-+extern void lprocfs_counter_init(struct lprocfs_stats *stats, int index,
-+ unsigned conf, const char *name,
-+ const char *units);
-+extern void lprocfs_free_obd_stats(struct obd_device *obddev);
-+struct obd_export;
-+extern int lprocfs_add_clear_entry(struct obd_device * obd,
-+ cfs_proc_dir_entry_t *entry);
-+extern int lprocfs_exp_setup(struct obd_export *exp,
-+ lnet_nid_t *peer_nid, int *newnid);
-+extern int lprocfs_exp_cleanup(struct obd_export *exp);
-+extern int lprocfs_add_simple(struct proc_dir_entry *root,
-+ char *name, read_proc_t *read_proc,
-+ write_proc_t *write_proc, void *data);
-+extern int lprocfs_register_stats(cfs_proc_dir_entry_t *root, const char *name,
-+ struct lprocfs_stats *stats);
-+
-+/* lprocfs_status.c */
-+extern int lprocfs_add_vars(cfs_proc_dir_entry_t *root,
-+ struct lprocfs_vars *var,
-+ void *data);
-+
-+extern cfs_proc_dir_entry_t *lprocfs_register(const char *name,
-+ cfs_proc_dir_entry_t *parent,
-+ struct lprocfs_vars *list,
-+ void *data);
-+
-+extern void lprocfs_remove(cfs_proc_dir_entry_t **root);
-+
-+extern cfs_proc_dir_entry_t *lprocfs_srch(cfs_proc_dir_entry_t *root,
-+ const char *name);
-+
-+extern int lprocfs_obd_setup(struct obd_device *obd, struct lprocfs_vars *list);
-+extern int lprocfs_obd_cleanup(struct obd_device *obd);
-+extern int lprocfs_add_simple(struct proc_dir_entry *root, char *name,
-+ read_proc_t *read_proc, write_proc_t *write_proc,
-+ void *data);
-+struct nid_stat;
-+extern void lprocfs_free_per_client_stats(struct obd_device *obd);
-+extern int lprocfs_nid_stats_clear_write(struct file *file, const char *buffer,
-+ unsigned long count, void *data);
-+extern int lprocfs_nid_stats_clear_read(char *page, char **start, off_t off,
-+ int count, int *eof, void *data);
-+
-+
-+extern struct file_operations lprocfs_evict_client_fops;
-+
-+extern int lprocfs_seq_create(cfs_proc_dir_entry_t *parent, char *name,
-+ mode_t mode, struct file_operations *seq_fops,
-+ void *data);
-+extern int lprocfs_obd_seq_create(struct obd_device *dev, char *name,
-+ mode_t mode, struct file_operations *seq_fops,
-+ void *data);
-+
-+/* Generic callbacks */
-+
-+extern int lprocfs_rd_u64(char *page, char **start, off_t off,
-+ int count, int *eof, void *data);
-+extern int lprocfs_rd_atomic(char *page, char **start, off_t off,
-+ int count, int *eof, void *data);
-+extern int lprocfs_wr_atomic(struct file *file, const char *buffer,
-+ unsigned long count, void *data);
-+extern int lprocfs_rd_uint(char *page, char **start, off_t off,
-+ int count, int *eof, void *data);
-+extern int lprocfs_wr_uint(struct file *file, const char *buffer,
-+ unsigned long count, void *data);
-+extern int lprocfs_rd_uuid(char *page, char **start, off_t off,
-+ int count, int *eof, void *data);
-+extern int lprocfs_rd_name(char *page, char **start, off_t off,
-+ int count, int *eof, void *data);
-+extern int lprocfs_rd_fstype(char *page, char **start, off_t off,
-+ int count, int *eof, void *data);
-+extern int lprocfs_rd_server_uuid(char *page, char **start, off_t off,
-+ int count, int *eof, void *data);
-+extern int lprocfs_rd_conn_uuid(char *page, char **start, off_t off,
-+ int count, int *eof, void *data);
-+extern int lprocfs_rd_import(char *page, char **start, off_t off, int count,
-+ int *eof, void *data);
-+extern int lprocfs_rd_connect_flags(char *page, char **start, off_t off,
-+ int count, int *eof, void *data);
-+extern int lprocfs_rd_num_exports(char *page, char **start, off_t off,
-+ int count, int *eof, void *data);
-+extern int lprocfs_rd_numrefs(char *page, char **start, off_t off,
-+ int count, int *eof, void *data);
-+struct adaptive_timeout;
-+extern int lprocfs_at_hist_helper(char *page, int count, int rc,
-+ struct adaptive_timeout *at);
-+extern int lprocfs_rd_timeouts(char *page, char **start, off_t off,
-+ int count, int *eof, void *data);
-+extern int lprocfs_wr_timeouts(struct file *file, const char *buffer,
-+ unsigned long count, void *data);
-+extern int lprocfs_wr_evict_client(struct file *file, const char *buffer,
-+ unsigned long count, void *data);
-+extern int lprocfs_wr_ping(struct file *file, const char *buffer,
-+ unsigned long count, void *data);
-+
-+/* Statfs helpers */
-+extern int lprocfs_rd_blksize(char *page, char **start, off_t off,
-+ int count, int *eof, void *data);
-+extern int lprocfs_rd_kbytestotal(char *page, char **start, off_t off,
-+ int count, int *eof, void *data);
-+extern int lprocfs_rd_kbytesfree(char *page, char **start, off_t off,
-+ int count, int *eof, void *data);
-+extern int lprocfs_rd_kbytesavail(char *page, char **start, off_t off,
-+ int count, int *eof, void *data);
-+extern int lprocfs_rd_filestotal(char *page, char **start, off_t off,
-+ int count, int *eof, void *data);
-+extern int lprocfs_rd_filesfree(char *page, char **start, off_t off,
-+ int count, int *eof, void *data);
-+extern int lprocfs_rd_filegroups(char *page, char **start, off_t off,
-+ int count, int *eof, void *data);
-+
-+extern int lprocfs_write_helper(const char *buffer, unsigned long count,
-+ int *val);
-+extern int lprocfs_write_frac_helper(const char *buffer, unsigned long count,
-+ int *val, int mult);
-+extern int lprocfs_read_frac_helper(char *buffer, unsigned long count,
-+ long val, int mult);
-+extern int lprocfs_write_u64_helper(const char *buffer, unsigned long count,
-+ __u64 *val);
-+extern int lprocfs_write_frac_u64_helper(const char *buffer, unsigned long count,
-+ __u64 *val, int mult);
-+void lprocfs_oh_tally(struct obd_histogram *oh, unsigned int value);
-+void lprocfs_oh_tally_log2(struct obd_histogram *oh, unsigned int value);
-+void lprocfs_oh_clear(struct obd_histogram *oh);
-+unsigned long lprocfs_oh_sum(struct obd_histogram *oh);
-+
-+/* lprocfs_status.c: counter read/write functions */
-+extern int lprocfs_counter_read(char *page, char **start, off_t off,
-+ int count, int *eof, void *data);
-+extern int lprocfs_counter_write(struct file *file, const char *buffer,
-+ unsigned long count, void *data);
-+
-+/* lprocfs_status.c: recovery status */
-+int lprocfs_obd_rd_recovery_status(char *page, char **start, off_t off,
-+ int count, int *eof, void *data);
-+
-+/* lprocfs_status.c: hash statistics */
-+int lprocfs_obd_rd_hash(char *page, char **start, off_t off,
-+ int count, int *eof, void *data);
-+
-+extern int lprocfs_seq_release(struct inode *, struct file *);
-+
-+/* in lprocfs_stat.c, to protect the private data for proc entries */
-+extern struct rw_semaphore _lprocfs_lock;
-+#define LPROCFS_ENTRY() do { \
-+ down_read(&_lprocfs_lock); \
-+} while(0)
-+#define LPROCFS_EXIT() do { \
-+ up_read(&_lprocfs_lock); \
-+} while(0)
-+#define LPROCFS_ENTRY_AND_CHECK(dp) do { \
-+ typecheck(struct proc_dir_entry *, dp); \
-+ LPROCFS_ENTRY(); \
-+ if ((dp)->deleted) { \
-+ LPROCFS_EXIT(); \
-+ return -ENODEV; \
-+ } \
-+} while(0)
-+#define LPROCFS_WRITE_ENTRY() do { \
-+ down_write(&_lprocfs_lock); \
-+} while(0)
-+#define LPROCFS_WRITE_EXIT() do { \
-+ up_write(&_lprocfs_lock); \
-+} while(0)
-+
-+/* You must use these macros when you want to refer to
-+ * the import in a client obd_device for a lprocfs entry */
-+#define LPROCFS_CLIMP_CHECK(obd) do { \
-+ typecheck(struct obd_device *, obd); \
-+ down_read(&(obd)->u.cli.cl_sem); \
-+ if ((obd)->u.cli.cl_import == NULL) { \
-+ up_read(&(obd)->u.cli.cl_sem); \
-+ return -ENODEV; \
-+ } \
-+} while(0)
-+#define LPROCFS_CLIMP_EXIT(obd) \
-+ up_read(&(obd)->u.cli.cl_sem);
-+
-+
-+/* write the name##_seq_show function, call LPROC_SEQ_FOPS_RO for read-only
-+ proc entries; otherwise, also define a name##_seq_write function for a
-+ read-write proc entry, and call LPROC_SEQ_FOPS instead. Finally, call
-+ lprocfs_obd_seq_create(obd, filename, 0444, &name##_fops, data); */
-+#define __LPROC_SEQ_FOPS(name, custom_seq_write) \
-+static int name##_seq_open(struct inode *inode, struct file *file) { \
-+ struct proc_dir_entry *dp = PDE(inode); \
-+ int rc; \
-+ LPROCFS_ENTRY_AND_CHECK(dp); \
-+ rc = single_open(file, name##_seq_show, dp->data); \
-+ if (rc) { \
-+ LPROCFS_EXIT(); \
-+ return rc; \
-+ } \
-+ return 0; \
-+} \
-+struct file_operations name##_fops = { \
-+ .owner = THIS_MODULE, \
-+ .open = name##_seq_open, \
-+ .read = seq_read, \
-+ .write = custom_seq_write, \
-+ .llseek = seq_lseek, \
-+ .release = lprocfs_seq_release, \
-+}
-+
-+#define LPROC_SEQ_FOPS_RO(name) __LPROC_SEQ_FOPS(name, NULL)
-+#define LPROC_SEQ_FOPS(name) __LPROC_SEQ_FOPS(name, name##_seq_write)
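A minimal sketch of the pattern the comment above describes; "foo", the obd handle and the data pointer are placeholders:

static int foo_seq_show(struct seq_file *m, void *unused)
{
        seq_printf(m, "%d\n", 42);
        return 0;
}
LPROC_SEQ_FOPS_RO(foo);

        /* later, during obd setup: */
        rc = lprocfs_obd_seq_create(obd, "foo", 0444, &foo_fops, obd);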
-+
-+/* lproc_ptlrpc.c */
-+struct ptlrpc_request;
-+extern void target_print_req(void *seq_file, struct ptlrpc_request *req);
-+
-+#ifdef CRAY_XT3
-+/* lprocfs_status.c: read recovery max time bz13079 */
-+int lprocfs_obd_rd_recovery_maxtime(char *page, char **start, off_t off,
-+ int count, int *eof, void *data);
-+
-+/* lprocfs_status.c: write recovery max time bz13079 */
-+int lprocfs_obd_wr_recovery_maxtime(struct file *file, const char *buffer,
-+ unsigned long count, void *data);
-+#endif
-+
-+/* all quota proc functions */
-+extern int lprocfs_quota_rd_bunit(char *page, char **start, off_t off, int count,
-+ int *eof, void *data);
-+extern int lprocfs_quota_wr_bunit(struct file *file, const char *buffer,
-+ unsigned long count, void *data);
-+extern int lprocfs_quota_rd_btune(char *page, char **start, off_t off, int count,
-+ int *eof, void *data);
-+extern int lprocfs_quota_wr_btune(struct file *file, const char *buffer,
-+ unsigned long count, void *data);
-+extern int lprocfs_quota_rd_iunit(char *page, char **start, off_t off, int count,
-+ int *eof, void *data);
-+extern int lprocfs_quota_wr_iunit(struct file *file, const char *buffer,
-+ unsigned long count, void *data);
-+extern int lprocfs_quota_rd_itune(char *page, char **start, off_t off, int count,
-+ int *eof, void *data);
-+extern int lprocfs_quota_wr_itune(struct file *file, const char *buffer,
-+ unsigned long count, void *data);
-+extern int lprocfs_quota_rd_type(char *page, char **start, off_t off, int count,
-+ int *eof, void *data);
-+extern int lprocfs_quota_wr_type(struct file *file, const char *buffer,
-+ unsigned long count, void *data);
-+extern int lprocfs_quota_rd_switch_seconds(char *page, char **start, off_t off,
-+ int count, int *eof, void *data);
-+extern int lprocfs_quota_wr_switch_seconds(struct file *file, const char *buffer,
-+ unsigned long count, void *data);
-+extern int lprocfs_quota_rd_sync_blk(char *page, char **start, off_t off,
-+ int count, int *eof, void *data);
-+extern int lprocfs_quota_wr_sync_blk(struct file *file, const char *buffer,
-+ unsigned long count, void *data);
-+extern int lprocfs_quota_rd_switch_qs(char *page, char **start, off_t off,
-+ int count, int *eof, void *data);
-+extern int lprocfs_quota_wr_switch_qs(struct file *file, const char *buffer,
-+ unsigned long count, void *data);
-+extern int lprocfs_quota_rd_boundary_factor(char *page, char **start, off_t off,
-+ int count, int *eof, void *data);
-+extern int lprocfs_quota_wr_boundary_factor(struct file *file, const char *buffer,
-+ unsigned long count, void *data);
-+extern int lprocfs_quota_rd_least_bunit(char *page, char **start, off_t off,
-+ int count, int *eof, void *data);
-+extern int lprocfs_quota_wr_least_bunit(struct file *file, const char *buffer,
-+ unsigned long count, void *data);
-+extern int lprocfs_quota_rd_least_iunit(char *page, char **start, off_t off,
-+ int count, int *eof, void *data);
-+extern int lprocfs_quota_wr_least_iunit(struct file *file, const char *buffer,
-+ unsigned long count, void *data);
-+extern int lprocfs_quota_rd_qs_factor(char *page, char **start, off_t off,
-+ int count, int *eof, void *data);
-+extern int lprocfs_quota_wr_qs_factor(struct file *file, const char *buffer,
-+ unsigned long count, void *data);
-+
-+#else
-+/* LPROCFS is not defined */
-+static inline void lprocfs_counter_add(struct lprocfs_stats *stats,
-+ int index, long amount) { return; }
-+static inline void lprocfs_counter_incr(struct lprocfs_stats *stats,
-+ int index) { return; }
-+static inline void lprocfs_counter_sub(struct lprocfs_stats *stats,
-+ int index, long amount) { return; }
-+static inline void lprocfs_counter_init(struct lprocfs_stats *stats,
-+ int index, unsigned conf,
-+ const char *name, const char *units)
-+{ return; }
-+
-+static inline __u64 lc_read_helper(struct lprocfs_counter *lc,
-+ enum lprocfs_fields_flags field)
-+{ return 0; }
-+
-+static inline struct lprocfs_stats* lprocfs_alloc_stats(unsigned int num,
-+ enum lprocfs_stats_flags flags)
-+{ return NULL; }
-+static inline void lprocfs_clear_stats(struct lprocfs_stats *stats)
-+{ return; }
-+static inline void lprocfs_free_stats(struct lprocfs_stats **stats)
-+{ return; }
-+static inline int lprocfs_register_stats(cfs_proc_dir_entry_t *root,
-+ const char *name,
-+ struct lprocfs_stats *stats)
-+{ return 0; }
-+static inline void lprocfs_init_ops_stats(int num_private_stats,
-+ struct lprocfs_stats *stats)
-+{ return; }
-+static inline void lprocfs_init_ldlm_stats(struct lprocfs_stats *ldlm_stats)
-+{ return; }
-+static inline int lprocfs_alloc_obd_stats(struct obd_device *obddev,
-+ unsigned int num_private_stats)
-+{ return 0; }
-+static inline void lprocfs_free_obd_stats(struct obd_device *obddev)
-+{ return; }
-+
-+struct obd_export;
-+static inline int lprocfs_add_clear_entry(struct obd_export *exp)
-+{ return 0; }
-+static inline int lprocfs_exp_setup(struct obd_export *exp,
-+ lnet_nid_t *peer_nid, int *newnid)
-+{ return 0; }
-+static inline int lprocfs_exp_cleanup(struct obd_export *exp)
-+{ return 0; }
-+static inline int lprocfs_add_simple(struct proc_dir_entry *root,
-+ char *name,
-+ read_proc_t *read_proc,
-+ write_proc_t *write_proc,
-+ void *data)
-+{return 0; }
-+struct nid_stat;
-+static inline void lprocfs_free_per_client_stats(struct obd_device *obd)
-+{}
-+static inline
-+int lprocfs_nid_stats_clear_write(struct file *file, const char *buffer,
-+ unsigned long count, void *data)
-+{return count;}
-+static inline
-+int lprocfs_nid_stats_clear_read(char *page, char **start, off_t off,
-+ int count, int *eof, void *data)
-+{return count;}
-+
-+
-+static inline cfs_proc_dir_entry_t *
-+lprocfs_register(const char *name, cfs_proc_dir_entry_t *parent,
-+ struct lprocfs_vars *list, void *data) { return NULL; }
-+static inline int lprocfs_add_vars(cfs_proc_dir_entry_t *root,
-+ struct lprocfs_vars *var,
-+ void *data) { return 0; }
-+static inline void lprocfs_remove(cfs_proc_dir_entry_t **root) {};
-+static inline cfs_proc_dir_entry_t *lprocfs_srch(cfs_proc_dir_entry_t *head,
-+ const char *name) {return 0;}
-+static inline int lprocfs_obd_setup(struct obd_device *dev,
-+ struct lprocfs_vars *list) { return 0; }
-+static inline int lprocfs_obd_cleanup(struct obd_device *dev) { return 0; }
-+static inline int lprocfs_rd_u64(char *page, char **start, off_t off,
-+ int count, int *eof, void *data) { return 0; }
-+static inline int lprocfs_rd_uuid(char *page, char **start, off_t off,
-+ int count, int *eof, void *data) { return 0; }
-+static inline int lprocfs_rd_name(char *page, char **start, off_t off,
-+ int count, int *eof, void *data) { return 0; }
-+static inline int lprocfs_rd_server_uuid(char *page, char **start, off_t off,
-+ int count, int *eof, void *data)
-+{ return 0; }
-+static inline int lprocfs_rd_conn_uuid(char *page, char **start, off_t off,
-+ int count, int *eof, void *data)
-+{ return 0; }
-+static inline int lprocfs_rd_import(char *page, char **start, off_t off, int count,
-+ int *eof, void *data) { return 0; }
-+static inline int lprocfs_rd_connect_flags(char *page, char **start, off_t off,
-+ int count, int *eof, void *data)
-+{ return 0; }
-+static inline int lprocfs_rd_num_exports(char *page, char **start, off_t off,
-+ int count, int *eof, void *data)
-+{ return 0; }
-+static inline int lprocfs_rd_numrefs(char *page, char **start, off_t off,
-+ int count, int *eof, void *data)
-+{ return 0; }
-+struct adaptive_timeout;
-+static inline int lprocfs_at_hist_helper(char *page, int count, int rc,
-+ struct adaptive_timeout *at)
-+{ return 0; }
-+static inline int lprocfs_rd_timeouts(char *page, char **start, off_t off,
-+ int count, int *eof, void *data)
-+{ return 0; }
-+static inline int lprocfs_wr_timeouts(struct file *file, const char *buffer,
-+ unsigned long count, void *data)
-+{ return 0; }
-+static inline int lprocfs_wr_evict_client(struct file *file, const char *buffer,
-+ unsigned long count, void *data)
-+{ return 0; }
-+static inline int lprocfs_wr_ping(struct file *file, const char *buffer,
-+ unsigned long count, void *data)
-+{ return 0; }
-+
-+
-+/* Statfs helpers */
-+static inline
-+int lprocfs_rd_blksize(char *page, char **start, off_t off,
-+ int count, int *eof, void *data) { return 0; }
-+static inline
-+int lprocfs_rd_kbytestotal(char *page, char **start, off_t off,
-+ int count, int *eof, void *data) { return 0; }
-+static inline
-+int lprocfs_rd_kbytesfree(char *page, char **start, off_t off,
-+ int count, int *eof, void *data) { return 0; }
-+static inline
-+int lprocfs_rd_kbytesavail(char *page, char **start, off_t off,
-+ int count, int *eof, void *data) { return 0; }
-+static inline
-+int lprocfs_rd_filestotal(char *page, char **start, off_t off,
-+ int count, int *eof, void *data) { return 0; }
-+static inline
-+int lprocfs_rd_filesfree(char *page, char **start, off_t off,
-+ int count, int *eof, void *data) { return 0; }
-+static inline
-+int lprocfs_rd_filegroups(char *page, char **start, off_t off,
-+ int count, int *eof, void *data) { return 0; }
-+static inline
-+void lprocfs_oh_tally(struct obd_histogram *oh, unsigned int value) {}
-+static inline
-+void lprocfs_oh_tally_log2(struct obd_histogram *oh, unsigned int value) {}
-+static inline
-+void lprocfs_oh_clear(struct obd_histogram *oh) {}
-+static inline
-+unsigned long lprocfs_oh_sum(struct obd_histogram *oh) { return 0; }
-+static inline
-+int lprocfs_counter_read(char *page, char **start, off_t off,
-+ int count, int *eof, void *data) { return 0; }
-+static inline
-+int lprocfs_counter_write(struct file *file, const char *buffer,
-+ unsigned long count, void *data) { return 0; }
-+
-+static inline
-+__u64 lprocfs_stats_collector(struct lprocfs_stats *stats, int idx,
-+ enum lprocfs_fields_flags field)
-+{ return (__u64)0; }
-+
-+#define LPROCFS_ENTRY()
-+#define LPROCFS_EXIT()
-+#define LPROCFS_ENTRY_AND_CHECK(dp)
-+#define LPROC_SEQ_FOPS_RO(name)
-+#define LPROC_SEQ_FOPS(name)
-+
-+/* lproc_ptlrpc.c */
-+#define target_print_req NULL
-+
-+#endif /* LPROCFS */
-+
-+#endif /* LPROCFS_SNMP_H */
-diff -urNad lustre~/lustre/llite/file.c lustre/lustre/llite/file.c
---- lustre~/lustre/llite/file.c 2009-03-13 09:45:02.000000000 +0100
-+++ lustre/lustre/llite/file.c 2009-03-13 09:45:03.000000000 +0100
-@@ -1801,11 +1801,12 @@
- #endif
- }
-
-+#ifdef HAVE_KERNEL_SENDFILE
- /*
- * Send file content (through pagecache) somewhere with helper
- */
--static ssize_t ll_file_sendfile(struct file *in_file, loff_t *ppos,size_t count,
-- read_actor_t actor, void *target)
-+static ssize_t ll_file_sendfile(struct file *in_file, loff_t *ppos,
-+ size_t count, read_actor_t actor, void *target)
- {
- struct inode *inode = in_file->f_dentry->d_inode;
- struct ll_inode_info *lli = ll_i2info(inode);
-@@ -1814,10 +1815,10 @@
- struct ll_lock_tree_node *node;
- struct ost_lvb lvb;
- struct ll_ra_read bead;
-- int rc;
-- ssize_t retval;
-+ ssize_t rc;
- __u64 kms;
- ENTRY;
-+
- CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),size="LPSZ",offset=%Ld\n",
- inode->i_ino, inode->i_generation, inode, count, *ppos);
-
-@@ -1831,8 +1832,10 @@
- in_file->f_ra.ra_pages = 0;
-
- /* File with no objects, nothing to lock */
-- if (!lsm)
-- RETURN(generic_file_sendfile(in_file, ppos, count, actor, target));
-+ if (!lsm) {
-+ rc = generic_file_sendfile(in_file, ppos, count, actor, target);
-+ RETURN(rc);
-+ }
-
- node = ll_node_from_inode(inode, *ppos, *ppos + count - 1, LCK_PR);
- if (IS_ERR(node))
-@@ -1872,8 +1875,8 @@
- /* A glimpse is necessary to determine whether we return a
- * short read (B) or some zeroes at the end of the buffer (C) */
- ll_inode_size_unlock(inode, 1);
-- retval = ll_glimpse_size(inode, LDLM_FL_BLOCK_GRANTED);
-- if (retval)
-+ rc = ll_glimpse_size(inode, LDLM_FL_BLOCK_GRANTED);
-+ if (rc)
- goto out;
- } else {
- /* region is within kms and, hence, within real file size (A) */
-@@ -1889,13 +1892,115 @@
- ll_ra_read_in(in_file, &bead);
- /* BUG: 5972 */
- file_accessed(in_file);
-- retval = generic_file_sendfile(in_file, ppos, count, actor, target);
-+ rc = generic_file_sendfile(in_file, ppos, count, actor, target);
- ll_ra_read_ex(in_file, &bead);
-
- out:
- ll_tree_unlock(&tree);
-- RETURN(retval);
-+ RETURN(rc);
-+}
-+#endif
-+
-+/* change based on
-+ * http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commit;h=f0930fffa99e7fe0a0c4b6c7d9a244dc88288c27
-+ */
-+#ifdef HAVE_KERNEL_SPLICE_READ
-+static ssize_t ll_file_splice_read(struct file *in_file, loff_t *ppos,
-+ struct pipe_inode_info *pipe, size_t count,
-+ unsigned int flags)
-+{
-+ struct inode *inode = in_file->f_dentry->d_inode;
-+ struct ll_inode_info *lli = ll_i2info(inode);
-+ struct lov_stripe_md *lsm = lli->lli_smd;
-+ struct ll_lock_tree tree;
-+ struct ll_lock_tree_node *node;
-+ struct ost_lvb lvb;
-+ struct ll_ra_read bead;
-+ ssize_t rc;
-+ __u64 kms;
-+ ENTRY;
-+
-+ CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),size="LPSZ",offset=%Ld\n",
-+ inode->i_ino, inode->i_generation, inode, count, *ppos);
-+
-+ /* "If nbyte is 0, read() will return 0 and have no other results."
-+ * -- Single Unix Spec */
-+ if (count == 0)
-+ RETURN(0);
-+
-+ ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_READ_BYTES, count);
-+ /* turn off the kernel's read-ahead */
-+ in_file->f_ra.ra_pages = 0;
-+
-+ /* File with no objects, nothing to lock */
-+ if (!lsm) {
-+ rc = generic_file_splice_read(in_file, ppos, pipe, count, flags);
-+ RETURN(rc);
-+ }
-+
-+ node = ll_node_from_inode(inode, *ppos, *ppos + count - 1, LCK_PR);
-+ if (IS_ERR(node))
-+ RETURN(PTR_ERR(node));
-+
-+ tree.lt_fd = LUSTRE_FPRIVATE(in_file);
-+ rc = ll_tree_lock(&tree, node, NULL, count,
-+ in_file->f_flags & O_NONBLOCK?LDLM_FL_BLOCK_NOWAIT:0);
-+ if (rc != 0)
-+ RETURN(rc);
-+
-+ ll_clear_file_contended(inode);
-+ ll_inode_size_lock(inode, 1);
-+ /*
-+ * Consistency guarantees: following possibilities exist for the
-+ * relation between region being read and real file size at this
-+ * moment:
-+ *
-+ * (A): the region is completely inside of the file;
-+ *
-+ * (B-x): x bytes of region are inside of the file, the rest is
-+ * outside;
-+ *
-+ * (C): the region is completely outside of the file.
-+ *
-+ * This classification is stable under DLM lock acquired by
-+ * ll_tree_lock() above, because to change class, another client would have
-+ * to take a DLM lock conflicting with our lock. Also, any updates to
-+ * ->i_size by other threads on this client are serialized by
-+ * ll_inode_size_lock(). This guarantees that short reads are handled
-+ * correctly in the face of concurrent writes and truncates.
-+ */
-+ inode_init_lvb(inode, &lvb);
-+ obd_merge_lvb(ll_i2sbi(inode)->ll_osc_exp, lsm, &lvb, 1);
-+ kms = lvb.lvb_size;
-+ if (*ppos + count - 1 > kms) {
-+ /* A glimpse is necessary to determine whether we return a
-+ * short read (B) or some zeroes at the end of the buffer (C) */
-+ ll_inode_size_unlock(inode, 1);
-+ rc = ll_glimpse_size(inode, LDLM_FL_BLOCK_GRANTED);
-+ if (rc)
-+ goto out;
-+ } else {
-+ /* region is within kms and, hence, within real file size (A) */
-+ i_size_write(inode, kms);
-+ ll_inode_size_unlock(inode, 1);
-+ }
-+
-+ CDEBUG(D_INFO, "Send ino %lu, "LPSZ" bytes, offset %lld, i_size %llu\n",
-+ inode->i_ino, count, *ppos, i_size_read(inode));
-+
-+ bead.lrr_start = *ppos >> CFS_PAGE_SHIFT;
-+ bead.lrr_count = (count + CFS_PAGE_SIZE - 1) >> CFS_PAGE_SHIFT;
-+ ll_ra_read_in(in_file, &bead);
-+ /* BUG: 5972 */
-+ file_accessed(in_file);
-+ rc = generic_file_splice_read(in_file, ppos, pipe, count, flags);
-+ ll_ra_read_ex(in_file, &bead);
-+
-+ out:
-+ ll_tree_unlock(&tree);
-+ RETURN(rc);
- }
-+#endif
-
- static int ll_lov_recreate_obj(struct inode *inode, struct file *file,
- unsigned long arg)
-@@ -3084,7 +3189,11 @@
- }
-
- #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,10))
-+#ifndef HAVE_INODE_PERMISION_2ARGS
- int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd)
-+#else
-+int ll_inode_permission(struct inode *inode, int mask)
-+#endif
- {
- CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), mask %o\n",
- inode->i_ino, inode->i_generation, inode, mask);
-@@ -3093,7 +3202,7 @@
- return generic_permission(inode, mask, lustre_check_acl);
- }
- #else
--#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
-+#ifndef HAVE_INODE_PERMISION_2ARGS
- int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd)
- #else
- int ll_inode_permission(struct inode *inode, int mask)
-@@ -3163,7 +3272,12 @@
- .release = ll_file_release,
- .mmap = ll_file_mmap,
- .llseek = ll_file_seek,
-+#ifdef HAVE_KERNEL_SPLICE_READ
-+ .splice_read = ll_file_splice_read,
-+#endif
-+#ifdef HAVE_KERNEL_SENDFILE
- .sendfile = ll_file_sendfile,
-+#endif
- .fsync = ll_fsync,
- };
-
-@@ -3185,7 +3299,12 @@
- .release = ll_file_release,
- .mmap = ll_file_mmap,
- .llseek = ll_file_seek,
-+#ifdef HAVE_KERNEL_SPLICE_READ
-+ .splice_read = ll_file_splice_read,
-+#endif
-+#ifdef HAVE_KERNEL_SENDFILE
- .sendfile = ll_file_sendfile,
-+#endif
- .fsync = ll_fsync,
- #ifdef HAVE_F_OP_FLOCK
- .flock = ll_file_flock,
-@@ -3212,7 +3331,12 @@
- .release = ll_file_release,
- .mmap = ll_file_mmap,
- .llseek = ll_file_seek,
-+#ifdef HAVE_KERNEL_SPLICE_READ
-+ .splice_read = ll_file_splice_read,
-+#endif
-+#ifdef HAVE_KERNEL_SENDFILE
- .sendfile = ll_file_sendfile,
-+#endif
- .fsync = ll_fsync,
- #ifdef HAVE_F_OP_FLOCK
- .flock = ll_file_noflock,
-diff -urNad lustre~/lustre/llite/file.c.orig lustre/lustre/llite/file.c.orig
---- lustre~/lustre/llite/file.c.orig 1970-01-01 00:00:00.000000000 +0000
-+++ lustre/lustre/llite/file.c.orig 2009-03-13 09:45:03.000000000 +0100
-@@ -0,0 +1,3335 @@
-+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-+ * vim:expandtab:shiftwidth=8:tabstop=8:
-+ *
-+ * GPL HEADER START
-+ *
-+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
-+ *
-+ * This program is free software; you can redistribute it and/or modify
-+ * it under the terms of the GNU General Public License version 2 only,
-+ * as published by the Free Software Foundation.
-+ *
-+ * This program is distributed in the hope that it will be useful, but
-+ * WITHOUT ANY WARRANTY; without even the implied warranty of
-+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-+ * General Public License version 2 for more details (a copy is included
-+ * in the LICENSE file that accompanied this code).
-+ *
-+ * You should have received a copy of the GNU General Public License
-+ * version 2 along with this program; If not, see
-+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
-+ *
-+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
-+ * CA 95054 USA or visit www.sun.com if you need additional information or
-+ * have any questions.
-+ *
-+ * GPL HEADER END
-+ */
-+/*
-+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved
-+ * Use is subject to license terms.
-+ */
-+/*
-+ * This file is part of Lustre, http://www.lustre.org/
-+ * Lustre is a trademark of Sun Microsystems, Inc.
-+ *
-+ * lustre/llite/file.c
-+ *
-+ * Author: Peter Braam <braam at clusterfs.com>
-+ * Author: Phil Schwan <phil at clusterfs.com>
-+ * Author: Andreas Dilger <adilger at clusterfs.com>
-+ */
-+
-+#define DEBUG_SUBSYSTEM S_LLITE
-+#include <lustre_dlm.h>
-+#include <lustre_lite.h>
-+#include <linux/pagemap.h>
-+#include <linux/file.h>
-+#include <linux/posix_acl.h>
-+#include "llite_internal.h"
-+#include <lustre/ll_fiemap.h>
-+
-+/* also used by llite/special.c:ll_special_open() */
-+struct ll_file_data *ll_file_data_get(void)
-+{
-+ struct ll_file_data *fd;
-+
-+ OBD_SLAB_ALLOC_PTR(fd, ll_file_data_slab);
-+ return fd;
-+}
-+
-+static void ll_file_data_put(struct ll_file_data *fd)
-+{
-+ if (fd != NULL)
-+ OBD_SLAB_FREE_PTR(fd, ll_file_data_slab);
-+}
-+
-+static int ll_close_inode_openhandle(struct inode *inode,
-+ struct obd_client_handle *och)
-+{
-+ struct ptlrpc_request *req = NULL;
-+ struct obd_device *obd;
-+ struct obdo *oa;
-+ int rc;
-+ ENTRY;
-+
-+ obd = class_exp2obd(ll_i2mdcexp(inode));
-+ if (obd == NULL) {
-+ CERROR("Invalid MDC connection handle "LPX64"\n",
-+ ll_i2mdcexp(inode)->exp_handle.h_cookie);
-+ GOTO(out, rc = 0);
-+ }
-+
-+ /*
-+ * Here we check if this is a forced umount. If so, this is called on
-+ * cancelling the "open lock", and we do not call mdc_close() in this
-+ * case, as it will not succeed because the import is already deactivated.
-+ */
-+ if (obd->obd_force)
-+ GOTO(out, rc = 0);
-+
-+ OBDO_ALLOC(oa);
-+ if (!oa)
-+ RETURN(-ENOMEM); // XXX We leak openhandle and request here.
-+
-+ oa->o_id = inode->i_ino;
-+ oa->o_valid = OBD_MD_FLID;
-+ obdo_from_inode(oa, inode, OBD_MD_FLTYPE | OBD_MD_FLMODE |
-+ OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
-+ OBD_MD_FLATIME | OBD_MD_FLMTIME |
-+ OBD_MD_FLCTIME);
-+ if (ll_is_inode_dirty(inode)) {
-+ oa->o_flags = MDS_BFLAG_UNCOMMITTED_WRITES;
-+ oa->o_valid |= OBD_MD_FLFLAGS;
-+ }
-+
-+ rc = mdc_close(ll_i2mdcexp(inode), oa, och, &req);
-+ if (rc == EAGAIN) {
-+ /* We are the last writer, so the MDS has instructed us to get
-+ * the file size and any write cookies, then close again. */
-+ ll_queue_done_writing(inode);
-+ rc = 0;
-+ } else if (rc) {
-+ CERROR("inode %lu mdc close failed: rc = %d\n",
-+ inode->i_ino, rc);
-+ }
-+
-+ OBDO_FREE(oa);
-+
-+ if (rc == 0) {
-+ rc = ll_objects_destroy(req, inode);
-+ if (rc)
-+ CERROR("inode %lu ll_objects destroy: rc = %d\n",
-+ inode->i_ino, rc);
-+ }
-+
-+ ptlrpc_req_finished(req); /* This is close request */
-+ EXIT;
-+out:
-+ mdc_clear_open_replay_data(och);
-+
-+ return rc;
-+}
-+
-+int ll_mdc_real_close(struct inode *inode, int flags)
-+{
-+ struct ll_inode_info *lli = ll_i2info(inode);
-+ int rc = 0;
-+ struct obd_client_handle **och_p;
-+ struct obd_client_handle *och;
-+ __u64 *och_usecount;
-+
-+ ENTRY;
-+
-+ if (flags & FMODE_WRITE) {
-+ och_p = &lli->lli_mds_write_och;
-+ och_usecount = &lli->lli_open_fd_write_count;
-+ } else if (flags & FMODE_EXEC) {
-+ och_p = &lli->lli_mds_exec_och;
-+ och_usecount = &lli->lli_open_fd_exec_count;
-+ } else {
-+ LASSERT(flags & FMODE_READ);
-+ och_p = &lli->lli_mds_read_och;
-+ och_usecount = &lli->lli_open_fd_read_count;
-+ }
-+
-+ down(&lli->lli_och_sem);
-+ if (*och_usecount) { /* There are still users of this handle, so
-+ skip freeing it. */
-+ up(&lli->lli_och_sem);
-+ RETURN(0);
-+ }
-+ och = *och_p;
-+ *och_p = NULL;
-+ up(&lli->lli_och_sem);
-+
-+ if (och) { /* There might be a race and somebody may have freed
-+ this och already */
-+ rc = ll_close_inode_openhandle(inode, och);
-+ och->och_fh.cookie = DEAD_HANDLE_MAGIC;
-+ OBD_FREE(och, sizeof *och);
-+ }
-+
-+ RETURN(rc);
-+}
-+
-+int ll_mdc_close(struct obd_export *mdc_exp, struct inode *inode,
-+ struct file *file)
-+{
-+ struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
-+ struct ll_inode_info *lli = ll_i2info(inode);
-+ int rc = 0;
-+ ENTRY;
-+
-+ /* clear group lock, if present */
-+ if (unlikely(fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
-+ struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
-+ fd->fd_flags &= ~(LL_FILE_GROUP_LOCKED|LL_FILE_IGNORE_LOCK);
-+ rc = ll_extent_unlock(fd, inode, lsm, LCK_GROUP,
-+ &fd->fd_cwlockh);
-+ }
-+
-+ /* Let's see if we have good enough OPEN lock on the file and if
-+ we can skip talking to MDS */
-+ if (file->f_dentry->d_inode) { /* Can this ever be false? */
-+ int lockmode;
-+ int flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_TEST_LOCK;
-+ struct lustre_handle lockh;
-+ struct inode *inode = file->f_dentry->d_inode;
-+ struct ldlm_res_id file_res_id = {.name={inode->i_ino,
-+ inode->i_generation}};
-+ ldlm_policy_data_t policy = {.l_inodebits={MDS_INODELOCK_OPEN}};
-+
-+ down(&lli->lli_och_sem);
-+ if (fd->fd_omode & FMODE_WRITE) {
-+ lockmode = LCK_CW;
-+ LASSERT(lli->lli_open_fd_write_count);
-+ lli->lli_open_fd_write_count--;
-+ } else if (fd->fd_omode & FMODE_EXEC) {
-+ lockmode = LCK_PR;
-+ LASSERT(lli->lli_open_fd_exec_count);
-+ lli->lli_open_fd_exec_count--;
-+ } else {
-+ lockmode = LCK_CR;
-+ LASSERT(lli->lli_open_fd_read_count);
-+ lli->lli_open_fd_read_count--;
-+ }
-+ up(&lli->lli_och_sem);
-+
-+ if (!ldlm_lock_match(mdc_exp->exp_obd->obd_namespace, flags,
-+ &file_res_id, LDLM_IBITS, &policy,lockmode,
-+ &lockh)) {
-+ rc = ll_mdc_real_close(file->f_dentry->d_inode,
-+ fd->fd_omode);
-+ }
-+ } else {
-+ CERROR("Releasing a file %p with negative dentry %p. Name %s",
-+ file, file->f_dentry, file->f_dentry->d_name.name);
-+ }
-+
-+ LUSTRE_FPRIVATE(file) = NULL;
-+ ll_file_data_put(fd);
-+
-+ RETURN(rc);
-+}
-+
-+int lov_test_and_clear_async_rc(struct lov_stripe_md *lsm);
-+
-+/* While this returns an error code, the caller (fput()) ignores it, so we
-+ * need to make every effort to clean up all of our state here. Also,
-+ * applications rarely check close errors, and even if an error is returned
-+ * they will not retry the close call.
-+ */
-+int ll_file_release(struct inode *inode, struct file *file)
-+{
-+ struct ll_file_data *fd;
-+ struct ll_sb_info *sbi = ll_i2sbi(inode);
-+ struct ll_inode_info *lli = ll_i2info(inode);
-+ struct lov_stripe_md *lsm = lli->lli_smd;
-+ int rc;
-+ ENTRY;
-+
-+ CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
-+ inode->i_generation, inode);
-+
-+
-+ if (inode->i_sb->s_root != file->f_dentry)
-+ ll_stats_ops_tally(sbi, LPROC_LL_RELEASE, 1);
-+ fd = LUSTRE_FPRIVATE(file);
-+ LASSERT(fd != NULL);
-+
-+ /* The last ref on @file, maybe not the owner pid of statahead.
-+ * Different processes can open the same dir, "ll_opendir_key" means:
-+ * it is me that should stop the statahead thread. */
-+ if (lli->lli_opendir_key == fd && lli->lli_opendir_pid != 0)
-+ ll_stop_statahead(inode, lli->lli_opendir_key);
-+
-+ if (inode->i_sb->s_root == file->f_dentry) {
-+ LUSTRE_FPRIVATE(file) = NULL;
-+ ll_file_data_put(fd);
-+ RETURN(0);
-+ }
-+
-+ if (lsm)
-+ lov_test_and_clear_async_rc(lsm);
-+ lli->lli_async_rc = 0;
-+
-+ /* Ensure that dirty pages are flushed out with the right creds */
-+ if (file->f_mode & FMODE_WRITE)
-+ filemap_fdatawrite(file->f_mapping);
-+
-+ rc = ll_mdc_close(sbi->ll_mdc_exp, inode, file);
-+ RETURN(rc);
-+}
-+
-+static int ll_intent_file_open(struct file *file, void *lmm,
-+ int lmmsize, struct lookup_intent *itp)
-+{
-+ struct ll_sb_info *sbi = ll_i2sbi(file->f_dentry->d_inode);
-+ struct mdc_op_data data;
-+ struct dentry *parent = file->f_dentry->d_parent;
-+ const char *name = file->f_dentry->d_name.name;
-+ const int len = file->f_dentry->d_name.len;
-+ struct inode *inode = file->f_dentry->d_inode;
-+ struct ptlrpc_request *req;
-+ int rc;
-+ ENTRY;
-+
-+ if (!parent)
-+ RETURN(-ENOENT);
-+
-+ ll_prepare_mdc_op_data(&data, parent->d_inode, inode,
-+ name, len, O_RDWR, NULL);
-+
-+ /* Usually we come here only for NFSD, and we want open lock.
-+ But we can also get here with pre 2.6.15 patchless kernels, and in
-+ that case that lock is also ok */
-+ /* We can also get here if there was a cached open handle in revalidate_it
-+ * but it disappeared while we were getting from there to ll_file_open.
-+ * But this means this file was closed and immediately opened, which
-+ * makes it a good candidate for using the OPEN lock */
-+ /* If lmmsize & lmm are not 0, we are just setting stripe info
-+ * parameters. No need for the open lock */
-+ if (!lmm && !lmmsize)
-+ itp->it_flags |= MDS_OPEN_LOCK;
-+
-+ rc = mdc_intent_lock(sbi->ll_mdc_exp, &data, lmm, lmmsize, itp,
-+ 0 /*unused */, &req, ll_mdc_blocking_ast, 0);
-+ if (rc == -ESTALE) {
-+ /* reason for keeping our own exit path - don't flood the log
-+ * with messages about -ESTALE errors.
-+ */
-+ if (!it_disposition(itp, DISP_OPEN_OPEN) ||
-+ it_open_error(DISP_OPEN_OPEN, itp))
-+ GOTO(out, rc);
-+ ll_release_openhandle(file->f_dentry, itp);
-+ GOTO(out, rc);
-+ }
-+
-+ if (rc != 0 || it_open_error(DISP_OPEN_OPEN, itp)) {
-+ rc = rc ? rc : it_open_error(DISP_OPEN_OPEN, itp);
-+ CDEBUG(D_VFSTRACE, "lock enqueue: err: %d\n", rc);
-+ GOTO(out, rc);
-+ }
-+
-+ if (itp->d.lustre.it_lock_mode)
-+ mdc_set_lock_data(&itp->d.lustre.it_lock_handle,
-+ inode);
-+
-+ rc = ll_prep_inode(sbi->ll_osc_exp, &file->f_dentry->d_inode,
-+ req, DLM_REPLY_REC_OFF, NULL);
-+out:
-+ ptlrpc_req_finished(itp->d.lustre.it_data);
-+ it_clear_disposition(itp, DISP_ENQ_COMPLETE);
-+ ll_intent_drop_lock(itp);
-+
-+ RETURN(rc);
-+}
-+
-+
-+static void ll_och_fill(struct ll_inode_info *lli, struct lookup_intent *it,
-+ struct obd_client_handle *och)
-+{
-+ struct ptlrpc_request *req = it->d.lustre.it_data;
-+ struct mds_body *body;
-+
-+ LASSERT(och);
-+
-+ body = lustre_msg_buf(req->rq_repmsg, DLM_REPLY_REC_OFF, sizeof(*body));
-+ LASSERT(body != NULL); /* reply already checked out */
-+ /* and swabbed in mdc_enqueue */
-+ LASSERT(lustre_rep_swabbed(req, DLM_REPLY_REC_OFF));
-+
-+ memcpy(&och->och_fh, &body->handle, sizeof(body->handle));
-+ och->och_magic = OBD_CLIENT_HANDLE_MAGIC;
-+ lli->lli_io_epoch = body->io_epoch;
-+
-+ mdc_set_open_replay_data(och, it->d.lustre.it_data);
-+}
-+
-+int ll_local_open(struct file *file, struct lookup_intent *it,
-+ struct ll_file_data *fd, struct obd_client_handle *och)
-+{
-+ ENTRY;
-+
-+ LASSERT(!LUSTRE_FPRIVATE(file));
-+
-+ LASSERT(fd != NULL);
-+
-+ if (och)
-+ ll_och_fill(ll_i2info(file->f_dentry->d_inode), it, och);
-+ LUSTRE_FPRIVATE(file) = fd;
-+ ll_readahead_init(file->f_dentry->d_inode, &fd->fd_ras);
-+ fd->fd_omode = it->it_flags;
-+
-+ RETURN(0);
-+}
-+
-+/* Open a file, and (for the very first open) create objects on the OSTs at
-+ * this time. If opened with O_LOV_DELAY_CREATE, then we don't do the object
-+ * creation or open until ll_lov_setstripe() ioctl is called. We grab
-+ * lli_open_sem to ensure no other process will create objects, send the
-+ * stripe MD to the MDS, or try to destroy the objects if that fails.
-+ *
-+ * If we already have the stripe MD locally then we don't request it in
-+ * mdc_open(), by passing a lmm_size = 0.
-+ *
-+ * It is up to the application to ensure no other processes open this file
-+ * in the O_LOV_DELAY_CREATE case, or the default striping pattern will be
-+ * used. We might be able to avoid races of that sort by getting lli_open_sem
-+ * before returning in the O_LOV_DELAY_CREATE case and dropping it here
-+ * or in ll_file_release(), but I'm not sure that is desirable/necessary.
-+ */
-+int ll_file_open(struct inode *inode, struct file *file)
-+{
-+ struct ll_inode_info *lli = ll_i2info(inode);
-+ struct lookup_intent *it, oit = { .it_op = IT_OPEN,
-+ .it_flags = file->f_flags };
-+ struct lov_stripe_md *lsm;
-+ struct ptlrpc_request *req = NULL;
-+ struct obd_client_handle **och_p;
-+ __u64 *och_usecount;
-+ struct ll_file_data *fd;
-+ int rc = 0, opendir_set = 0;
-+ ENTRY;
-+
-+ CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), flags %o\n", inode->i_ino,
-+ inode->i_generation, inode, file->f_flags);
-+
-+#ifdef HAVE_VFS_INTENT_PATCHES
-+ it = file->f_it;
-+#else
-+ it = file->private_data; /* XXX: compat macro */
-+ file->private_data = NULL; /* prevent ll_local_open assertion */
-+#endif
-+
-+ fd = ll_file_data_get();
-+ if (fd == NULL)
-+ RETURN(-ENOMEM);
-+
-+ if (S_ISDIR(inode->i_mode)) {
-+again:
-+ spin_lock(&lli->lli_lock);
-+ if (lli->lli_opendir_key == NULL && lli->lli_opendir_pid == 0) {
-+ LASSERT(lli->lli_sai == NULL);
-+ lli->lli_opendir_key = fd;
-+ lli->lli_opendir_pid = cfs_curproc_pid();
-+ opendir_set = 1;
-+ } else if (unlikely(lli->lli_opendir_pid == cfs_curproc_pid() &&
-+ lli->lli_opendir_key != NULL)) {
-+ /* Two cases for this:
-+ * (1) The same process opens such a directory many times.
-+ * (2) The old process opened the directory and exited
-+ * before its child processes did. Then a new process
-+ * with the same pid opens such a directory before the
-+ * old process's child processes exit.
-+ * Reset statahead for such cases. */
-+ spin_unlock(&lli->lli_lock);
-+ CDEBUG(D_INFO, "Conflict statahead for %.*s %lu/%u"
-+ " reset it.\n", file->f_dentry->d_name.len,
-+ file->f_dentry->d_name.name,
-+ inode->i_ino, inode->i_generation);
-+ ll_stop_statahead(inode, lli->lli_opendir_key);
-+ goto again;
-+ }
-+ spin_unlock(&lli->lli_lock);
-+ }
-+
-+ if (inode->i_sb->s_root == file->f_dentry) {
-+ LUSTRE_FPRIVATE(file) = fd;
-+ RETURN(0);
-+ }
-+
-+ if (!it || !it->d.lustre.it_disposition) {
-+ /* Convert f_flags into access mode. We cannot use file->f_mode,
-+ * because everything but O_ACCMODE mask was stripped from it */
-+ if ((oit.it_flags + 1) & O_ACCMODE)
-+ oit.it_flags++;
-+ if (file->f_flags & O_TRUNC)
-+ oit.it_flags |= FMODE_WRITE;
-+
-+ /* The kernel only calls f_op->open in dentry_open. filp_open calls
-+ * dentry_open after a call to open_namei that checks permissions.
-+ * Only nfsd_open calls dentry_open directly without checking
-+ * permissions, and because of that the code below is safe. */
-+ if (oit.it_flags & FMODE_WRITE)
-+ oit.it_flags |= MDS_OPEN_OWNEROVERRIDE;
-+
-+ /* We do not want O_EXCL here, presumably we opened the file
-+ * already? XXX - NFS implications? */
-+ oit.it_flags &= ~O_EXCL;
-+
-+ it = &oit;
-+ }
-+
-+restart:
-+ /* Let's see if we have file open on MDS already. */
-+ if (it->it_flags & FMODE_WRITE) {
-+ och_p = &lli->lli_mds_write_och;
-+ och_usecount = &lli->lli_open_fd_write_count;
-+ } else if (it->it_flags & FMODE_EXEC) {
-+ och_p = &lli->lli_mds_exec_och;
-+ och_usecount = &lli->lli_open_fd_exec_count;
-+ } else {
-+ och_p = &lli->lli_mds_read_och;
-+ och_usecount = &lli->lli_open_fd_read_count;
-+ }
-+
-+ LASSERTF(it->it_flags != 0, "it %p dist %d \n", it,
-+ it->d.lustre.it_disposition);
-+
-+ down(&lli->lli_och_sem);
-+ if (*och_p) { /* Open handle is present */
-+ if (it_disposition(it, DISP_OPEN_OPEN)) {
-+ /* Well, there's an extra open request that we do not need;
-+ let's close it somehow. This will decref the request. */
-+ rc = it_open_error(DISP_OPEN_OPEN, it);
-+ if (rc) {
-+ up(&lli->lli_och_sem);
-+ ll_file_data_put(fd);
-+ GOTO(out_openerr, rc);
-+ }
-+ ll_release_openhandle(file->f_dentry, it);
-+ lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats,
-+ LPROC_LL_OPEN);
-+ }
-+ (*och_usecount)++;
-+
-+ rc = ll_local_open(file, it, fd, NULL);
-+
-+ LASSERTF(rc == 0, "rc = %d\n", rc);
-+ } else {
-+ LASSERT(*och_usecount == 0);
-+ if (!it->d.lustre.it_disposition) {
-+ /* We cannot just request a lock handle now; the new ELC code
-+ means that one of the other OPEN locks for this file
-+ could be cancelled, and since the blocking ast handler
-+ would attempt to grab och_sem as well, that would
-+ result in a deadlock */
-+ up(&lli->lli_och_sem);
-+ rc = ll_intent_file_open(file, NULL, 0, it);
-+ if (rc) {
-+ ll_file_data_put(fd);
-+ GOTO(out_openerr, rc);
-+ }
-+
-+ mdc_set_lock_data(&it->d.lustre.it_lock_handle,
-+ file->f_dentry->d_inode);
-+ goto restart;
-+ }
-+
-+ OBD_ALLOC(*och_p, sizeof (struct obd_client_handle));
-+ if (!*och_p) {
-+ ll_file_data_put(fd);
-+ GOTO(out_och_free, rc = -ENOMEM);
-+ }
-+ (*och_usecount)++;
-+ req = it->d.lustre.it_data;
-+
-+ /* mdc_intent_lock() didn't get a request ref if there was an
-+ * open error, so don't do cleanup on the request here
-+ * (bug 3430) */
-+ /* XXX (green): Shouldn't we bail out on any error here, not
-+ * just an open error? */
-+ rc = it_open_error(DISP_OPEN_OPEN, it);
-+ if (rc) {
-+ ll_file_data_put(fd);
-+ GOTO(out_och_free, rc);
-+ }
-+
-+ ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_OPEN, 1);
-+ rc = ll_local_open(file, it, fd, *och_p);
-+ LASSERTF(rc == 0, "rc = %d\n", rc);
-+ }
-+ up(&lli->lli_och_sem);
-+
-+ /* Must do this outside lli_och_sem lock to prevent deadlock where
-+ different kind of OPEN lock for this same inode gets cancelled
-+ by ldlm_cancel_lru */
-+ if (!S_ISREG(inode->i_mode))
-+ GOTO(out, rc);
-+
-+ lsm = lli->lli_smd;
-+ if (lsm == NULL) {
-+ if (file->f_flags & O_LOV_DELAY_CREATE ||
-+ !(file->f_mode & FMODE_WRITE)) {
-+ CDEBUG(D_INODE, "object creation was delayed\n");
-+ GOTO(out, rc);
-+ }
-+ }
-+ file->f_flags &= ~O_LOV_DELAY_CREATE;
-+ GOTO(out, rc);
-+ out:
-+ ptlrpc_req_finished(req);
-+ if (req)
-+ it_clear_disposition(it, DISP_ENQ_OPEN_REF);
-+ if (rc == 0) {
-+ ll_open_complete(inode);
-+ } else {
-+out_och_free:
-+ if (*och_p) {
-+ OBD_FREE(*och_p, sizeof (struct obd_client_handle));
-+ *och_p = NULL; /* OBD_FREE writes some magic there */
-+ (*och_usecount)--;
-+ }
-+ up(&lli->lli_och_sem);
-+out_openerr:
-+ if (opendir_set != 0)
-+ ll_stop_statahead(inode, lli->lli_opendir_key);
-+ }
-+
-+ return rc;
-+}
-+
-+/* Fills the obdo with the attributes for the inode defined by lsm */
-+int ll_lsm_getattr(struct obd_export *exp, struct lov_stripe_md *lsm,
-+ struct obdo *oa)
-+{
-+ struct ptlrpc_request_set *set;
-+ struct obd_info oinfo = { { { 0 } } };
-+ int rc;
-+ ENTRY;
-+
-+ LASSERT(lsm != NULL);
-+
-+ memset(oa, 0, sizeof *oa);
-+ oinfo.oi_md = lsm;
-+ oinfo.oi_oa = oa;
-+ oa->o_id = lsm->lsm_object_id;
-+ oa->o_mode = S_IFREG;
-+ oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLSIZE |
-+ OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ | OBD_MD_FLMTIME |
-+ OBD_MD_FLCTIME;
-+
-+ set = ptlrpc_prep_set();
-+ if (set == NULL) {
-+ rc = -ENOMEM;
-+ } else {
-+ rc = obd_getattr_async(exp, &oinfo, set);
-+ if (rc == 0)
-+ rc = ptlrpc_set_wait(set);
-+ ptlrpc_set_destroy(set);
-+ }
-+ if (rc)
-+ RETURN(rc);
-+
-+ oa->o_valid &= (OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ | OBD_MD_FLMTIME |
-+ OBD_MD_FLCTIME | OBD_MD_FLSIZE);
-+ RETURN(0);
-+}
-+
-+static int ll_lock_to_stripe_offset(struct inode *inode, struct ldlm_lock *lock)
-+{
-+ struct ll_inode_info *lli = ll_i2info(inode);
-+ struct lov_stripe_md *lsm = lli->lli_smd;
-+ struct obd_export *exp = ll_i2obdexp(inode);
-+ struct {
-+ char name[16];
-+ struct ldlm_lock *lock;
-+ } key = { .name = KEY_LOCK_TO_STRIPE, .lock = lock };
-+ __u32 stripe, vallen = sizeof(stripe);
-+ int rc;
-+ ENTRY;
-+
-+ if (lsm->lsm_stripe_count == 1)
-+ GOTO(check, stripe = 0);
-+
-+ /* get our offset in the lov */
-+ rc = obd_get_info(exp, sizeof(key), &key, &vallen, &stripe, lsm);
-+ if (rc != 0) {
-+ CERROR("obd_get_info: rc = %d\n", rc);
-+ RETURN(rc);
-+ }
-+ LASSERT(stripe < lsm->lsm_stripe_count);
-+
-+check:
-+ if (lsm->lsm_oinfo[stripe]->loi_id != lock->l_resource->lr_name.name[0]||
-+ lsm->lsm_oinfo[stripe]->loi_gr != lock->l_resource->lr_name.name[1]){
-+ LDLM_ERROR(lock, "resource doesn't match object "LPU64"/"LPU64,
-+ lsm->lsm_oinfo[stripe]->loi_id,
-+ lsm->lsm_oinfo[stripe]->loi_gr);
-+ RETURN(-ELDLM_NO_LOCK_DATA);
-+ }
-+
-+ RETURN(stripe);
-+}
-+
-+/* Get extra page reference to ensure it is not going away */
-+void ll_pin_extent_cb(void *data)
-+{
-+ struct page *page = data;
-+
-+ page_cache_get(page);
-+
-+ return;
-+}
-+/* Flush the page from the page cache for an extent as it is canceled.
-+ * Page to remove is delivered as @data.
-+ *
-+ * No one can dirty the extent until we've finished our work and they cannot
-+ * enqueue another lock. The DLM protects us from ll_file_read/write here,
-+ * but other kernel actors could have pages locked.
-+ *
-+ * If @discard is set, there is no need to write the page if it is dirty.
-+ *
-+ * Called with the DLM lock held. */
-+int ll_page_removal_cb(void *data, int discard)
-+{
-+ int rc;
-+ struct page *page = data;
-+ struct address_space *mapping;
-+
-+ ENTRY;
-+
-+ /* We have page reference already from ll_pin_page */
-+ lock_page(page);
-+
-+ /* Already truncated by somebody */
-+ if (!page->mapping)
-+ GOTO(out, rc = 0);
-+
-+ mapping = page->mapping;
-+
-+ ll_teardown_mmaps(mapping,
-+ (__u64)page->index << PAGE_CACHE_SHIFT,
-+ ((__u64)page->index<<PAGE_CACHE_SHIFT)|
-+ ~PAGE_CACHE_MASK);
-+ LL_CDEBUG_PAGE(D_PAGE, page, "removing page\n");
-+ if (!discard && PageWriteback(page))
-+ wait_on_page_writeback(page);
-+
-+ if (!discard && clear_page_dirty_for_io(page)) {
-+ rc = ll_call_writepage(page->mapping->host, page);
-+ /* either waiting for io to complete or reacquiring
-+ * the lock that the failed writepage released */
-+ lock_page(page);
-+ wait_on_page_writeback(page);
-+ if (rc < 0) {
-+ CERROR("writepage inode %lu(%p) of page %p "
-+ "failed: %d\n", mapping->host->i_ino,
-+ mapping->host, page, rc);
-+ if (rc == -ENOSPC)
-+ set_bit(AS_ENOSPC, &mapping->flags);
-+ else
-+ set_bit(AS_EIO, &mapping->flags);
-+ }
-+ }
-+ if (page->mapping != NULL) {
-+ struct ll_async_page *llap = llap_cast_private(page);
-+ // checking again to account for writeback's lock_page()
-+ LL_CDEBUG_PAGE(D_PAGE, page, "truncating\n");
-+ if (llap)
-+ ll_ra_accounting(llap, page->mapping);
-+ ll_truncate_complete_page(page);
-+ }
-+ EXIT;
-+out:
-+ LASSERT(!PageWriteback(page));
-+ unlock_page(page);
-+ page_cache_release(page);
-+
-+ return 0;
-+}
-+
-+int ll_extent_lock_cancel_cb(struct ldlm_lock *lock, struct ldlm_lock_desc *new,
-+ void *data, int flag)
-+{
-+ struct inode *inode;
-+ struct ll_inode_info *lli;
-+ struct lov_stripe_md *lsm;
-+ int stripe;
-+ __u64 kms;
-+
-+ ENTRY;
-+
-+ if ((unsigned long)data > 0 && (unsigned long)data < 0x1000) {
-+ LDLM_ERROR(lock, "cancelling lock with bad data %p", data);
-+ LBUG();
-+ }
-+
-+ inode = ll_inode_from_lock(lock);
-+ if (inode == NULL)
-+ RETURN(0);
-+ lli = ll_i2info(inode);
-+ if (lli == NULL)
-+ GOTO(iput, 0);
-+ if (lli->lli_smd == NULL)
-+ GOTO(iput, 0);
-+ lsm = lli->lli_smd;
-+
-+ stripe = ll_lock_to_stripe_offset(inode, lock);
-+ if (stripe < 0)
-+ GOTO(iput, 0);
-+
-+ lov_stripe_lock(lsm);
-+ lock_res_and_lock(lock);
-+ kms = ldlm_extent_shift_kms(lock,
-+ lsm->lsm_oinfo[stripe]->loi_kms);
-+
-+ if (lsm->lsm_oinfo[stripe]->loi_kms != kms)
-+ LDLM_DEBUG(lock, "updating kms from "LPU64" to "LPU64,
-+ lsm->lsm_oinfo[stripe]->loi_kms, kms);
-+ lsm->lsm_oinfo[stripe]->loi_kms = kms;
-+ unlock_res_and_lock(lock);
-+ lov_stripe_unlock(lsm);
-+ ll_try_done_writing(inode);
-+ EXIT;
-+iput:
-+ iput(inode);
-+
-+ return 0;
-+}
-+
-+#if 0
-+int ll_async_completion_ast(struct ldlm_lock *lock, int flags, void *data)
-+{
-+ /* XXX ALLOCATE - 160 bytes */
-+ struct inode *inode = ll_inode_from_lock(lock);
-+ struct ll_inode_info *lli = ll_i2info(inode);
-+ struct lustre_handle lockh = { 0 };
-+ struct ost_lvb *lvb;
-+ int stripe;
-+ ENTRY;
-+
-+ if (flags & (LDLM_FL_BLOCK_WAIT | LDLM_FL_BLOCK_GRANTED |
-+ LDLM_FL_BLOCK_CONV)) {
-+ LBUG(); /* not expecting any blocked async locks yet */
-+ LDLM_DEBUG(lock, "client-side async enqueue returned a blocked "
-+ "lock, returning");
-+ ldlm_lock_dump(D_OTHER, lock, 0);
-+ ldlm_reprocess_all(lock->l_resource);
-+ RETURN(0);
-+ }
-+
-+ LDLM_DEBUG(lock, "client-side async enqueue: granted/glimpsed");
-+
-+ stripe = ll_lock_to_stripe_offset(inode, lock);
-+ if (stripe < 0)
-+ goto iput;
-+
-+ if (lock->l_lvb_len) {
-+ struct lov_stripe_md *lsm = lli->lli_smd;
-+ __u64 kms;
-+ lvb = lock->l_lvb_data;
-+ lsm->lsm_oinfo[stripe].loi_rss = lvb->lvb_size;
-+
-+ lock_res_and_lock(lock);
-+ ll_inode_size_lock(inode, 1);
-+ kms = MAX(lsm->lsm_oinfo[stripe].loi_kms, lvb->lvb_size);
-+ kms = ldlm_extent_shift_kms(NULL, kms);
-+ if (lsm->lsm_oinfo[stripe].loi_kms != kms)
-+ LDLM_DEBUG(lock, "updating kms from "LPU64" to "LPU64,
-+ lsm->lsm_oinfo[stripe].loi_kms, kms);
-+ lsm->lsm_oinfo[stripe].loi_kms = kms;
-+ ll_inode_size_unlock(inode, 1);
-+ unlock_res_and_lock(lock);
-+ }
-+
-+iput:
-+ iput(inode);
-+ wake_up(&lock->l_waitq);
-+
-+ ldlm_lock2handle(lock, &lockh);
-+ ldlm_lock_decref(&lockh, LCK_PR);
-+ RETURN(0);
-+}
-+#endif
-+
-+static int ll_glimpse_callback(struct ldlm_lock *lock, void *reqp)
-+{
-+ struct ptlrpc_request *req = reqp;
-+ struct inode *inode = ll_inode_from_lock(lock);
-+ struct ll_inode_info *lli;
-+ struct lov_stripe_md *lsm;
-+ struct ost_lvb *lvb;
-+ int rc, stripe;
-+ int size[2] = { sizeof(struct ptlrpc_body), sizeof(*lvb) };
-+ ENTRY;
-+
-+ if (inode == NULL)
-+ GOTO(out, rc = -ELDLM_NO_LOCK_DATA);
-+ lli = ll_i2info(inode);
-+ if (lli == NULL)
-+ GOTO(iput, rc = -ELDLM_NO_LOCK_DATA);
-+ lsm = lli->lli_smd;
-+ if (lsm == NULL)
-+ GOTO(iput, rc = -ELDLM_NO_LOCK_DATA);
-+
-+ /* First, find out which stripe index this lock corresponds to. */
-+ stripe = ll_lock_to_stripe_offset(inode, lock);
-+ if (stripe < 0)
-+ GOTO(iput, rc = -ELDLM_NO_LOCK_DATA);
-+
-+ rc = lustre_pack_reply(req, 2, size, NULL);
-+ if (rc)
-+ GOTO(iput, rc);
-+
-+ lvb = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF, sizeof(*lvb));
-+ lvb->lvb_size = lli->lli_smd->lsm_oinfo[stripe]->loi_kms;
-+ lvb->lvb_mtime = LTIME_S(inode->i_mtime);
-+ lvb->lvb_atime = LTIME_S(inode->i_atime);
-+ lvb->lvb_ctime = LTIME_S(inode->i_ctime);
-+
-+ LDLM_DEBUG(lock, "i_size: %llu -> stripe number %u -> kms "LPU64
-+ " atime "LPU64", mtime "LPU64", ctime "LPU64,
-+ i_size_read(inode), stripe, lvb->lvb_size, lvb->lvb_mtime,
-+ lvb->lvb_atime, lvb->lvb_ctime);
-+ iput:
-+ iput(inode);
-+
-+ out:
-+ /* These errors are normal races, so we don't want to fill the console
-+ * with messages by calling ptlrpc_error() */
-+ if (rc == -ELDLM_NO_LOCK_DATA)
-+ lustre_pack_reply(req, 1, NULL, NULL);
-+
-+ req->rq_status = rc;
-+ return rc;
-+}
-+
-+int ll_glimpse_ioctl(struct ll_sb_info *sbi, struct lov_stripe_md *lsm,
-+ lstat_t *st)
-+{
-+ struct lustre_handle lockh = { 0 };
-+ struct ldlm_enqueue_info einfo = { 0 };
-+ struct obd_info oinfo = { { { 0 } } };
-+ struct ost_lvb lvb;
-+ int rc;
-+
-+ ENTRY;
-+
-+ einfo.ei_type = LDLM_EXTENT;
-+ einfo.ei_mode = LCK_PR;
-+ einfo.ei_cb_bl = osc_extent_blocking_cb;
-+ einfo.ei_cb_cp = ldlm_completion_ast;
-+ einfo.ei_cb_gl = ll_glimpse_callback;
-+ einfo.ei_cbdata = NULL;
-+
-+ oinfo.oi_policy.l_extent.end = OBD_OBJECT_EOF;
-+ oinfo.oi_lockh = &lockh;
-+ oinfo.oi_md = lsm;
-+ oinfo.oi_flags = LDLM_FL_HAS_INTENT;
-+
-+ rc = obd_enqueue_rqset(sbi->ll_osc_exp, &oinfo, &einfo);
-+ if (rc == -ENOENT)
-+ RETURN(rc);
-+ if (rc != 0) {
-+ CERROR("obd_enqueue returned rc %d, "
-+ "returning -EIO\n", rc);
-+ RETURN(rc > 0 ? -EIO : rc);
-+ }
-+
-+ lov_stripe_lock(lsm);
-+ memset(&lvb, 0, sizeof(lvb));
-+ obd_merge_lvb(sbi->ll_osc_exp, lsm, &lvb, 0);
-+ st->st_size = lvb.lvb_size;
-+ st->st_blocks = lvb.lvb_blocks;
-+ st->st_mtime = lvb.lvb_mtime;
-+ st->st_atime = lvb.lvb_atime;
-+ st->st_ctime = lvb.lvb_ctime;
-+ lov_stripe_unlock(lsm);
-+
-+ RETURN(rc);
-+}
-+
-+/* NB: obd_merge_lvb will prefer locally cached writes if they extend the
-+ * file (because it prefers KMS over RSS when larger) */
-+int ll_glimpse_size(struct inode *inode, int ast_flags)
-+{
-+ struct ll_inode_info *lli = ll_i2info(inode);
-+ struct ll_sb_info *sbi = ll_i2sbi(inode);
-+ struct lustre_handle lockh = { 0 };
-+ struct ldlm_enqueue_info einfo = { 0 };
-+ struct obd_info oinfo = { { { 0 } } };
-+ struct ost_lvb lvb;
-+ int rc;
-+ ENTRY;
-+
-+ CDEBUG(D_DLMTRACE, "Glimpsing inode %lu\n", inode->i_ino);
-+
-+ if (!lli->lli_smd) {
-+ CDEBUG(D_DLMTRACE, "No objects for inode %lu\n", inode->i_ino);
-+ RETURN(0);
-+ }
-+
-+ /* NOTE: this looks like a DLM lock request, but it may not be one. Due
-+ * to the LDLM_FL_HAS_INTENT flag, this is a glimpse request that
-+ * won't revoke any conflicting DLM locks held. Instead,
-+ * ll_glimpse_callback() will be called on each client
-+ * holding a DLM lock against this file, and the resulting size
-+ * will be returned for each stripe. A DLM lock on [0, EOF] is
-+ * acquired only if there were no conflicting locks. */
-+ einfo.ei_type = LDLM_EXTENT;
-+ einfo.ei_mode = LCK_PR;
-+ einfo.ei_cb_bl = osc_extent_blocking_cb;
-+ einfo.ei_cb_cp = ldlm_completion_ast;
-+ einfo.ei_cb_gl = ll_glimpse_callback;
-+ einfo.ei_cbdata = inode;
-+
-+ oinfo.oi_policy.l_extent.end = OBD_OBJECT_EOF;
-+ oinfo.oi_lockh = &lockh;
-+ oinfo.oi_md = lli->lli_smd;
-+ oinfo.oi_flags = ast_flags | LDLM_FL_HAS_INTENT;
-+
-+ rc = obd_enqueue_rqset(sbi->ll_osc_exp, &oinfo, &einfo);
-+ if (rc == -ENOENT)
-+ RETURN(rc);
-+ if (rc != 0) {
-+ CERROR("obd_enqueue returned rc %d, returning -EIO\n", rc);
-+ RETURN(rc > 0 ? -EIO : rc);
-+ }
-+
-+ ll_inode_size_lock(inode, 1);
-+ inode_init_lvb(inode, &lvb);
-+ rc = obd_merge_lvb(sbi->ll_osc_exp, lli->lli_smd, &lvb, 0);
-+ i_size_write(inode, lvb.lvb_size);
-+ inode->i_blocks = lvb.lvb_blocks;
-+ LTIME_S(inode->i_mtime) = lvb.lvb_mtime;
-+ LTIME_S(inode->i_atime) = lvb.lvb_atime;
-+ LTIME_S(inode->i_ctime) = lvb.lvb_ctime;
-+ ll_inode_size_unlock(inode, 1);
-+
-+ CDEBUG(D_DLMTRACE, "glimpse: size: %llu, blocks: %llu\n",
-+ i_size_read(inode), (long long)inode->i_blocks);
-+
-+ RETURN(rc);
-+}
-+
-+int ll_extent_lock(struct ll_file_data *fd, struct inode *inode,
-+ struct lov_stripe_md *lsm, int mode,
-+ ldlm_policy_data_t *policy, struct lustre_handle *lockh,
-+ int ast_flags)
-+{
-+ struct ll_sb_info *sbi = ll_i2sbi(inode);
-+ struct ost_lvb lvb;
-+ struct ldlm_enqueue_info einfo = { 0 };
-+ struct obd_info oinfo = { { { 0 } } };
-+ int rc;
-+ ENTRY;
-+
-+ LASSERT(!lustre_handle_is_used(lockh));
-+ LASSERT(lsm != NULL);
-+
-+ /* don't drop the mmapped file to LRU */
-+ if (mapping_mapped(inode->i_mapping))
-+ ast_flags |= LDLM_FL_NO_LRU;
-+
-+ /* XXX phil: can we do this? won't it screw the file size up? */
-+ if ((fd && (fd->fd_flags & LL_FILE_IGNORE_LOCK)) ||
-+ (sbi->ll_flags & LL_SBI_NOLCK))
-+ RETURN(0);
-+
-+ CDEBUG(D_DLMTRACE, "Locking inode %lu, start "LPU64" end "LPU64"\n",
-+ inode->i_ino, policy->l_extent.start, policy->l_extent.end);
-+
-+ einfo.ei_type = LDLM_EXTENT;
-+ einfo.ei_mode = mode;
-+ einfo.ei_cb_bl = osc_extent_blocking_cb;
-+ einfo.ei_cb_cp = ldlm_completion_ast;
-+ einfo.ei_cb_gl = ll_glimpse_callback;
-+ einfo.ei_cbdata = inode;
-+
-+ oinfo.oi_policy = *policy;
-+ oinfo.oi_lockh = lockh;
-+ oinfo.oi_md = lsm;
-+ oinfo.oi_flags = ast_flags;
-+
-+ rc = obd_enqueue(sbi->ll_osc_exp, &oinfo, &einfo, NULL);
-+ *policy = oinfo.oi_policy;
-+ if (rc > 0)
-+ rc = -EIO;
-+
-+ ll_inode_size_lock(inode, 1);
-+ inode_init_lvb(inode, &lvb);
-+ obd_merge_lvb(sbi->ll_osc_exp, lsm, &lvb, 1);
-+
-+ if (policy->l_extent.start == 0 &&
-+ policy->l_extent.end == OBD_OBJECT_EOF) {
-+ /* vmtruncate()->ll_truncate() first sets the i_size and then
-+ * the kms under both a DLM lock and the
-+ * ll_inode_size_lock(). If we don't get the
-+ * ll_inode_size_lock() here we can match the DLM lock and
-+ * reset i_size from the kms before the truncating path has
-+ * updated the kms. generic_file_write can then trust the
-+ * stale i_size when doing appending writes and effectively
-+ * cancel the result of the truncate. Getting the
-+ * ll_inode_size_lock() after the enqueue maintains the DLM
-+ * -> ll_inode_size_lock() acquiring order. */
-+ i_size_write(inode, lvb.lvb_size);
-+ CDEBUG(D_INODE, "inode=%lu, updating i_size %llu\n",
-+ inode->i_ino, i_size_read(inode));
-+ }
-+
-+ if (rc == 0) {
-+ LTIME_S(inode->i_mtime) = lvb.lvb_mtime;
-+ LTIME_S(inode->i_atime) = lvb.lvb_atime;
-+ LTIME_S(inode->i_ctime) = lvb.lvb_ctime;
-+ }
-+ ll_inode_size_unlock(inode, 1);
-+
-+ RETURN(rc);
-+}
-+
-+int ll_extent_unlock(struct ll_file_data *fd, struct inode *inode,
-+ struct lov_stripe_md *lsm, int mode,
-+ struct lustre_handle *lockh)
-+{
-+ struct ll_sb_info *sbi = ll_i2sbi(inode);
-+ int rc;
-+ ENTRY;
-+
-+ /* XXX phil: can we do this? won't it screw the file size up? */
-+ if ((fd && (fd->fd_flags & LL_FILE_IGNORE_LOCK)) ||
-+ (sbi->ll_flags & LL_SBI_NOLCK))
-+ RETURN(0);
-+
-+ rc = obd_cancel(sbi->ll_osc_exp, lsm, mode, lockh);
-+
-+ RETURN(rc);
-+}
-+
-+static void ll_set_file_contended(struct inode *inode)
-+{
-+ struct ll_inode_info *lli = ll_i2info(inode);
-+
-+ lli->lli_contention_time = cfs_time_current();
-+ set_bit(LLI_F_CONTENDED, &lli->lli_flags);
-+}
-+
-+void ll_clear_file_contended(struct inode *inode)
-+{
-+ struct ll_inode_info *lli = ll_i2info(inode);
-+
-+ clear_bit(LLI_F_CONTENDED, &lli->lli_flags);
-+}
-+
-+static int ll_is_file_contended(struct file *file)
-+{
-+ struct inode *inode = file->f_dentry->d_inode;
-+ struct ll_inode_info *lli = ll_i2info(inode);
-+ struct ll_sb_info *sbi = ll_i2sbi(inode);
-+ struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
-+ ENTRY;
-+
-+ if (!(sbi->ll_lco.lco_flags & OBD_CONNECT_SRVLOCK)) {
-+ CDEBUG(D_INFO, "the server does not support SRVLOCK feature,"
-+ " osc connect flags = 0x"LPX64"\n",
-+ sbi->ll_lco.lco_flags);
-+ RETURN(0);
-+ }
-+ if (fd && (fd->fd_flags & LL_FILE_IGNORE_LOCK))
-+ RETURN(1);
-+ if (test_bit(LLI_F_CONTENDED, &lli->lli_flags)) {
-+ cfs_time_t cur_time = cfs_time_current();
-+ cfs_time_t retry_time;
-+
-+ retry_time = cfs_time_add(
-+ lli->lli_contention_time,
-+ cfs_time_seconds(sbi->ll_contention_time));
-+ if (cfs_time_after(cur_time, retry_time)) {
-+ ll_clear_file_contended(inode);
-+ RETURN(0);
-+ }
-+ RETURN(1);
-+ }
-+ RETURN(0);
-+}
-+
-+static int ll_file_get_tree_lock_iov(struct ll_lock_tree *tree,
-+ struct file *file, const struct iovec *iov,
-+ unsigned long nr_segs,
-+ loff_t start, loff_t end, int rw)
-+{
-+ int append;
-+ int tree_locked = 0;
-+ int rc;
-+ struct inode * inode = file->f_dentry->d_inode;
-+
-+ append = (rw == OBD_BRW_WRITE) && (file->f_flags & O_APPEND);
-+
-+ if (append || !ll_is_file_contended(file)) {
-+ struct ll_lock_tree_node *node;
-+ int ast_flags;
-+
-+ ast_flags = append ? 0 : LDLM_FL_DENY_ON_CONTENTION;
-+ if (file->f_flags & O_NONBLOCK)
-+ ast_flags |= LDLM_FL_BLOCK_NOWAIT;
-+ node = ll_node_from_inode(inode, start, end,
-+ (rw == OBD_BRW_WRITE) ? LCK_PW : LCK_PR);
-+ if (IS_ERR(node)) {
-+ rc = PTR_ERR(node);
-+ GOTO(out, rc);
-+ }
-+ tree->lt_fd = LUSTRE_FPRIVATE(file);
-+ rc = ll_tree_lock_iov(tree, node, iov, nr_segs, ast_flags);
-+ if (rc == 0)
-+ tree_locked = 1;
-+ else if (rc == -EUSERS)
-+ ll_set_file_contended(inode);
-+ else
-+ GOTO(out, rc);
-+ }
-+ RETURN(tree_locked);
-+out:
-+ return rc;
-+}
-+
-+/* XXX: exact copy from kernel code (__generic_file_aio_write_nolock from rhel4)
-+ */
-+static size_t ll_file_get_iov_count(const struct iovec *iov,
-+ unsigned long *nr_segs)
-+{
-+ size_t count = 0;
-+ unsigned long seg;
-+
-+ for (seg = 0; seg < *nr_segs; seg++) {
-+ const struct iovec *iv = &iov[seg];
-+
-+ /*
-+ * If any segment has a negative length, or the cumulative
-+ * length ever wraps negative then return -EINVAL.
-+ */
-+ count += iv->iov_len;
-+ if (unlikely((ssize_t)(count|iv->iov_len) < 0))
-+ return -EINVAL;
-+ if (access_ok(VERIFY_WRITE, iv->iov_base, iv->iov_len))
-+ continue;
-+ if (seg == 0)
-+ return -EFAULT;
-+ *nr_segs = seg;
-+ count -= iv->iov_len; /* This segment is no good */
-+ break;
-+ }
-+ return count;
-+}
-+
-+static int iov_copy_update(unsigned long *nr_segs, const struct iovec **iov_out,
-+ unsigned long *nrsegs_copy,
-+ struct iovec *iov_copy, size_t *offset,
-+ size_t size)
-+{
-+ int i;
-+ const struct iovec *iov = *iov_out;
-+ for (i = 0; i < *nr_segs; i++) {
-+ const struct iovec *iv = &iov[i];
-+ struct iovec *ivc = &iov_copy[i];
-+ *ivc = *iv;
-+ if (i == 0) {
-+ ivc->iov_len -= *offset;
-+ ivc->iov_base += *offset;
-+ }
-+ if (ivc->iov_len >= size) {
-+ ivc->iov_len = size;
-+ if (i == 0)
-+ *offset += size;
-+ else
-+ *offset = size;
-+ break;
-+ }
-+ size -= ivc->iov_len;
-+ }
-+ *iov_out += i;
-+ *nr_segs -= i;
-+ *nrsegs_copy = i + 1;
-+
-+ return 0;
-+}
-+
-+static int ll_reget_short_lock(struct page *page, int rw,
-+ obd_off start, obd_off end,
-+ void **cookie)
-+{
-+ struct ll_async_page *llap;
-+ struct obd_export *exp;
-+ struct inode *inode = page->mapping->host;
-+
-+ ENTRY;
-+
-+ exp = ll_i2obdexp(inode);
-+ if (exp == NULL)
-+ RETURN(0);
-+
-+ llap = llap_cast_private(page);
-+ if (llap == NULL)
-+ RETURN(0);
-+
-+ RETURN(obd_reget_short_lock(exp, ll_i2info(inode)->lli_smd,
-+ &llap->llap_cookie, rw, start, end,
-+ cookie));
-+}
-+
-+static void ll_release_short_lock(struct inode *inode, obd_off end,
-+ void *cookie, int rw)
-+{
-+ struct obd_export *exp;
-+ int rc;
-+
-+ exp = ll_i2obdexp(inode);
-+ if (exp == NULL)
-+ return;
-+
-+ rc = obd_release_short_lock(exp, ll_i2info(inode)->lli_smd, end,
-+ cookie, rw);
-+ if (rc < 0)
-+ CERROR("unlock failed (%d)\n", rc);
-+}
-+
-+static inline int ll_file_get_fast_lock(struct file *file,
-+ obd_off ppos, obd_off end,
-+ const struct iovec *iov,
-+ unsigned long nr_segs,
-+ void **cookie, int rw)
-+{
-+ int rc = 0, seg;
-+ struct page *page;
-+
-+ ENTRY;
-+
-+ /* we would like this read request to be lockfree */
-+ for (seg = 0; seg < nr_segs; seg++) {
-+ const struct iovec *iv = &iov[seg];
-+ if (ll_region_mapped((unsigned long)iv->iov_base, iv->iov_len))
-+ GOTO(out, rc);
-+ }
-+
-+ page = find_lock_page(file->f_dentry->d_inode->i_mapping,
-+ ppos >> CFS_PAGE_SHIFT);
-+ if (page) {
-+ if (ll_reget_short_lock(page, rw, ppos, end, cookie))
-+ rc = 1;
-+
-+ unlock_page(page);
-+ page_cache_release(page);
-+ }
-+
-+out:
-+ RETURN(rc);
-+}
-+
-+static inline void ll_file_put_fast_lock(struct inode *inode, obd_off end,
-+ void *cookie, int rw)
-+{
-+ ll_release_short_lock(inode, end, cookie, rw);
-+}
-+
-+enum ll_lock_style {
-+ LL_LOCK_STYLE_NOLOCK = 0,
-+ LL_LOCK_STYLE_FASTLOCK = 1,
-+ LL_LOCK_STYLE_TREELOCK = 2
-+};
-+
-+static inline int ll_file_get_lock(struct file *file, obd_off ppos,
-+ obd_off end, const struct iovec *iov,
-+ unsigned long nr_segs, void **cookie,
-+ struct ll_lock_tree *tree, int rw)
-+{
-+ int rc;
-+
-+ ENTRY;
-+
-+ if (ll_file_get_fast_lock(file, ppos, end, iov, nr_segs, cookie, rw))
-+ RETURN(LL_LOCK_STYLE_FASTLOCK);
-+
-+ rc = ll_file_get_tree_lock_iov(tree, file, iov, nr_segs,
-+ ppos, end, rw);
-+ /* rc: 1 for tree lock, 0 for no lock, <0 for error */
-+ switch (rc) {
-+ case 1:
-+ RETURN(LL_LOCK_STYLE_TREELOCK);
-+ case 0:
-+ RETURN(LL_LOCK_STYLE_NOLOCK);
-+ }
-+
-+ /* an error happened if we reached this point, rc = -errno here */
-+ RETURN(rc);
-+}
-+
-+static inline void ll_file_put_lock(struct inode *inode, obd_off end,
-+ enum ll_lock_style lock_style,
-+ void *cookie, struct ll_lock_tree *tree,
-+ int rw)
-+
-+{
-+ switch (lock_style) {
-+ case LL_LOCK_STYLE_TREELOCK:
-+ ll_tree_unlock(tree);
-+ break;
-+ case LL_LOCK_STYLE_FASTLOCK:
-+ ll_file_put_fast_lock(inode, end, cookie, rw);
-+ break;
-+ default:
-+ CERROR("invalid locking style (%d)\n", lock_style);
-+ }
-+}
-+
-+#ifdef HAVE_FILE_READV
-+static ssize_t ll_file_readv(struct file *file, const struct iovec *iov,
-+ unsigned long nr_segs, loff_t *ppos)
-+{
-+#else
-+static ssize_t ll_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
-+ unsigned long nr_segs, loff_t pos)
-+{
-+ struct file *file = iocb->ki_filp;
-+ loff_t *ppos = &iocb->ki_pos;
-+#endif
-+ struct inode *inode = file->f_dentry->d_inode;
-+ struct ll_inode_info *lli = ll_i2info(inode);
-+ struct lov_stripe_md *lsm = lli->lli_smd;
-+ struct ll_sb_info *sbi = ll_i2sbi(inode);
-+ struct ll_lock_tree tree;
-+ struct ost_lvb lvb;
-+ struct ll_ra_read bead;
-+ int ra = 0;
-+ obd_off end;
-+ ssize_t retval, chunk, sum = 0;
-+ int lock_style;
-+ struct iovec *iov_copy = NULL;
-+ unsigned long nrsegs_copy, nrsegs_orig = 0;
-+ size_t count, iov_offset = 0;
-+ __u64 kms;
-+ void *cookie;
-+ ENTRY;
-+
-+ count = ll_file_get_iov_count(iov, &nr_segs);
-+ CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),size="LPSZ",offset=%Ld\n",
-+ inode->i_ino, inode->i_generation, inode, count, *ppos);
-+ /* "If nbyte is 0, read() will return 0 and have no other results."
-+ * -- Single Unix Spec */
-+ if (count == 0)
-+ RETURN(0);
-+
-+ ll_stats_ops_tally(sbi, LPROC_LL_READ_BYTES, count);
-+
-+ if (!lsm) {
-+ /* A read on a file with no objects should return zero-filled
-+ * buffers up to the file size (we can get non-zero sizes with
-+ * mknod + truncate, then opening the file for read; this seems
-+ * to be a common pattern in the NFS case). Bug 6243 */
-+ int notzeroed;
-+ /* Since there are no objects on OSTs, we have nothing to get
-+ * lock on and so we are forced to access inode->i_size
-+ * unguarded */
-+
-+ /* Read beyond end of file */
-+ if (*ppos >= i_size_read(inode))
-+ RETURN(0);
-+
-+ if (count > i_size_read(inode) - *ppos)
-+ count = i_size_read(inode) - *ppos;
-+ /* Make sure to correctly adjust the file pos pointer for
-+ * EFAULT case */
-+ for (nrsegs_copy = 0; nrsegs_copy < nr_segs; nrsegs_copy++) {
-+ const struct iovec *iv = &iov[nrsegs_copy];
-+
-+ if (count < iv->iov_len)
-+ chunk = count;
-+ else
-+ chunk = iv->iov_len;
-+ notzeroed = clear_user(iv->iov_base, chunk);
-+ sum += (chunk - notzeroed);
-+ count -= (chunk - notzeroed);
-+ if (notzeroed || !count)
-+ break;
-+ }
-+ *ppos += sum;
-+ if (!sum)
-+ RETURN(-EFAULT);
-+ RETURN(sum);
-+ }
-+
-+repeat:
-+ if (sbi->ll_max_rw_chunk != 0) {
-+ /* first, find the end of the current stripe */
-+ end = *ppos;
-+ obd_extent_calc(sbi->ll_osc_exp, lsm, OBD_CALC_STRIPE_END,
-+ (obd_off *)&end);
-+
-+ /* clamp the end if it is beyond the request */
-+ if (end > *ppos + count - 1)
-+ end = *ppos + count - 1;
-+
-+ /* and chunk shouldn't be too large even if striping is wide */
-+ if (end - *ppos > sbi->ll_max_rw_chunk)
-+ end = *ppos + sbi->ll_max_rw_chunk - 1;
-+
-+ chunk = end - *ppos + 1;
-+ if ((count == chunk) && (iov_offset == 0)) {
-+ if (iov_copy)
-+ OBD_FREE(iov_copy, sizeof(*iov) * nrsegs_orig);
-+
-+ iov_copy = (struct iovec *)iov;
-+ nrsegs_copy = nr_segs;
-+ } else {
-+ if (!iov_copy) {
-+ nrsegs_orig = nr_segs;
-+ OBD_ALLOC(iov_copy, sizeof(*iov) * nr_segs);
-+ if (!iov_copy)
-+ GOTO(out, retval = -ENOMEM);
-+ }
-+
-+ iov_copy_update(&nr_segs, &iov, &nrsegs_copy, iov_copy,
-+ &iov_offset, chunk);
-+ }
-+ } else {
-+ end = *ppos + count - 1;
-+ iov_copy = (struct iovec *)iov;
-+ nrsegs_copy = nr_segs;
-+ }
-+
-+ lock_style = ll_file_get_lock(file, (obd_off)(*ppos), end,
-+ iov_copy, nrsegs_copy, &cookie, &tree,
-+ OBD_BRW_READ);
-+ if (lock_style < 0)
-+ GOTO(out, retval = lock_style);
-+
-+ ll_inode_size_lock(inode, 1);
-+ /*
-+ * Consistency guarantees: following possibilities exist for the
-+ * relation between region being read and real file size at this
-+ * moment:
-+ *
-+ * (A): the region is completely inside of the file;
-+ *
-+ * (B-x): x bytes of region are inside of the file, the rest is
-+ * outside;
-+ *
-+ * (C): the region is completely outside of the file.
-+ *
-+ * This classification is stable under DLM lock acquired by
-+ * ll_tree_lock() above, because to change class, other client has to
-+ * take DLM lock conflicting with our lock. Also, any updates to
-+ * ->i_size by other threads on this client are serialized by
-+ * ll_inode_size_lock(). This guarantees that short reads are handled
-+ * correctly in the face of concurrent writes and truncates.
-+ */
-+ inode_init_lvb(inode, &lvb);
-+ obd_merge_lvb(ll_i2sbi(inode)->ll_osc_exp, lsm, &lvb, 1);
-+ kms = lvb.lvb_size;
-+ if (*ppos + count - 1 > kms) {
-+ /* A glimpse is necessary to determine whether we return a
-+ * short read (B) or some zeroes at the end of the buffer (C) */
-+ ll_inode_size_unlock(inode, 1);
-+ retval = ll_glimpse_size(inode, LDLM_FL_BLOCK_GRANTED);
-+ if (retval) {
-+ if (lock_style != LL_LOCK_STYLE_NOLOCK)
-+ ll_file_put_lock(inode, end, lock_style,
-+ cookie, &tree, OBD_BRW_READ);
-+ goto out;
-+ }
-+ } else {
-+ /* region is within kms and, hence, within real file size (A).
-+ * We need to increase i_size to cover the read region so that
-+ * generic_file_read() will do its job, but that doesn't mean
-+ * the kms size is _correct_, it is only the _minimum_ size.
-+ * If someone does a stat they will get the correct size which
-+ * will always be >= the kms value here. b=11081 */
-+ if (i_size_read(inode) < kms)
-+ i_size_write(inode, kms);
-+ ll_inode_size_unlock(inode, 1);
-+ }
-+
-+ chunk = end - *ppos + 1;
-+ CDEBUG(D_INODE,"Read ino %lu, "LPSZ" bytes, offset %lld, i_size %llu\n",
-+ inode->i_ino, chunk, *ppos, i_size_read(inode));
-+
-+ /* turn off the kernel's read-ahead */
-+ if (lock_style != LL_LOCK_STYLE_NOLOCK) {
-+ file->f_ra.ra_pages = 0;
-+ /* initialize read-ahead window once per syscall */
-+ if (ra == 0) {
-+ ra = 1;
-+ bead.lrr_start = *ppos >> CFS_PAGE_SHIFT;
-+ bead.lrr_count = (count + CFS_PAGE_SIZE - 1) >> CFS_PAGE_SHIFT;
-+ ll_ra_read_in(file, &bead);
-+ }
-+
-+ /* BUG: 5972 */
-+ file_accessed(file);
-+#ifdef HAVE_FILE_READV
-+ retval = generic_file_readv(file, iov_copy, nrsegs_copy, ppos);
-+#else
-+ retval = generic_file_aio_read(iocb, iov_copy, nrsegs_copy,
-+ *ppos);
-+#endif
-+ ll_file_put_lock(inode, end, lock_style, cookie,
-+ &tree, OBD_BRW_READ);
-+ } else {
-+ retval = ll_file_lockless_io(file, iov_copy, nrsegs_copy, ppos,
-+ READ, chunk);
-+ }
-+ ll_rw_stats_tally(sbi, current->pid, file, count, 0);
-+ if (retval > 0) {
-+ count -= retval;
-+ sum += retval;
-+ if (retval == chunk && count > 0)
-+ goto repeat;
-+ }
-+
-+ out:
-+ if (ra != 0)
-+ ll_ra_read_ex(file, &bead);
-+ retval = (sum > 0) ? sum : retval;
-+
-+ if (iov_copy && iov_copy != iov)
-+ OBD_FREE(iov_copy, sizeof(*iov) * nrsegs_orig);
-+
-+ RETURN(retval);
-+}
-+
-+static ssize_t ll_file_read(struct file *file, char *buf, size_t count,
-+ loff_t *ppos)
-+{
-+ struct iovec local_iov = { .iov_base = (void __user *)buf,
-+ .iov_len = count };
-+#ifdef HAVE_FILE_READV
-+ return ll_file_readv(file, &local_iov, 1, ppos);
-+#else
-+ struct kiocb kiocb;
-+ ssize_t ret;
-+
-+ init_sync_kiocb(&kiocb, file);
-+ kiocb.ki_pos = *ppos;
-+ kiocb.ki_left = count;
-+
-+ ret = ll_file_aio_read(&kiocb, &local_iov, 1, kiocb.ki_pos);
-+ *ppos = kiocb.ki_pos;
-+ return ret;
-+#endif
-+}
-+
-+/*
-+ * Write to a file (through the page cache).
-+ */
-+#ifdef HAVE_FILE_WRITEV
-+static ssize_t ll_file_writev(struct file *file, const struct iovec *iov,
-+ unsigned long nr_segs, loff_t *ppos)
-+{
-+#else /* AIO stuff */
-+static ssize_t ll_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
-+ unsigned long nr_segs, loff_t pos)
-+{
-+ struct file *file = iocb->ki_filp;
-+ loff_t *ppos = &iocb->ki_pos;
-+#endif
-+ struct inode *inode = file->f_dentry->d_inode;
-+ struct ll_sb_info *sbi = ll_i2sbi(inode);
-+ struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
-+ struct ll_lock_tree tree;
-+ loff_t maxbytes = ll_file_maxbytes(inode);
-+ loff_t lock_start, lock_end, end;
-+ ssize_t retval, chunk, sum = 0;
-+ int tree_locked;
-+ struct iovec *iov_copy = NULL;
-+ unsigned long nrsegs_copy, nrsegs_orig = 0;
-+ size_t count, iov_offset = 0;
-+ ENTRY;
-+
-+ count = ll_file_get_iov_count(iov, &nr_segs);
-+
-+ CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),size="LPSZ",offset=%Ld\n",
-+ inode->i_ino, inode->i_generation, inode, count, *ppos);
-+
-+ SIGNAL_MASK_ASSERT(); /* XXX BUG 1511 */
-+
-+ /* POSIX, but surprised the VFS doesn't check this already */
-+ if (count == 0)
-+ RETURN(0);
-+
-+ /* If file was opened for LL_IOC_LOV_SETSTRIPE but the ioctl wasn't
-+ * called on the file, don't fail the below assertion (bug 2388). */
-+ if (file->f_flags & O_LOV_DELAY_CREATE &&
-+ ll_i2info(inode)->lli_smd == NULL)
-+ RETURN(-EBADF);
-+
-+ LASSERT(ll_i2info(inode)->lli_smd != NULL);
-+
-+ down(&ll_i2info(inode)->lli_write_sem);
-+
-+repeat:
-+ chunk = 0; /* just to fix gcc's warning */
-+ end = *ppos + count - 1;
-+
-+ if (file->f_flags & O_APPEND) {
-+ lock_start = 0;
-+ lock_end = OBD_OBJECT_EOF;
-+ iov_copy = (struct iovec *)iov;
-+ nrsegs_copy = nr_segs;
-+ } else if (sbi->ll_max_rw_chunk != 0) {
-+ /* first, find the end of the current stripe */
-+ end = *ppos;
-+ obd_extent_calc(sbi->ll_osc_exp, lsm, OBD_CALC_STRIPE_END,
-+ (obd_off *)&end);
-+
-+ /* clamp the end if it is beyond the request */
-+ if (end > *ppos + count - 1)
-+ end = *ppos + count - 1;
-+
-+ /* and chunk shouldn't be too large even if striping is wide */
-+ if (end - *ppos > sbi->ll_max_rw_chunk)
-+ end = *ppos + sbi->ll_max_rw_chunk - 1;
-+ lock_start = *ppos;
-+ lock_end = end;
-+ chunk = end - *ppos + 1;
-+ if ((count == chunk) && (iov_offset == 0)) {
-+ if (iov_copy)
-+ OBD_FREE(iov_copy, sizeof(*iov) * nrsegs_orig);
-+
-+ iov_copy = (struct iovec *)iov;
-+ nrsegs_copy = nr_segs;
-+ } else {
-+ if (!iov_copy) {
-+ nrsegs_orig = nr_segs;
-+ OBD_ALLOC(iov_copy, sizeof(*iov) * nr_segs);
-+ if (!iov_copy)
-+ GOTO(out, retval = -ENOMEM);
-+ }
-+ iov_copy_update(&nr_segs, &iov, &nrsegs_copy, iov_copy,
-+ &iov_offset, chunk);
-+ }
-+ } else {
-+ lock_start = *ppos;
-+ lock_end = end;
-+ iov_copy = (struct iovec *)iov;
-+ nrsegs_copy = nr_segs;
-+ }
-+
-+ tree_locked = ll_file_get_tree_lock_iov(&tree, file, iov_copy,
-+ nrsegs_copy,
-+ (obd_off)lock_start,
-+ (obd_off)lock_end,
-+ OBD_BRW_WRITE);
-+ if (tree_locked < 0)
-+ GOTO(out, retval = tree_locked);
-+
-+ /* This is ok, g_f_w will overwrite this under i_sem if it races
-+ * with a local truncate, it just makes our maxbyte checking easier.
-+ * The i_size value gets updated in ll_extent_lock() as a consequence
-+ * of the [0,EOF] extent lock we requested above. */
-+ if (file->f_flags & O_APPEND) {
-+ *ppos = i_size_read(inode);
-+ end = *ppos + count - 1;
-+ }
-+
-+ if (*ppos >= maxbytes) {
-+ send_sig(SIGXFSZ, current, 0);
-+ GOTO(out_unlock, retval = -EFBIG);
-+ }
-+ if (end > maxbytes - 1)
-+ end = maxbytes - 1;
-+
-+ /* generic_file_write handles O_APPEND after getting i_mutex */
-+ chunk = end - *ppos + 1;
-+ CDEBUG(D_INFO, "Writing inode %lu, "LPSZ" bytes, offset %Lu\n",
-+ inode->i_ino, chunk, *ppos);
-+ if (tree_locked)
-+#ifdef HAVE_FILE_WRITEV
-+ retval = generic_file_writev(file, iov_copy, nrsegs_copy, ppos);
-+#else
-+ retval = generic_file_aio_write(iocb, iov_copy, nrsegs_copy,
-+ *ppos);
-+#endif
-+ else
-+ retval = ll_file_lockless_io(file, iov_copy, nrsegs_copy,
-+ ppos, WRITE, chunk);
-+ ll_rw_stats_tally(ll_i2sbi(inode), current->pid, file, chunk, 1);
-+
-+out_unlock:
-+ if (tree_locked)
-+ ll_tree_unlock(&tree);
-+
-+out:
-+ if (retval > 0) {
-+ count -= retval;
-+ sum += retval;
-+ if (retval == chunk && count > 0)
-+ goto repeat;
-+ }
-+
-+ up(&ll_i2info(inode)->lli_write_sem);
-+
-+ if (iov_copy && iov_copy != iov)
-+ OBD_FREE(iov_copy, sizeof(*iov) * nrsegs_orig);
-+
-+ retval = (sum > 0) ? sum : retval;
-+ ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_WRITE_BYTES,
-+ retval > 0 ? retval : 0);
-+ RETURN(retval);
-+}
-+
-+static ssize_t ll_file_write(struct file *file, const char *buf, size_t count,
-+ loff_t *ppos)
-+{
-+ struct iovec local_iov = { .iov_base = (void __user *)buf,
-+ .iov_len = count };
-+
-+#ifdef HAVE_FILE_WRITEV
-+ return ll_file_writev(file, &local_iov, 1, ppos);
-+#else
-+ struct kiocb kiocb;
-+ ssize_t ret;
-+
-+ init_sync_kiocb(&kiocb, file);
-+ kiocb.ki_pos = *ppos;
-+ kiocb.ki_left = count;
-+
-+ ret = ll_file_aio_write(&kiocb, &local_iov, 1, kiocb.ki_pos);
-+ *ppos = kiocb.ki_pos;
-+
-+ return ret;
-+#endif
-+}
-+
-+/*
-+ * Send file content (through pagecache) somewhere with helper
-+ */
-+static ssize_t ll_file_sendfile(struct file *in_file, loff_t *ppos,size_t count,
-+ read_actor_t actor, void *target)
-+{
-+ struct inode *inode = in_file->f_dentry->d_inode;
-+ struct ll_inode_info *lli = ll_i2info(inode);
-+ struct lov_stripe_md *lsm = lli->lli_smd;
-+ struct ll_lock_tree tree;
-+ struct ll_lock_tree_node *node;
-+ struct ost_lvb lvb;
-+ struct ll_ra_read bead;
-+ int rc;
-+ ssize_t retval;
-+ __u64 kms;
-+ ENTRY;
-+ CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),size="LPSZ",offset=%Ld\n",
-+ inode->i_ino, inode->i_generation, inode, count, *ppos);
-+
-+ /* "If nbyte is 0, read() will return 0 and have no other results."
-+ * -- Single Unix Spec */
-+ if (count == 0)
-+ RETURN(0);
-+
-+ ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_READ_BYTES, count);
-+ /* turn off the kernel's read-ahead */
-+ in_file->f_ra.ra_pages = 0;
-+
-+ /* File with no objects, nothing to lock */
-+ if (!lsm)
-+ RETURN(generic_file_sendfile(in_file, ppos, count, actor, target));
-+
-+ node = ll_node_from_inode(inode, *ppos, *ppos + count - 1, LCK_PR);
-+ if (IS_ERR(node))
-+ RETURN(PTR_ERR(node));
-+
-+ tree.lt_fd = LUSTRE_FPRIVATE(in_file);
-+ rc = ll_tree_lock(&tree, node, NULL, count,
-+ in_file->f_flags & O_NONBLOCK?LDLM_FL_BLOCK_NOWAIT:0);
-+ if (rc != 0)
-+ RETURN(rc);
-+
-+ ll_clear_file_contended(inode);
-+ ll_inode_size_lock(inode, 1);
-+ /*
-+ * Consistency guarantees: following possibilities exist for the
-+ * relation between region being read and real file size at this
-+ * moment:
-+ *
-+ * (A): the region is completely inside of the file;
-+ *
-+ * (B-x): x bytes of region are inside of the file, the rest is
-+ * outside;
-+ *
-+ * (C): the region is completely outside of the file.
-+ *
-+ * This classification is stable under DLM lock acquired by
-+ * ll_tree_lock() above, because to change class, other client has to
-+ * take DLM lock conflicting with our lock. Also, any updates to
-+ * ->i_size by other threads on this client are serialized by
-+ * ll_inode_size_lock(). This guarantees that short reads are handled
-+ * correctly in the face of concurrent writes and truncates.
-+ */
-+ inode_init_lvb(inode, &lvb);
-+ obd_merge_lvb(ll_i2sbi(inode)->ll_osc_exp, lsm, &lvb, 1);
-+ kms = lvb.lvb_size;
-+ if (*ppos + count - 1 > kms) {
-+ /* A glimpse is necessary to determine whether we return a
-+ * short read (B) or some zeroes at the end of the buffer (C) */
-+ ll_inode_size_unlock(inode, 1);
-+ retval = ll_glimpse_size(inode, LDLM_FL_BLOCK_GRANTED);
-+ if (retval)
-+ goto out;
-+ } else {
-+ /* region is within kms and, hence, within real file size (A) */
-+ i_size_write(inode, kms);
-+ ll_inode_size_unlock(inode, 1);
-+ }
-+
-+ CDEBUG(D_INFO, "Send ino %lu, "LPSZ" bytes, offset %lld, i_size %llu\n",
-+ inode->i_ino, count, *ppos, i_size_read(inode));
-+
-+ bead.lrr_start = *ppos >> CFS_PAGE_SHIFT;
-+ bead.lrr_count = (count + CFS_PAGE_SIZE - 1) >> CFS_PAGE_SHIFT;
-+ ll_ra_read_in(in_file, &bead);
-+ /* BUG: 5972 */
-+ file_accessed(in_file);
-+ retval = generic_file_sendfile(in_file, ppos, count, actor, target);
-+ ll_ra_read_ex(in_file, &bead);
-+
-+ out:
-+ ll_tree_unlock(&tree);
-+ RETURN(retval);
-+}
-+
-+static int ll_lov_recreate_obj(struct inode *inode, struct file *file,
-+ unsigned long arg)
-+{
-+ struct ll_inode_info *lli = ll_i2info(inode);
-+ struct obd_export *exp = ll_i2obdexp(inode);
-+ struct ll_recreate_obj ucreatp;
-+ struct obd_trans_info oti = { 0 };
-+ struct obdo *oa = NULL;
-+ int lsm_size;
-+ int rc = 0;
-+ struct lov_stripe_md *lsm, *lsm2;
-+ ENTRY;
-+
-+ if (!cfs_capable(CFS_CAP_SYS_ADMIN))
-+ RETURN(-EPERM);
-+
-+ rc = copy_from_user(&ucreatp, (struct ll_recreate_obj *)arg,
-+ sizeof(struct ll_recreate_obj));
-+ if (rc) {
-+ RETURN(-EFAULT);
-+ }
-+ OBDO_ALLOC(oa);
-+ if (oa == NULL)
-+ RETURN(-ENOMEM);
-+
-+ down(&lli->lli_size_sem);
-+ lsm = lli->lli_smd;
-+ if (lsm == NULL)
-+ GOTO(out, rc = -ENOENT);
-+ lsm_size = sizeof(*lsm) + (sizeof(struct lov_oinfo) *
-+ (lsm->lsm_stripe_count));
-+
-+ OBD_ALLOC(lsm2, lsm_size);
-+ if (lsm2 == NULL)
-+ GOTO(out, rc = -ENOMEM);
-+
-+ oa->o_id = ucreatp.lrc_id;
-+ oa->o_nlink = ucreatp.lrc_ost_idx;
-+ oa->o_flags |= OBD_FL_RECREATE_OBJS;
-+ oa->o_valid = OBD_MD_FLID | OBD_MD_FLFLAGS;
-+ obdo_from_inode(oa, inode, OBD_MD_FLTYPE | OBD_MD_FLATIME |
-+ OBD_MD_FLMTIME | OBD_MD_FLCTIME);
-+
-+ memcpy(lsm2, lsm, lsm_size);
-+ rc = obd_create(exp, oa, &lsm2, &oti);
-+
-+ OBD_FREE(lsm2, lsm_size);
-+ GOTO(out, rc);
-+out:
-+ up(&lli->lli_size_sem);
-+ OBDO_FREE(oa);
-+ return rc;
-+}
-+
-+int ll_lov_setstripe_ea_info(struct inode *inode, struct file *file,
-+ int flags, struct lov_user_md *lum,
-+ int lum_size)
-+{
-+ struct ll_inode_info *lli = ll_i2info(inode);
-+ struct lov_stripe_md *lsm;
-+ struct lookup_intent oit = {.it_op = IT_OPEN, .it_flags = flags};
-+ int rc = 0;
-+ ENTRY;
-+
-+ down(&lli->lli_size_sem);
-+ lsm = lli->lli_smd;
-+ if (lsm) {
-+ up(&lli->lli_size_sem);
-+ CDEBUG(D_IOCTL, "stripe already exists for ino %lu\n",
-+ inode->i_ino);
-+ RETURN(-EEXIST);
-+ }
-+
-+ rc = ll_intent_file_open(file, lum, lum_size, &oit);
-+ if (rc)
-+ GOTO(out, rc);
-+ if (it_disposition(&oit, DISP_LOOKUP_NEG))
-+ GOTO(out_req_free, rc = -ENOENT);
-+ rc = oit.d.lustre.it_status;
-+ if (rc < 0)
-+ GOTO(out_req_free, rc);
-+
-+ ll_release_openhandle(file->f_dentry, &oit);
-+
-+ out:
-+ up(&lli->lli_size_sem);
-+ ll_intent_release(&oit);
-+ RETURN(rc);
-+out_req_free:
-+ ptlrpc_req_finished((struct ptlrpc_request *) oit.d.lustre.it_data);
-+ goto out;
-+}
-+
-+int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename,
-+ struct lov_mds_md **lmmp, int *lmm_size,
-+ struct ptlrpc_request **request)
-+{
-+ struct ll_sb_info *sbi = ll_i2sbi(inode);
-+ struct ll_fid fid;
-+ struct mds_body *body;
-+ struct lov_mds_md *lmm = NULL;
-+ struct ptlrpc_request *req = NULL;
-+ int rc, lmmsize;
-+
-+ ll_inode2fid(&fid, inode);
-+
-+ rc = ll_get_max_mdsize(sbi, &lmmsize);
-+ if (rc)
-+ RETURN(rc);
-+
-+ rc = mdc_getattr_name(sbi->ll_mdc_exp, &fid,
-+ filename, strlen(filename) + 1,
-+ OBD_MD_FLEASIZE | OBD_MD_FLDIREA,
-+ lmmsize, &req);
-+ if (rc < 0) {
-+ CDEBUG(D_INFO, "mdc_getattr_name failed "
-+ "on %s: rc %d\n", filename, rc);
-+ GOTO(out, rc);
-+ }
-+
-+ body = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF,
-+ sizeof(*body));
-+ LASSERT(body != NULL); /* checked by mdc_getattr_name */
-+ /* swabbed by mdc_getattr_name */
-+ LASSERT(lustre_rep_swabbed(req, REPLY_REC_OFF));
-+
-+ lmmsize = body->eadatasize;
-+
-+ if (!(body->valid & (OBD_MD_FLEASIZE | OBD_MD_FLDIREA)) ||
-+ lmmsize == 0) {
-+ GOTO(out, rc = -ENODATA);
-+ }
-+
-+ lmm = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF + 1,
-+ lmmsize);
-+ LASSERT(lmm != NULL);
-+ LASSERT(lustre_rep_swabbed(req, REPLY_REC_OFF + 1));
-+
-+ if ((lmm->lmm_magic != cpu_to_le32(LOV_MAGIC)) &&
-+ (lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_JOIN))) {
-+ GOTO(out, rc = -EPROTO);
-+ }
-+ /*
-+ * This is coming from the MDS, so is probably in
-+ * little endian. We convert it to host endian before
-+ * passing it to userspace.
-+ */
-+ if (LOV_MAGIC != cpu_to_le32(LOV_MAGIC)) {
-+ if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC)) {
-+ lustre_swab_lov_user_md((struct lov_user_md *)lmm);
-+                        /* if the function was called for a directory, avoid
-+                         * swabbing non-existent lsm objects */
-+ if (S_ISREG(body->mode))
-+ lustre_swab_lov_user_md_objects((struct lov_user_md *)lmm);
-+ } else if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_JOIN)) {
-+ lustre_swab_lov_user_md_join((struct lov_user_md_join *)lmm);
-+ }
-+ }
-+
-+ if (lmm->lmm_magic == LOV_MAGIC_JOIN) {
-+ struct lov_stripe_md *lsm;
-+ struct lov_user_md_join *lmj;
-+ int lmj_size, i, aindex = 0;
-+
-+ rc = obd_unpackmd(sbi->ll_osc_exp, &lsm, lmm, lmmsize);
-+ if (rc < 0)
-+ GOTO(out, rc = -ENOMEM);
-+ rc = obd_checkmd(sbi->ll_osc_exp, sbi->ll_mdc_exp, lsm);
-+ if (rc)
-+ GOTO(out_free_memmd, rc);
-+
-+ lmj_size = sizeof(struct lov_user_md_join) +
-+ lsm->lsm_stripe_count *
-+ sizeof(struct lov_user_ost_data_join);
-+ OBD_ALLOC(lmj, lmj_size);
-+ if (!lmj)
-+ GOTO(out_free_memmd, rc = -ENOMEM);
-+
-+ memcpy(lmj, lmm, sizeof(struct lov_user_md_join));
-+ for (i = 0; i < lsm->lsm_stripe_count; i++) {
-+ struct lov_extent *lex =
-+ &lsm->lsm_array->lai_ext_array[aindex];
-+
-+ if (lex->le_loi_idx + lex->le_stripe_count <= i)
-+ aindex ++;
-+ CDEBUG(D_INFO, "aindex %d i %d l_extent_start "
-+ LPU64" len %d\n", aindex, i,
-+ lex->le_start, (int)lex->le_len);
-+ lmj->lmm_objects[i].l_extent_start =
-+ lex->le_start;
-+
-+ if ((int)lex->le_len == -1)
-+ lmj->lmm_objects[i].l_extent_end = -1;
-+ else
-+ lmj->lmm_objects[i].l_extent_end =
-+ lex->le_start + lex->le_len;
-+ lmj->lmm_objects[i].l_object_id =
-+ lsm->lsm_oinfo[i]->loi_id;
-+ lmj->lmm_objects[i].l_object_gr =
-+ lsm->lsm_oinfo[i]->loi_gr;
-+ lmj->lmm_objects[i].l_ost_gen =
-+ lsm->lsm_oinfo[i]->loi_ost_gen;
-+ lmj->lmm_objects[i].l_ost_idx =
-+ lsm->lsm_oinfo[i]->loi_ost_idx;
-+ }
-+ lmm = (struct lov_mds_md *)lmj;
-+ lmmsize = lmj_size;
-+out_free_memmd:
-+ obd_free_memmd(sbi->ll_osc_exp, &lsm);
-+ }
-+out:
-+ *lmmp = lmm;
-+ *lmm_size = lmmsize;
-+ *request = req;
-+ return rc;
-+}
-+static int ll_lov_setea(struct inode *inode, struct file *file,
-+ unsigned long arg)
-+{
-+ int flags = MDS_OPEN_HAS_OBJS | FMODE_WRITE;
-+ struct lov_user_md *lump;
-+ int lum_size = sizeof(struct lov_user_md) +
-+ sizeof(struct lov_user_ost_data);
-+ int rc;
-+ ENTRY;
-+
-+ if (!cfs_capable(CFS_CAP_SYS_ADMIN))
-+ RETURN(-EPERM);
-+
-+ OBD_ALLOC(lump, lum_size);
-+ if (lump == NULL) {
-+ RETURN(-ENOMEM);
-+ }
-+ rc = copy_from_user(lump, (struct lov_user_md *)arg, lum_size);
-+ if (rc) {
-+ OBD_FREE(lump, lum_size);
-+ RETURN(-EFAULT);
-+ }
-+
-+ rc = ll_lov_setstripe_ea_info(inode, file, flags, lump, lum_size);
-+
-+ OBD_FREE(lump, lum_size);
-+ RETURN(rc);
-+}
-+
-+static int ll_lov_setstripe(struct inode *inode, struct file *file,
-+ unsigned long arg)
-+{
-+ struct lov_user_md lum, *lump = (struct lov_user_md *)arg;
-+ int rc;
-+ int flags = FMODE_WRITE;
-+ ENTRY;
-+
-+ /* Bug 1152: copy properly when this is no longer true */
-+ LASSERT(sizeof(lum) == sizeof(*lump));
-+ LASSERT(sizeof(lum.lmm_objects[0]) == sizeof(lump->lmm_objects[0]));
-+ rc = copy_from_user(&lum, lump, sizeof(lum));
-+ if (rc)
-+ RETURN(-EFAULT);
-+
-+ rc = ll_lov_setstripe_ea_info(inode, file, flags, &lum, sizeof(lum));
-+ if (rc == 0) {
-+ put_user(0, &lump->lmm_stripe_count);
-+ rc = obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2obdexp(inode),
-+ 0, ll_i2info(inode)->lli_smd, lump);
-+ }
-+ RETURN(rc);
-+}
-+
-+static int ll_lov_getstripe(struct inode *inode, unsigned long arg)
-+{
-+ struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
-+
-+ if (!lsm)
-+ RETURN(-ENODATA);
-+
-+ return obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2obdexp(inode), 0, lsm,
-+ (void *)arg);
-+}
-+
-+static int ll_get_grouplock(struct inode *inode, struct file *file,
-+ unsigned long arg)
-+{
-+ struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
-+ ldlm_policy_data_t policy = { .l_extent = { .start = 0,
-+ .end = OBD_OBJECT_EOF}};
-+ struct lustre_handle lockh = { 0 };
-+ struct ll_inode_info *lli = ll_i2info(inode);
-+ struct lov_stripe_md *lsm = lli->lli_smd;
-+ int flags = 0, rc;
-+ ENTRY;
-+
-+ if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
-+ RETURN(-EINVAL);
-+ }
-+
-+ policy.l_extent.gid = arg;
-+ if (file->f_flags & O_NONBLOCK)
-+ flags = LDLM_FL_BLOCK_NOWAIT;
-+
-+ rc = ll_extent_lock(fd, inode, lsm, LCK_GROUP, &policy, &lockh, flags);
-+ if (rc)
-+ RETURN(rc);
-+
-+ fd->fd_flags |= LL_FILE_GROUP_LOCKED|LL_FILE_IGNORE_LOCK;
-+ fd->fd_gid = arg;
-+ memcpy(&fd->fd_cwlockh, &lockh, sizeof(lockh));
-+
-+ RETURN(0);
-+}
-+
-+static int ll_put_grouplock(struct inode *inode, struct file *file,
-+ unsigned long arg)
-+{
-+ struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
-+ struct ll_inode_info *lli = ll_i2info(inode);
-+ struct lov_stripe_md *lsm = lli->lli_smd;
-+ int rc;
-+ ENTRY;
-+
-+ if (!(fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
-+ /* Ugh, it's already unlocked. */
-+ RETURN(-EINVAL);
-+ }
-+
-+ if (fd->fd_gid != arg) /* Ugh? Unlocking with different gid? */
-+ RETURN(-EINVAL);
-+
-+ fd->fd_flags &= ~(LL_FILE_GROUP_LOCKED|LL_FILE_IGNORE_LOCK);
-+
-+ rc = ll_extent_unlock(fd, inode, lsm, LCK_GROUP, &fd->fd_cwlockh);
-+ if (rc)
-+ RETURN(rc);
-+
-+ fd->fd_gid = 0;
-+ memset(&fd->fd_cwlockh, 0, sizeof(fd->fd_cwlockh));
-+
-+ RETURN(0);
-+}
-+
-+#if LUSTRE_FIX >= 50
-+static int join_sanity_check(struct inode *head, struct inode *tail)
-+{
-+ ENTRY;
-+ if ((ll_i2sbi(head)->ll_flags & LL_SBI_JOIN) == 0) {
-+ CERROR("server do not support join \n");
-+ RETURN(-EINVAL);
-+ }
-+ if (!S_ISREG(tail->i_mode) || !S_ISREG(head->i_mode)) {
-+ CERROR("tail ino %lu and ino head %lu must be regular\n",
-+ head->i_ino, tail->i_ino);
-+ RETURN(-EINVAL);
-+ }
-+ if (head->i_ino == tail->i_ino) {
-+ CERROR("file %lu can not be joined to itself \n", head->i_ino);
-+ RETURN(-EINVAL);
-+ }
-+ if (i_size_read(head) % JOIN_FILE_ALIGN) {
-+ CERROR("hsize %llu must be times of 64K\n", i_size_read(head));
-+ RETURN(-EINVAL);
-+ }
-+ RETURN(0);
-+}
-+
-+static int join_file(struct inode *head_inode, struct file *head_filp,
-+ struct file *tail_filp)
-+{
-+ struct dentry *tail_dentry = tail_filp->f_dentry;
-+ struct lookup_intent oit = {.it_op = IT_OPEN,
-+ .it_flags = head_filp->f_flags|O_JOIN_FILE};
-+ struct ldlm_enqueue_info einfo = { LDLM_IBITS, LCK_PW,
-+ ll_mdc_blocking_ast, ldlm_completion_ast, NULL, NULL };
-+
-+ struct lustre_handle lockh;
-+ struct mdc_op_data *op_data;
-+ int rc;
-+ loff_t data;
-+ ENTRY;
-+
-+ tail_dentry = tail_filp->f_dentry;
-+
-+ OBD_ALLOC_PTR(op_data);
-+ if (op_data == NULL) {
-+ RETURN(-ENOMEM);
-+ }
-+
-+ data = i_size_read(head_inode);
-+ ll_prepare_mdc_op_data(op_data, head_inode,
-+ tail_dentry->d_parent->d_inode,
-+ tail_dentry->d_name.name,
-+ tail_dentry->d_name.len, 0, &data);
-+ rc = mdc_enqueue(ll_i2mdcexp(head_inode), &einfo, &oit,
-+ op_data, &lockh, NULL, 0, 0);
-+
-+ if (rc < 0)
-+ GOTO(out, rc);
-+
-+ rc = oit.d.lustre.it_status;
-+
-+ if (rc < 0 || it_open_error(DISP_OPEN_OPEN, &oit)) {
-+ rc = rc ? rc : it_open_error(DISP_OPEN_OPEN, &oit);
-+ ptlrpc_req_finished((struct ptlrpc_request *)
-+ oit.d.lustre.it_data);
-+ GOTO(out, rc);
-+ }
-+
-+        if (oit.d.lustre.it_lock_mode) { /* If we got a lock, release it right
-+ * away */
-+ ldlm_lock_decref(&lockh, oit.d.lustre.it_lock_mode);
-+ oit.d.lustre.it_lock_mode = 0;
-+ }
-+ ptlrpc_req_finished((struct ptlrpc_request *) oit.d.lustre.it_data);
-+ it_clear_disposition(&oit, DISP_ENQ_COMPLETE);
-+ ll_release_openhandle(head_filp->f_dentry, &oit);
-+out:
-+ if (op_data)
-+ OBD_FREE_PTR(op_data);
-+ ll_intent_release(&oit);
-+ RETURN(rc);
-+}
-+
-+static int ll_file_join(struct inode *head, struct file *filp,
-+ char *filename_tail)
-+{
-+ struct inode *tail = NULL, *first = NULL, *second = NULL;
-+ struct dentry *tail_dentry;
-+ struct file *tail_filp, *first_filp, *second_filp;
-+ struct ll_lock_tree first_tree, second_tree;
-+ struct ll_lock_tree_node *first_node, *second_node;
-+ struct ll_inode_info *hlli = ll_i2info(head), *tlli;
-+ int rc = 0, cleanup_phase = 0;
-+ ENTRY;
-+
-+ CDEBUG(D_VFSTRACE, "VFS Op:head=%lu/%u(%p) tail %s\n",
-+ head->i_ino, head->i_generation, head, filename_tail);
-+
-+ tail_filp = filp_open(filename_tail, O_WRONLY, 0644);
-+ if (IS_ERR(tail_filp)) {
-+ CERROR("Can not open tail file %s", filename_tail);
-+ rc = PTR_ERR(tail_filp);
-+ GOTO(cleanup, rc);
-+ }
-+ tail = igrab(tail_filp->f_dentry->d_inode);
-+
-+ tlli = ll_i2info(tail);
-+ tail_dentry = tail_filp->f_dentry;
-+ LASSERT(tail_dentry);
-+ cleanup_phase = 1;
-+
-+ /*reorder the inode for lock sequence*/
-+ first = head->i_ino > tail->i_ino ? head : tail;
-+ second = head->i_ino > tail->i_ino ? tail : head;
-+ first_filp = head->i_ino > tail->i_ino ? filp : tail_filp;
-+ second_filp = head->i_ino > tail->i_ino ? tail_filp : filp;
-+
-+ CDEBUG(D_INFO, "reorder object from %lu:%lu to %lu:%lu \n",
-+ head->i_ino, tail->i_ino, first->i_ino, second->i_ino);
-+ first_node = ll_node_from_inode(first, 0, OBD_OBJECT_EOF, LCK_EX);
-+ if (IS_ERR(first_node)){
-+ rc = PTR_ERR(first_node);
-+ GOTO(cleanup, rc);
-+ }
-+ first_tree.lt_fd = first_filp->private_data;
-+ rc = ll_tree_lock(&first_tree, first_node, NULL, 0, 0);
-+ if (rc != 0)
-+ GOTO(cleanup, rc);
-+ cleanup_phase = 2;
-+
-+ second_node = ll_node_from_inode(second, 0, OBD_OBJECT_EOF, LCK_EX);
-+ if (IS_ERR(second_node)){
-+ rc = PTR_ERR(second_node);
-+ GOTO(cleanup, rc);
-+ }
-+ second_tree.lt_fd = second_filp->private_data;
-+ rc = ll_tree_lock(&second_tree, second_node, NULL, 0, 0);
-+ if (rc != 0)
-+ GOTO(cleanup, rc);
-+ cleanup_phase = 3;
-+
-+ rc = join_sanity_check(head, tail);
-+ if (rc)
-+ GOTO(cleanup, rc);
-+
-+ rc = join_file(head, filp, tail_filp);
-+ if (rc)
-+ GOTO(cleanup, rc);
-+cleanup:
-+ switch (cleanup_phase) {
-+ case 3:
-+ ll_tree_unlock(&second_tree);
-+ obd_cancel_unused(ll_i2obdexp(second),
-+ ll_i2info(second)->lli_smd, 0, NULL);
-+ case 2:
-+ ll_tree_unlock(&first_tree);
-+ obd_cancel_unused(ll_i2obdexp(first),
-+ ll_i2info(first)->lli_smd, 0, NULL);
-+ case 1:
-+ filp_close(tail_filp, 0);
-+ if (tail)
-+ iput(tail);
-+ if (head && rc == 0) {
-+ obd_free_memmd(ll_i2sbi(head)->ll_osc_exp,
-+ &hlli->lli_smd);
-+ hlli->lli_smd = NULL;
-+ }
-+ case 0:
-+ break;
-+ default:
-+ CERROR("invalid cleanup_phase %d\n", cleanup_phase);
-+ LBUG();
-+ }
-+ RETURN(rc);
-+}
-+#endif /* LUSTRE_FIX >= 50 */
-+
-+/**
-+ * Close inode open handle
-+ *
-+ * \param dentry [in] dentry which contains the inode
-+ * \param it [in,out] intent which contains open info and result
-+ *
-+ * \retval 0 success
-+ * \retval <0 failure
-+ */
-+int ll_release_openhandle(struct dentry *dentry, struct lookup_intent *it)
-+{
-+ struct inode *inode = dentry->d_inode;
-+ struct obd_client_handle *och;
-+ int rc;
-+ ENTRY;
-+
-+ LASSERT(inode);
-+
-+ /* Root ? Do nothing. */
-+ if (dentry->d_inode->i_sb->s_root == dentry)
-+ RETURN(0);
-+
-+ /* No open handle to close? Move away */
-+ if (!it_disposition(it, DISP_OPEN_OPEN))
-+ RETURN(0);
-+
-+ LASSERT(it_open_error(DISP_OPEN_OPEN, it) == 0);
-+
-+ OBD_ALLOC(och, sizeof(*och));
-+ if (!och)
-+ GOTO(out, rc = -ENOMEM);
-+
-+ ll_och_fill(ll_i2info(inode), it, och);
-+
-+ rc = ll_close_inode_openhandle(inode, och);
-+
-+ OBD_FREE(och, sizeof(*och));
-+ out:
-+ /* this one is in place of ll_file_open */
-+ if (it_disposition(it, DISP_ENQ_OPEN_REF))
-+ ptlrpc_req_finished(it->d.lustre.it_data);
-+ it_clear_disposition(it, DISP_ENQ_OPEN_REF);
-+ RETURN(rc);
-+}
-+
-+int ll_fiemap(struct inode *inode, struct ll_user_fiemap *fiemap,
-+ int num_bytes)
-+{
-+ struct obd_export *exp = ll_i2obdexp(inode);
-+ struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
-+ struct ll_fiemap_info_key fm_key = { .name = KEY_FIEMAP, };
-+ int vallen = num_bytes;
-+ int rc;
-+ ENTRY;
-+
-+ /* If the stripe_count > 1 and the application does not understand
-+ * DEVICE_ORDER flag, then it cannot interpret the extents correctly.
-+ */
-+ if (lsm->lsm_stripe_count > 1 &&
-+ !(fiemap->fm_flags & FIEMAP_FLAG_DEVICE_ORDER))
-+ return -EOPNOTSUPP;
-+
-+ fm_key.oa.o_id = lsm->lsm_object_id;
-+ fm_key.oa.o_valid = OBD_MD_FLID;
-+
-+ obdo_from_inode(&fm_key.oa, inode, OBD_MD_FLFID | OBD_MD_FLSIZE);
-+
-+ /* If filesize is 0, then there would be no objects for mapping */
-+ if (fm_key.oa.o_size == 0) {
-+ fiemap->fm_mapped_extents = 0;
-+ RETURN(0);
-+ }
-+
-+ memcpy(&fm_key.fiemap, fiemap, sizeof(*fiemap));
-+
-+ rc = obd_get_info(exp, sizeof(fm_key), &fm_key, &vallen, fiemap, lsm);
-+ if (rc)
-+ CERROR("obd_get_info failed: rc = %d\n", rc);
-+
-+ RETURN(rc);
-+}
-+
-+int ll_file_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
-+ unsigned long arg)
-+{
-+ struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
-+ int flags;
-+ ENTRY;
-+
-+ CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),cmd=%x\n", inode->i_ino,
-+ inode->i_generation, inode, cmd);
-+ ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_IOCTL, 1);
-+
-+        /* asm-ppc{,64} declares TCGETS, et al. as type 't' not 'T' */
-+ if (_IOC_TYPE(cmd) == 'T' || _IOC_TYPE(cmd) == 't') /* tty ioctls */
-+ RETURN(-ENOTTY);
-+
-+ switch(cmd) {
-+ case LL_IOC_GETFLAGS:
-+ /* Get the current value of the file flags */
-+ return put_user(fd->fd_flags, (int *)arg);
-+ case LL_IOC_SETFLAGS:
-+ case LL_IOC_CLRFLAGS:
-+ /* Set or clear specific file flags */
-+ /* XXX This probably needs checks to ensure the flags are
-+ * not abused, and to handle any flag side effects.
-+ */
-+ if (get_user(flags, (int *) arg))
-+ RETURN(-EFAULT);
-+
-+ if (cmd == LL_IOC_SETFLAGS) {
-+ if ((flags & LL_FILE_IGNORE_LOCK) &&
-+ !(file->f_flags & O_DIRECT)) {
-+ CERROR("%s: unable to disable locking on "
-+ "non-O_DIRECT file\n", current->comm);
-+ RETURN(-EINVAL);
-+ }
-+
-+ fd->fd_flags |= flags;
-+ } else {
-+ fd->fd_flags &= ~flags;
-+ }
-+ RETURN(0);
-+ case LL_IOC_LOV_SETSTRIPE:
-+ RETURN(ll_lov_setstripe(inode, file, arg));
-+ case LL_IOC_LOV_SETEA:
-+ RETURN(ll_lov_setea(inode, file, arg));
-+ case LL_IOC_LOV_GETSTRIPE:
-+ RETURN(ll_lov_getstripe(inode, arg));
-+ case LL_IOC_RECREATE_OBJ:
-+ RETURN(ll_lov_recreate_obj(inode, file, arg));
-+ case EXT3_IOC_FIEMAP: {
-+ struct ll_user_fiemap *fiemap_s;
-+ size_t num_bytes, ret_bytes;
-+ unsigned int extent_count;
-+ int rc = 0;
-+
-+ /* Get the extent count so we can calculate the size of
-+ * required fiemap buffer */
-+ if (get_user(extent_count,
-+ &((struct ll_user_fiemap __user *)arg)->fm_extent_count))
-+ RETURN(-EFAULT);
-+ num_bytes = sizeof(*fiemap_s) + (extent_count *
-+ sizeof(struct ll_fiemap_extent));
-+ OBD_VMALLOC(fiemap_s, num_bytes);
-+ if (fiemap_s == NULL)
-+ RETURN(-ENOMEM);
-+
-+ if (copy_from_user(fiemap_s,(struct ll_user_fiemap __user *)arg,
-+ sizeof(*fiemap_s)))
-+ GOTO(error, rc = -EFAULT);
-+
-+ if (fiemap_s->fm_flags & ~LUSTRE_FIEMAP_FLAGS_COMPAT) {
-+ fiemap_s->fm_flags = fiemap_s->fm_flags &
-+ ~LUSTRE_FIEMAP_FLAGS_COMPAT;
-+ if (copy_to_user((char *)arg, fiemap_s,
-+ sizeof(*fiemap_s)))
-+ GOTO(error, rc = -EFAULT);
-+
-+ GOTO(error, rc = -EBADR);
-+ }
-+
-+ /* If fm_extent_count is non-zero, read the first extent since
-+ * it is used to calculate end_offset and device from previous
-+ * fiemap call. */
-+ if (extent_count) {
-+ if (copy_from_user(&fiemap_s->fm_extents[0],
-+ (char __user *)arg + sizeof(*fiemap_s),
-+ sizeof(struct ll_fiemap_extent)))
-+ GOTO(error, rc = -EFAULT);
-+ }
-+
-+ if (fiemap_s->fm_flags & FIEMAP_FLAG_SYNC) {
-+ int rc;
-+
-+ rc = filemap_fdatawrite(inode->i_mapping);
-+ if (rc)
-+ GOTO(error, rc);
-+ }
-+
-+ rc = ll_fiemap(inode, fiemap_s, num_bytes);
-+ if (rc)
-+ GOTO(error, rc);
-+
-+ ret_bytes = sizeof(struct ll_user_fiemap);
-+
-+ if (extent_count != 0)
-+ ret_bytes += (fiemap_s->fm_mapped_extents *
-+ sizeof(struct ll_fiemap_extent));
-+
-+ if (copy_to_user((void *)arg, fiemap_s, ret_bytes))
-+ rc = -EFAULT;
-+
-+error:
-+ OBD_VFREE(fiemap_s, num_bytes);
-+ RETURN(rc);
-+ }
-+ case EXT3_IOC_GETFLAGS:
-+ case EXT3_IOC_SETFLAGS:
-+ RETURN(ll_iocontrol(inode, file, cmd, arg));
-+ case EXT3_IOC_GETVERSION_OLD:
-+ case EXT3_IOC_GETVERSION:
-+ RETURN(put_user(inode->i_generation, (int *)arg));
-+ case LL_IOC_JOIN: {
-+#if LUSTRE_FIX >= 50
-+                /* Allow file join in beta builds to allow debugging */
-+ char *ftail;
-+ int rc;
-+
-+ ftail = getname((const char *)arg);
-+ if (IS_ERR(ftail))
-+ RETURN(PTR_ERR(ftail));
-+ rc = ll_file_join(inode, file, ftail);
-+ putname(ftail);
-+ RETURN(rc);
-+#else
-+ CWARN("file join is not supported in this version of Lustre\n");
-+ RETURN(-ENOTTY);
-+#endif
-+ }
-+ case LL_IOC_GROUP_LOCK:
-+ RETURN(ll_get_grouplock(inode, file, arg));
-+ case LL_IOC_GROUP_UNLOCK:
-+ RETURN(ll_put_grouplock(inode, file, arg));
-+ case IOC_OBD_STATFS:
-+ RETURN(ll_obd_statfs(inode, (void *)arg));
-+ case OBD_IOC_GETNAME_OLD:
-+ case OBD_IOC_GETNAME: {
-+ struct obd_device *obd =
-+ class_exp2obd(ll_i2sbi(inode)->ll_osc_exp);
-+ if (!obd)
-+ RETURN(-EFAULT);
-+ if (copy_to_user((void *)arg, obd->obd_name,
-+ strlen(obd->obd_name) + 1))
-+ RETURN (-EFAULT);
-+ RETURN(0);
-+ }
-+
-+ /* We need to special case any other ioctls we want to handle,
-+ * to send them to the MDS/OST as appropriate and to properly
-+ * network encode the arg field.
-+ case EXT3_IOC_SETVERSION_OLD:
-+ case EXT3_IOC_SETVERSION:
-+ */
-+ default: {
-+ int err;
-+
-+ if (LLIOC_STOP ==
-+ ll_iocontrol_call(inode, file, cmd, arg, &err))
-+ RETURN(err);
-+
-+ RETURN(obd_iocontrol(cmd, ll_i2obdexp(inode), 0, NULL,
-+ (void *)arg));
-+ }
-+ }
-+}
-+
-+loff_t ll_file_seek(struct file *file, loff_t offset, int origin)
-+{
-+ struct inode *inode = file->f_dentry->d_inode;
-+ struct ll_inode_info *lli = ll_i2info(inode);
-+ struct lov_stripe_md *lsm = lli->lli_smd;
-+ loff_t retval;
-+ ENTRY;
-+ retval = offset + ((origin == 2) ? i_size_read(inode) :
-+ (origin == 1) ? file->f_pos : 0);
-+ CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), to=%Lu=%#Lx(%s)\n",
-+ inode->i_ino, inode->i_generation, inode, retval, retval,
-+ origin == 2 ? "SEEK_END": origin == 1 ? "SEEK_CUR" : "SEEK_SET");
-+ ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_LLSEEK, 1);
-+
-+ if (origin == 2) { /* SEEK_END */
-+ int nonblock = 0, rc;
-+
-+ if (file->f_flags & O_NONBLOCK)
-+ nonblock = LDLM_FL_BLOCK_NOWAIT;
-+
-+ if (lsm != NULL) {
-+ rc = ll_glimpse_size(inode, nonblock);
-+ if (rc != 0)
-+ RETURN(rc);
-+ }
-+
-+ ll_inode_size_lock(inode, 0);
-+ offset += i_size_read(inode);
-+ ll_inode_size_unlock(inode, 0);
-+ } else if (origin == 1) { /* SEEK_CUR */
-+ offset += file->f_pos;
-+ }
-+
-+ retval = -EINVAL;
-+ if (offset >= 0 && offset <= ll_file_maxbytes(inode)) {
-+ if (offset != file->f_pos) {
-+ file->f_pos = offset;
-+ file->f_version = 0;
-+ }
-+ retval = offset;
-+ }
-+
-+ RETURN(retval);
-+}
-+
-+int ll_fsync(struct file *file, struct dentry *dentry, int data)
-+{
-+ struct inode *inode = dentry->d_inode;
-+ struct ll_inode_info *lli = ll_i2info(inode);
-+ struct lov_stripe_md *lsm = lli->lli_smd;
-+ struct ll_fid fid;
-+ struct ptlrpc_request *req;
-+ int rc, err;
-+ ENTRY;
-+ CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
-+ inode->i_generation, inode);
-+ ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FSYNC, 1);
-+
-+ /* fsync's caller has already called _fdata{sync,write}, we want
-+ * that IO to finish before calling the osc and mdc sync methods */
-+ rc = filemap_fdatawait(inode->i_mapping);
-+
-+ /* catch async errors that were recorded back when async writeback
-+ * failed for pages in this mapping. */
-+ err = lli->lli_async_rc;
-+ lli->lli_async_rc = 0;
-+ if (rc == 0)
-+ rc = err;
-+ if (lsm) {
-+ err = lov_test_and_clear_async_rc(lsm);
-+ if (rc == 0)
-+ rc = err;
-+ }
-+
-+ ll_inode2fid(&fid, inode);
-+ err = mdc_sync(ll_i2sbi(inode)->ll_mdc_exp, &fid, &req);
-+ if (!rc)
-+ rc = err;
-+ if (!err)
-+ ptlrpc_req_finished(req);
-+
-+ if (data && lsm) {
-+ struct obdo *oa;
-+
-+ OBDO_ALLOC(oa);
-+ if (!oa)
-+ RETURN(rc ? rc : -ENOMEM);
-+
-+ oa->o_id = lsm->lsm_object_id;
-+ oa->o_valid = OBD_MD_FLID;
-+ obdo_from_inode(oa, inode, OBD_MD_FLTYPE | OBD_MD_FLATIME |
-+ OBD_MD_FLMTIME | OBD_MD_FLCTIME);
-+
-+ err = obd_sync(ll_i2sbi(inode)->ll_osc_exp, oa, lsm,
-+ 0, OBD_OBJECT_EOF);
-+ if (!rc)
-+ rc = err;
-+ OBDO_FREE(oa);
-+ }
-+
-+ RETURN(rc);
-+}
-+
-+int ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock)
-+{
-+ struct inode *inode = file->f_dentry->d_inode;
-+ struct ll_sb_info *sbi = ll_i2sbi(inode);
-+ struct ldlm_res_id res_id =
-+ { .name = {inode->i_ino, inode->i_generation, LDLM_FLOCK} };
-+ struct ldlm_enqueue_info einfo = { LDLM_FLOCK, 0, NULL,
-+ ldlm_flock_completion_ast, NULL, file_lock };
-+ struct lustre_handle lockh = {0};
-+ ldlm_policy_data_t flock;
-+ int flags = 0;
-+ int rc;
-+ ENTRY;
-+
-+ CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu file_lock=%p\n",
-+ inode->i_ino, file_lock);
-+ ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FLOCK, 1);
-+
-+ if (file_lock->fl_flags & FL_FLOCK) {
-+ LASSERT((cmd == F_SETLKW) || (cmd == F_SETLK));
-+ /* set missing params for flock() calls */
-+ file_lock->fl_end = OFFSET_MAX;
-+ file_lock->fl_pid = current->tgid;
-+ }
-+ flock.l_flock.pid = file_lock->fl_pid;
-+ flock.l_flock.start = file_lock->fl_start;
-+ flock.l_flock.end = file_lock->fl_end;
-+
-+ switch (file_lock->fl_type) {
-+ case F_RDLCK:
-+ einfo.ei_mode = LCK_PR;
-+ break;
-+ case F_UNLCK:
-+ /* An unlock request may or may not have any relation to
-+ * existing locks so we may not be able to pass a lock handle
-+ * via a normal ldlm_lock_cancel() request. The request may even
-+ * unlock a byte range in the middle of an existing lock. In
-+ * order to process an unlock request we need all of the same
-+ * information that is given with a normal read or write record
-+ * lock request. To avoid creating another ldlm unlock (cancel)
-+ * message we'll treat a LCK_NL flock request as an unlock. */
-+ einfo.ei_mode = LCK_NL;
-+ break;
-+ case F_WRLCK:
-+ einfo.ei_mode = LCK_PW;
-+ break;
-+ default:
-+ CERROR("unknown fcntl lock type: %d\n", file_lock->fl_type);
-+ RETURN (-EINVAL);
-+ }
-+
-+ switch (cmd) {
-+ case F_SETLKW:
-+#ifdef F_SETLKW64
-+ case F_SETLKW64:
-+#endif
-+ flags = 0;
-+ break;
-+ case F_SETLK:
-+#ifdef F_SETLK64
-+ case F_SETLK64:
-+#endif
-+ flags = LDLM_FL_BLOCK_NOWAIT;
-+ break;
-+ case F_GETLK:
-+#ifdef F_GETLK64
-+ case F_GETLK64:
-+#endif
-+ flags = LDLM_FL_TEST_LOCK;
-+ /* Save the old mode so that if the mode in the lock changes we
-+ * can decrement the appropriate reader or writer refcount. */
-+ file_lock->fl_type = einfo.ei_mode;
-+ break;
-+ default:
-+ CERROR("unknown fcntl lock command: %d\n", cmd);
-+ RETURN (-EINVAL);
-+ }
-+
-+ CDEBUG(D_DLMTRACE, "inode=%lu, pid=%u, flags=%#x, mode=%u, "
-+ "start="LPU64", end="LPU64"\n", inode->i_ino, flock.l_flock.pid,
-+ flags, einfo.ei_mode, flock.l_flock.start, flock.l_flock.end);
-+
-+ rc = ldlm_cli_enqueue(sbi->ll_mdc_exp, NULL, &einfo, res_id,
-+ &flock, &flags, NULL, 0, NULL, &lockh, 0);
-+ if ((file_lock->fl_flags & FL_FLOCK) &&
-+ (rc == 0 || file_lock->fl_type == F_UNLCK))
-+ ll_flock_lock_file_wait(file, file_lock, (cmd == F_SETLKW));
-+#ifdef HAVE_F_OP_FLOCK
-+ if ((file_lock->fl_flags & FL_POSIX) &&
-+ (rc == 0 || file_lock->fl_type == F_UNLCK) &&
-+ !(flags & LDLM_FL_TEST_LOCK))
-+ posix_lock_file_wait(file, file_lock);
-+#endif
-+
-+ RETURN(rc);
-+}
-+
-+int ll_file_noflock(struct file *file, int cmd, struct file_lock *file_lock)
-+{
-+ ENTRY;
-+
-+ RETURN(-ENOSYS);
-+}
-+
-+int ll_have_md_lock(struct inode *inode, __u64 bits)
-+{
-+ struct lustre_handle lockh;
-+ struct ldlm_res_id res_id = { .name = {0} };
-+ struct obd_device *obddev;
-+ ldlm_policy_data_t policy = { .l_inodebits = {bits}};
-+ int flags;
-+ ENTRY;
-+
-+ if (!inode)
-+ RETURN(0);
-+
-+ obddev = ll_i2mdcexp(inode)->exp_obd;
-+ res_id.name[0] = inode->i_ino;
-+ res_id.name[1] = inode->i_generation;
-+
-+ CDEBUG(D_INFO, "trying to match res "LPU64"\n", res_id.name[0]);
-+
-+ flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING | LDLM_FL_TEST_LOCK;
-+ if (ldlm_lock_match(obddev->obd_namespace, flags, &res_id, LDLM_IBITS,
-+ &policy, LCK_CR|LCK_CW|LCK_PR|LCK_PW, &lockh)) {
-+ RETURN(1);
-+ }
-+
-+ RETURN(0);
-+}
-+
-+static int ll_inode_revalidate_fini(struct inode *inode, int rc) {
-+ if (rc == -ENOENT) { /* Already unlinked. Just update nlink
-+ * and return success */
-+ inode->i_nlink = 0;
-+ /* This path cannot be hit for regular files unless in
-+                 * case of obscure races, so no need to validate
-+ * size. */
-+ if (!S_ISREG(inode->i_mode) &&
-+ !S_ISDIR(inode->i_mode))
-+ return 0;
-+ }
-+
-+ if (rc) {
-+ CERROR("failure %d inode %lu\n", rc, inode->i_ino);
-+ return -abs(rc);
-+
-+ }
-+
-+ return 0;
-+}
-+
-+int ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it)
-+{
-+ struct inode *inode = dentry->d_inode;
-+ struct ptlrpc_request *req = NULL;
-+ struct obd_export *exp;
-+ int rc;
-+ ENTRY;
-+
-+ if (!inode) {
-+ CERROR("REPORT THIS LINE TO PETER\n");
-+ RETURN(0);
-+ }
-+ CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),name=%s\n",
-+ inode->i_ino, inode->i_generation, inode, dentry->d_name.name);
-+
-+ exp = ll_i2mdcexp(inode);
-+
-+ if (exp->exp_connect_flags & OBD_CONNECT_ATTRFID) {
-+ struct lookup_intent oit = { .it_op = IT_GETATTR };
-+ struct mdc_op_data op_data;
-+
-+ /* Call getattr by fid, so do not provide name at all. */
-+ ll_prepare_mdc_op_data(&op_data, dentry->d_parent->d_inode,
-+ dentry->d_inode, NULL, 0, 0, NULL);
-+ rc = mdc_intent_lock(exp, &op_data, NULL, 0,
-+ /* we are not interested in name
-+ based lookup */
-+ &oit, 0, &req,
-+ ll_mdc_blocking_ast, 0);
-+ if (rc < 0) {
-+ rc = ll_inode_revalidate_fini(inode, rc);
-+ GOTO (out, rc);
-+ }
-+
-+ rc = revalidate_it_finish(req, DLM_REPLY_REC_OFF, &oit, dentry);
-+ if (rc != 0) {
-+ ll_intent_release(&oit);
-+ GOTO(out, rc);
-+ }
-+
-+ /* Unlinked? Unhash dentry, so it is not picked up later by
-+ do_lookup() -> ll_revalidate_it(). We cannot use d_drop
-+ here to preserve get_cwd functionality on 2.6.
-+ Bug 10503 */
-+ if (!dentry->d_inode->i_nlink) {
-+ spin_lock(&ll_lookup_lock);
-+ spin_lock(&dcache_lock);
-+ ll_drop_dentry(dentry);
-+ spin_unlock(&dcache_lock);
-+ spin_unlock(&ll_lookup_lock);
-+ }
-+
-+ ll_lookup_finish_locks(&oit, dentry);
-+ } else if (!ll_have_md_lock(dentry->d_inode,
-+ MDS_INODELOCK_UPDATE|MDS_INODELOCK_LOOKUP)) {
-+ struct ll_sb_info *sbi = ll_i2sbi(dentry->d_inode);
-+ struct ll_fid fid;
-+ obd_valid valid = OBD_MD_FLGETATTR;
-+ int ealen = 0;
-+
-+ if (S_ISREG(inode->i_mode)) {
-+ rc = ll_get_max_mdsize(sbi, &ealen);
-+ if (rc)
-+ RETURN(rc);
-+ valid |= OBD_MD_FLEASIZE | OBD_MD_FLMODEASIZE;
-+ }
-+ ll_inode2fid(&fid, inode);
-+ rc = mdc_getattr(sbi->ll_mdc_exp, &fid, valid, ealen, &req);
-+ if (rc) {
-+ rc = ll_inode_revalidate_fini(inode, rc);
-+ RETURN(rc);
-+ }
-+
-+ rc = ll_prep_inode(sbi->ll_osc_exp, &inode, req, REPLY_REC_OFF,
-+ NULL);
-+ if (rc)
-+ GOTO(out, rc);
-+ }
-+
-+ /* if object not yet allocated, don't validate size */
-+ if (ll_i2info(inode)->lli_smd == NULL)
-+ GOTO(out, rc = 0);
-+
-+ /* ll_glimpse_size will prefer locally cached writes if they extend
-+ * the file */
-+ rc = ll_glimpse_size(inode, 0);
-+
-+out:
-+ ptlrpc_req_finished(req);
-+ RETURN(rc);
-+}
-+
-+int ll_getattr_it(struct vfsmount *mnt, struct dentry *de,
-+ struct lookup_intent *it, struct kstat *stat)
-+{
-+ struct inode *inode = de->d_inode;
-+ int res = 0;
-+
-+ res = ll_inode_revalidate_it(de, it);
-+ ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_GETATTR, 1);
-+
-+ if (res)
-+ return res;
-+
-+ stat->dev = inode->i_sb->s_dev;
-+ stat->ino = inode->i_ino;
-+ stat->mode = inode->i_mode;
-+ stat->nlink = inode->i_nlink;
-+ stat->uid = inode->i_uid;
-+ stat->gid = inode->i_gid;
-+ stat->rdev = kdev_t_to_nr(inode->i_rdev);
-+ stat->atime = inode->i_atime;
-+ stat->mtime = inode->i_mtime;
-+ stat->ctime = inode->i_ctime;
-+#ifdef HAVE_INODE_BLKSIZE
-+ stat->blksize = inode->i_blksize;
-+#else
-+ stat->blksize = 1<<inode->i_blkbits;
-+#endif
-+
-+ ll_inode_size_lock(inode, 0);
-+ stat->size = i_size_read(inode);
-+ stat->blocks = inode->i_blocks;
-+ ll_inode_size_unlock(inode, 0);
-+
-+ return 0;
-+}
-+int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat)
-+{
-+ struct lookup_intent it = { .it_op = IT_GETATTR };
-+
-+ return ll_getattr_it(mnt, de, &it, stat);
-+}
-+
-+static
-+int lustre_check_acl(struct inode *inode, int mask)
-+{
-+#ifdef CONFIG_FS_POSIX_ACL
-+ struct ll_inode_info *lli = ll_i2info(inode);
-+ struct posix_acl *acl;
-+ int rc;
-+ ENTRY;
-+
-+ spin_lock(&lli->lli_lock);
-+ acl = posix_acl_dup(lli->lli_posix_acl);
-+ spin_unlock(&lli->lli_lock);
-+
-+ if (!acl)
-+ RETURN(-EAGAIN);
-+
-+ rc = posix_acl_permission(inode, acl, mask);
-+ posix_acl_release(acl);
-+
-+ RETURN(rc);
-+#else
-+ return -EAGAIN;
-+#endif
-+}
-+
-+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,10))
-+int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd)
-+{
-+ CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), mask %o\n",
-+ inode->i_ino, inode->i_generation, inode, mask);
-+
-+ ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_INODE_PERM, 1);
-+ return generic_permission(inode, mask, lustre_check_acl);
-+}
-+#else
-+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
-+int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd)
-+#else
-+int ll_inode_permission(struct inode *inode, int mask)
-+#endif
-+{
-+ int mode = inode->i_mode;
-+ int rc;
-+
-+ CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), mask %o\n",
-+ inode->i_ino, inode->i_generation, inode, mask);
-+ ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_INODE_PERM, 1);
-+
-+ if ((mask & MAY_WRITE) && IS_RDONLY(inode) &&
-+ (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
-+ return -EROFS;
-+ if ((mask & MAY_WRITE) && IS_IMMUTABLE(inode))
-+ return -EACCES;
-+ if (current->fsuid == inode->i_uid) {
-+ mode >>= 6;
-+ } else if (1) {
-+ if (((mode >> 3) & mask & S_IRWXO) != mask)
-+ goto check_groups;
-+ rc = lustre_check_acl(inode, mask);
-+ if (rc == -EAGAIN)
-+ goto check_groups;
-+ if (rc == -EACCES)
-+ goto check_capabilities;
-+ return rc;
-+ } else {
-+check_groups:
-+ if (in_group_p(inode->i_gid))
-+ mode >>= 3;
-+ }
-+ if ((mode & mask & S_IRWXO) == mask)
-+ return 0;
-+
-+check_capabilities:
-+ if (!(mask & MAY_EXEC) ||
-+ (inode->i_mode & S_IXUGO) || S_ISDIR(inode->i_mode))
-+ if (cfs_capable(CFS_CAP_DAC_OVERRIDE))
-+ return 0;
-+
-+ if (cfs_capable(CFS_CAP_DAC_READ_SEARCH) && ((mask == MAY_READ) ||
-+ (S_ISDIR(inode->i_mode) && !(mask & MAY_WRITE))))
-+ return 0;
-+
-+ return -EACCES;
-+}
-+#endif
-+
-+/* -o localflock - only provides locally consistent flock locks */
-+struct file_operations ll_file_operations = {
-+ .read = ll_file_read,
-+#ifdef HAVE_FILE_READV
-+ .readv = ll_file_readv,
-+#else
-+ .aio_read = ll_file_aio_read,
-+#endif
-+ .write = ll_file_write,
-+#ifdef HAVE_FILE_WRITEV
-+ .writev = ll_file_writev,
-+#else
-+ .aio_write = ll_file_aio_write,
-+#endif
-+ .ioctl = ll_file_ioctl,
-+ .open = ll_file_open,
-+ .release = ll_file_release,
-+ .mmap = ll_file_mmap,
-+ .llseek = ll_file_seek,
-+ .sendfile = ll_file_sendfile,
-+ .fsync = ll_fsync,
-+};
-+
-+struct file_operations ll_file_operations_flock = {
-+ .read = ll_file_read,
-+#ifdef HAVE_FILE_READV
-+ .readv = ll_file_readv,
-+#else
-+ .aio_read = ll_file_aio_read,
-+#endif
-+ .write = ll_file_write,
-+#ifdef HAVE_FILE_WRITEV
-+ .writev = ll_file_writev,
-+#else
-+ .aio_write = ll_file_aio_write,
-+#endif
-+ .ioctl = ll_file_ioctl,
-+ .open = ll_file_open,
-+ .release = ll_file_release,
-+ .mmap = ll_file_mmap,
-+ .llseek = ll_file_seek,
-+ .sendfile = ll_file_sendfile,
-+ .fsync = ll_fsync,
-+#ifdef HAVE_F_OP_FLOCK
-+ .flock = ll_file_flock,
-+#endif
-+ .lock = ll_file_flock
-+};
-+
-+/* These are for -o noflock - to return ENOSYS on flock calls */
-+struct file_operations ll_file_operations_noflock = {
-+ .read = ll_file_read,
-+#ifdef HAVE_FILE_READV
-+ .readv = ll_file_readv,
-+#else
-+ .aio_read = ll_file_aio_read,
-+#endif
-+ .write = ll_file_write,
-+#ifdef HAVE_FILE_WRITEV
-+ .writev = ll_file_writev,
-+#else
-+ .aio_write = ll_file_aio_write,
-+#endif
-+ .ioctl = ll_file_ioctl,
-+ .open = ll_file_open,
-+ .release = ll_file_release,
-+ .mmap = ll_file_mmap,
-+ .llseek = ll_file_seek,
-+ .sendfile = ll_file_sendfile,
-+ .fsync = ll_fsync,
-+#ifdef HAVE_F_OP_FLOCK
-+ .flock = ll_file_noflock,
-+#endif
-+ .lock = ll_file_noflock
-+};
-+
-+struct inode_operations ll_file_inode_operations = {
-+#ifdef HAVE_VFS_INTENT_PATCHES
-+ .setattr_raw = ll_setattr_raw,
-+#endif
-+ .setattr = ll_setattr,
-+ .truncate = ll_truncate,
-+ .getattr = ll_getattr,
-+ .permission = ll_inode_permission,
-+ .setxattr = ll_setxattr,
-+ .getxattr = ll_getxattr,
-+ .listxattr = ll_listxattr,
-+ .removexattr = ll_removexattr,
-+};
-+
-+/* dynamic ioctl number support routines */
-+static struct llioc_ctl_data {
-+ struct rw_semaphore ioc_sem;
-+ struct list_head ioc_head;
-+} llioc = {
-+ __RWSEM_INITIALIZER(llioc.ioc_sem),
-+ CFS_LIST_HEAD_INIT(llioc.ioc_head)
-+};
-+
-+
-+struct llioc_data {
-+ struct list_head iocd_list;
-+ unsigned int iocd_size;
-+ llioc_callback_t iocd_cb;
-+ unsigned int iocd_count;
-+ unsigned int iocd_cmd[0];
-+};
-+
-+void *ll_iocontrol_register(llioc_callback_t cb, int count, unsigned int *cmd)
-+{
-+ unsigned int size;
-+ struct llioc_data *in_data = NULL;
-+ ENTRY;
-+
-+ if (cb == NULL || cmd == NULL ||
-+ count > LLIOC_MAX_CMD || count < 0)
-+ RETURN(NULL);
-+
-+ size = sizeof(*in_data) + count * sizeof(unsigned int);
-+ OBD_ALLOC(in_data, size);
-+ if (in_data == NULL)
-+ RETURN(NULL);
-+
-+ memset(in_data, 0, sizeof(*in_data));
-+ in_data->iocd_size = size;
-+ in_data->iocd_cb = cb;
-+ in_data->iocd_count = count;
-+ memcpy(in_data->iocd_cmd, cmd, sizeof(unsigned int) * count);
-+
-+ down_write(&llioc.ioc_sem);
-+ list_add_tail(&in_data->iocd_list, &llioc.ioc_head);
-+ up_write(&llioc.ioc_sem);
-+
-+ RETURN(in_data);
-+}
-+
-+void ll_iocontrol_unregister(void *magic)
-+{
-+ struct llioc_data *tmp;
-+
-+ if (magic == NULL)
-+ return;
-+
-+ down_write(&llioc.ioc_sem);
-+ list_for_each_entry(tmp, &llioc.ioc_head, iocd_list) {
-+ if (tmp == magic) {
-+ unsigned int size = tmp->iocd_size;
-+
-+ list_del(&tmp->iocd_list);
-+ up_write(&llioc.ioc_sem);
-+
-+ OBD_FREE(tmp, size);
-+ return;
-+ }
-+ }
-+ up_write(&llioc.ioc_sem);
-+
-+ CWARN("didn't find iocontrol register block with magic: %p\n", magic);
-+}
-+
-+EXPORT_SYMBOL(ll_iocontrol_register);
-+EXPORT_SYMBOL(ll_iocontrol_unregister);
-+
-+enum llioc_iter ll_iocontrol_call(struct inode *inode, struct file *file,
-+ unsigned int cmd, unsigned long arg, int *rcp)
-+{
-+ enum llioc_iter ret = LLIOC_CONT;
-+ struct llioc_data *data;
-+ int rc = -EINVAL, i;
-+
-+ down_read(&llioc.ioc_sem);
-+ list_for_each_entry(data, &llioc.ioc_head, iocd_list) {
-+ for (i = 0; i < data->iocd_count; i++) {
-+ if (cmd != data->iocd_cmd[i])
-+ continue;
-+
-+ ret = data->iocd_cb(inode, file, cmd, arg, data, &rc);
-+ break;
-+ }
-+
-+ if (ret == LLIOC_STOP)
-+ break;
-+ }
-+ up_read(&llioc.ioc_sem);
-+
-+ if (rcp)
-+ *rcp = rc;
-+ return ret;
-+}
-diff -urNad lustre~/lustre/llite/llite_internal.h lustre/lustre/llite/llite_internal.h
---- lustre~/lustre/llite/llite_internal.h 2009-03-12 10:32:27.000000000 +0100
-+++ lustre/lustre/llite/llite_internal.h 2009-03-13 09:45:03.000000000 +0100
-@@ -647,7 +647,7 @@
- struct lookup_intent *it, struct kstat *stat);
- int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat);
- struct ll_file_data *ll_file_data_get(void);
--#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
-+#ifndef HAVE_INODE_PERMISION_2ARGS
- int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd);
- #else
- int ll_inode_permission(struct inode *inode, int mask);
-@@ -727,9 +727,6 @@
- /* llite/llite_nfs.c */
- extern struct export_operations lustre_export_operations;
- __u32 get_uuid2int(const char *name, int len);
--struct dentry *ll_fh_to_dentry(struct super_block *sb, __u32 *data, int len,
-- int fhtype, int parent);
--int ll_dentry_to_fh(struct dentry *, __u32 *datap, int *lenp, int need_parent);
-
- /* llite/special.c */
- extern struct inode_operations ll_special_inode_operations;
-diff -urNad lustre~/lustre/llite/llite_internal.h.orig lustre/lustre/llite/llite_internal.h.orig
---- lustre~/lustre/llite/llite_internal.h.orig 1970-01-01 00:00:00.000000000 +0000
-+++ lustre/lustre/llite/llite_internal.h.orig 2009-03-13 09:45:03.000000000 +0100
-@@ -0,0 +1,1027 @@
-+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-+ * vim:expandtab:shiftwidth=8:tabstop=8:
-+ *
-+ * GPL HEADER START
-+ *
-+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
-+ *
-+ * This program is free software; you can redistribute it and/or modify
-+ * it under the terms of the GNU General Public License version 2 only,
-+ * as published by the Free Software Foundation.
-+ *
-+ * This program is distributed in the hope that it will be useful, but
-+ * WITHOUT ANY WARRANTY; without even the implied warranty of
-+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-+ * General Public License version 2 for more details (a copy is included
-+ * in the LICENSE file that accompanied this code).
-+ *
-+ * You should have received a copy of the GNU General Public License
-+ * version 2 along with this program; If not, see
-+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
-+ *
-+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
-+ * CA 95054 USA or visit www.sun.com if you need additional information or
-+ * have any questions.
-+ *
-+ * GPL HEADER END
-+ */
-+/*
-+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved
-+ * Use is subject to license terms.
-+ */
-+/*
-+ * This file is part of Lustre, http://www.lustre.org/
-+ * Lustre is a trademark of Sun Microsystems, Inc.
-+ */
-+
-+#ifndef LLITE_INTERNAL_H
-+#define LLITE_INTERNAL_H
-+
-+#ifdef CONFIG_FS_POSIX_ACL
-+# include <linux/fs.h>
-+#ifdef HAVE_XATTR_ACL
-+# include <linux/xattr_acl.h>
-+#endif
-+#ifdef HAVE_LINUX_POSIX_ACL_XATTR_H
-+# include <linux/posix_acl_xattr.h>
-+#endif
-+#endif
-+
-+#include <lustre_debug.h>
-+#include <lustre_ver.h>
-+#include <linux/lustre_version.h>
-+#include <lustre_disk.h> /* for s2sbi */
-+
-+#ifndef HAVE_LE_TYPES
-+typedef __u16 __le16;
-+typedef __u32 __le32;
-+#endif
-+
-+/*
-+struct lustre_intent_data {
-+ __u64 it_lock_handle[2];
-+ __u32 it_disposition;
-+ __u32 it_status;
-+ __u32 it_lock_mode;
-+ }; */
-+
-+/* If there is no FMODE_EXEC defined, make it match nothing */
-+#ifndef FMODE_EXEC
-+#define FMODE_EXEC 0
-+#endif
-+
-+#define LL_IT2STR(it) ((it) ? ldlm_it2str((it)->it_op) : "0")
-+#define LUSTRE_FPRIVATE(file) ((file)->private_data)
-+
-+#ifdef HAVE_VFS_INTENT_PATCHES
-+static inline struct lookup_intent *ll_nd2it(struct nameidata *nd)
-+{
-+ return &nd->intent;
-+}
-+#endif
-+
-+/*
-+ * Directory entries are currently in the same format as ext2/ext3, but will
-+ * be changed in the future to accommodate FIDs
-+ */
-+#define LL_DIR_NAME_LEN (255)
-+#define LL_DIR_PAD (4)
-+
-+struct ll_dir_entry {
-+ /* number of inode, referenced by this entry */
-+ __le32 lde_inode;
-+ /* total record length, multiple of LL_DIR_PAD */
-+ __le16 lde_rec_len;
-+ /* length of name */
-+ __u8 lde_name_len;
-+ /* file type: regular, directory, device, etc. */
-+ __u8 lde_file_type;
-+ /* name. NOT NUL-terminated */
-+ char lde_name[LL_DIR_NAME_LEN];
-+};
-+
-+struct ll_dentry_data {
-+ int lld_cwd_count;
-+ int lld_mnt_count;
-+ struct obd_client_handle lld_cwd_och;
-+ struct obd_client_handle lld_mnt_och;
-+#ifndef HAVE_VFS_INTENT_PATCHES
-+ struct lookup_intent *lld_it;
-+#endif
-+ unsigned int lld_sa_generation;
-+};
-+
-+#define ll_d2d(de) ((struct ll_dentry_data*)((de)->d_fsdata))
-+
-+extern struct file_operations ll_pgcache_seq_fops;
-+
-+#define LLI_INODE_MAGIC 0x111d0de5
-+#define LLI_INODE_DEAD 0xdeadd00d
-+#define LLI_F_HAVE_OST_SIZE_LOCK 0
-+#define LLI_F_HAVE_MDS_SIZE_LOCK 1
-+#define LLI_F_CONTENDED 2
-+#define LLI_F_SRVLOCK 3
-+
-+struct ll_inode_info {
-+ int lli_inode_magic;
-+ struct semaphore lli_size_sem; /* protect open and change size */
-+ void *lli_size_sem_owner;
-+ struct semaphore lli_write_sem;
-+ struct lov_stripe_md *lli_smd;
-+ char *lli_symlink_name;
-+ __u64 lli_maxbytes;
-+ __u64 lli_io_epoch;
-+ unsigned long lli_flags;
-+ cfs_time_t lli_contention_time;
-+
-+ /* this lock protects s_d_w and p_w_ll and mmap_cnt */
-+ spinlock_t lli_lock;
-+#ifdef HAVE_CLOSE_THREAD
-+ struct list_head lli_pending_write_llaps;
-+ struct list_head lli_close_item;
-+ int lli_send_done_writing;
-+#endif
-+ atomic_t lli_mmap_cnt;
-+
-+ /* for writepage() only to communicate to fsync */
-+ int lli_async_rc;
-+
-+ struct posix_acl *lli_posix_acl;
-+
-+ struct list_head lli_dead_list;
-+
-+ struct semaphore lli_och_sem; /* Protects access to och pointers
-+ and their usage counters */
-+ /* We need all three because every inode may be opened in different
-+ modes */
-+ struct obd_client_handle *lli_mds_read_och;
-+ __u64 lli_open_fd_read_count;
-+ struct obd_client_handle *lli_mds_write_och;
-+ __u64 lli_open_fd_write_count;
-+ struct obd_client_handle *lli_mds_exec_och;
-+ __u64 lli_open_fd_exec_count;
-+ struct inode lli_vfs_inode;
-+
-+ /* metadata stat-ahead */
-+ /*
-+         * "opendir_pid" is the token set at lookup/revalidate time -- it
-+         * identifies the owner of dir statahead.
-+ */
-+ pid_t lli_opendir_pid;
-+ /*
-+ * since parent-child threads can share the same @file struct,
-+         * "opendir_key" is the token used at dir close for the case where the
-+         * parent exits before the child -- it marks who should clean up the
-+         * dir readahead. */
-+ void *lli_opendir_key;
-+ struct ll_statahead_info *lli_sai;
-+};
-+
-+/*
-+ * Locking to guarantee consistency of non-atomic updates to long long i_size,
-+ * consistency between file size and KMS, and consistency within
-+ * ->lli_smd->lsm_oinfo[]'s.
-+ *
-+ * Implemented by ->lli_size_sem and ->lsm_sem, nested in that order.
-+ */
-+
-+void ll_inode_size_lock(struct inode *inode, int lock_lsm);
-+void ll_inode_size_unlock(struct inode *inode, int unlock_lsm);
-+
-+// FIXME: replace the name of this with LL_I to conform to kernel stuff
-+// static inline struct ll_inode_info *LL_I(struct inode *inode)
-+static inline struct ll_inode_info *ll_i2info(struct inode *inode)
-+{
-+ return container_of(inode, struct ll_inode_info, lli_vfs_inode);
-+}
-+
-+/* default to about 40meg of readahead on a given system. That much tied
-+ * up in 512k readahead requests serviced at 40ms each is about 1GB/s. */
-+#define SBI_DEFAULT_READAHEAD_MAX (40UL << (20 - CFS_PAGE_SHIFT))
-+
-+/* default to read-ahead full files smaller than 2MB on the second read */
-+#define SBI_DEFAULT_READAHEAD_WHOLE_MAX (2UL << (20 - CFS_PAGE_SHIFT))
-+
-+enum ra_stat {
-+ RA_STAT_HIT = 0,
-+ RA_STAT_MISS,
-+ RA_STAT_DISTANT_READPAGE,
-+ RA_STAT_MISS_IN_WINDOW,
-+ RA_STAT_FAILED_GRAB_PAGE,
-+ RA_STAT_FAILED_MATCH,
-+ RA_STAT_DISCARDED,
-+ RA_STAT_ZERO_LEN,
-+ RA_STAT_ZERO_WINDOW,
-+ RA_STAT_EOF,
-+ RA_STAT_MAX_IN_FLIGHT,
-+ RA_STAT_WRONG_GRAB_PAGE,
-+ _NR_RA_STAT,
-+};
-+
-+struct ll_ra_info {
-+ unsigned long ra_cur_pages;
-+ unsigned long ra_max_pages;
-+ unsigned long ra_max_read_ahead_whole_pages;
-+ unsigned long ra_stats[_NR_RA_STAT];
-+};
-+
-+/* LL_HIST_MAX=32 causes an overflow */
-+#define LL_HIST_MAX 28
-+#define LL_HIST_START 12 /* buckets start at 2^12 = 4k */
-+#define LL_PROCESS_HIST_MAX 10
-+struct per_process_info {
-+ pid_t pid;
-+ struct obd_histogram pp_r_hist;
-+ struct obd_histogram pp_w_hist;
-+};
-+
-+/* pp_extents[LL_PROCESS_HIST_MAX] will hold the combined process info */
-+struct ll_rw_extents_info {
-+ struct per_process_info pp_extents[LL_PROCESS_HIST_MAX + 1];
-+};
-+
-+#define LL_OFFSET_HIST_MAX 100
-+struct ll_rw_process_info {
-+ pid_t rw_pid;
-+ int rw_op;
-+ loff_t rw_range_start;
-+ loff_t rw_range_end;
-+ loff_t rw_last_file_pos;
-+ loff_t rw_offset;
-+ size_t rw_smallest_extent;
-+ size_t rw_largest_extent;
-+ struct file *rw_last_file;
-+};
-+
-+
-+enum stats_track_type {
-+ STATS_TRACK_ALL = 0, /* track all processes */
-+ STATS_TRACK_PID, /* track process with this pid */
-+ STATS_TRACK_PPID, /* track processes with this ppid */
-+ STATS_TRACK_GID, /* track processes with this gid */
-+ STATS_TRACK_LAST,
-+};
-+
-+/* flags for sbi->ll_flags */
-+#define LL_SBI_NOLCK 0x01 /* DLM locking disabled (directio-only) */
-+#define LL_SBI_DATA_CHECKSUM 0x02 /* checksum each page on the wire */
-+#define LL_SBI_FLOCK 0x04
-+#define LL_SBI_USER_XATTR 0x08 /* support user xattr */
-+#define LL_SBI_ACL 0x10 /* support ACL */
-+#define LL_SBI_JOIN 0x20 /* support JOIN */
-+#define LL_SBI_LOCALFLOCK 0x40 /* Local flocks support by kernel */
-+#define LL_SBI_LRU_RESIZE 0x80 /* support lru resize */
-+#define LL_SBI_LLITE_CHECKSUM 0x100 /* checksum each page in memory */
-+
-+/* default value for ll_sb_info->contention_time */
-+#define SBI_DEFAULT_CONTENTION_SECONDS 60
-+/* default value for lockless_truncate_enable */
-+#define SBI_DEFAULT_LOCKLESS_TRUNCATE_ENABLE 1
-+
-+struct ll_sb_info {
-+ struct list_head ll_list;
-+ /* this protects pglist and ra_info. It isn't safe to
-+ * grab from interrupt contexts */
-+ spinlock_t ll_lock;
-+ spinlock_t ll_pp_extent_lock; /* Lock for pp_extent entries */
-+ spinlock_t ll_process_lock; /* Lock for ll_rw_process_info */
-+ struct obd_uuid ll_sb_uuid;
-+ struct obd_export *ll_mdc_exp;
-+ struct obd_export *ll_osc_exp;
-+ struct proc_dir_entry *ll_proc_root;
-+ obd_id ll_rootino; /* number of root inode */
-+
-+ int ll_flags;
-+ struct list_head ll_conn_chain; /* per-conn chain of SBs */
-+ struct lustre_client_ocd ll_lco;
-+
-+ struct list_head ll_orphan_dentry_list; /*please don't ask -p*/
-+ struct ll_close_queue *ll_lcq;
-+
-+ struct lprocfs_stats *ll_stats; /* lprocfs stats counter */
-+
-+ unsigned long ll_async_page_max;
-+ unsigned long ll_async_page_count;
-+ unsigned long ll_pglist_gen;
-+ struct list_head ll_pglist; /* all pages (llap_pglist_item) */
-+
-+ unsigned ll_contention_time; /* seconds */
-+ unsigned ll_lockless_truncate_enable; /* true/false */
-+
-+ struct ll_ra_info ll_ra_info;
-+ unsigned int ll_namelen;
-+ struct file_operations *ll_fop;
-+
-+#ifdef HAVE_EXPORT___IGET
-+ struct list_head ll_deathrow; /* inodes to be destroyed (b1443) */
-+ spinlock_t ll_deathrow_lock;
-+#endif
-+ /* =0 - hold lock over whole read/write
-+ * >0 - max. chunk to be read/written w/o lock re-acquiring */
-+ unsigned long ll_max_rw_chunk;
-+
-+ /* Statistics */
-+ struct ll_rw_extents_info ll_rw_extents_info;
-+ int ll_extent_process_count;
-+ struct ll_rw_process_info ll_rw_process_info[LL_PROCESS_HIST_MAX];
-+ unsigned int ll_offset_process_count;
-+ struct ll_rw_process_info ll_rw_offset_info[LL_OFFSET_HIST_MAX];
-+ unsigned int ll_rw_offset_entry_count;
-+ enum stats_track_type ll_stats_track_type;
-+ int ll_stats_track_id;
-+ int ll_rw_stats_on;
-+ dev_t ll_sdev_orig; /* save s_dev before assign for
-+                                                 * clustered nfs */
-+
-+ /* metadata stat-ahead */
-+ unsigned int ll_sa_max; /* max statahead RPCs */
-+ unsigned int ll_sa_wrong; /* statahead thread stopped for
-+ * low hit ratio */
-+ unsigned int ll_sa_total; /* statahead thread started
-+ * count */
-+ unsigned long long ll_sa_blocked; /* ls count waiting for
-+ * statahead */
-+ unsigned long long ll_sa_cached; /* ls count got in cache */
-+ unsigned long long ll_sa_hit; /* hit count */
-+ unsigned long long ll_sa_miss; /* miss count */
-+};
-+
-+#define LL_DEFAULT_MAX_RW_CHUNK (32 * 1024 * 1024)
-+
-+struct ll_ra_read {
-+ pgoff_t lrr_start;
-+ pgoff_t lrr_count;
-+ struct task_struct *lrr_reader;
-+ struct list_head lrr_linkage;
-+};
-+
-+/*
-+ * per file-descriptor read-ahead data.
-+ */
-+struct ll_readahead_state {
-+ spinlock_t ras_lock;
-+ /*
-+ * index of the last page that read(2) needed and that wasn't in the
-+ * cache. Used by ras_update() to detect seeks.
-+ *
-+ * XXX nikita: if access seeks into cached region, Lustre doesn't see
-+ * this.
-+ */
-+ unsigned long ras_last_readpage;
-+ /*
-+ * number of pages read after last read-ahead window reset. As window
-+ * is reset on each seek, this is effectively a number of consecutive
-+ * accesses. Maybe ->ras_accessed_in_window is better name.
-+ *
-+ * XXX nikita: window is also reset (by ras_update()) when Lustre
-+ * believes that memory pressure evicts read-ahead pages. In that
-+ * case, it probably doesn't make sense to expand window to
-+ * PTLRPC_MAX_BRW_PAGES on the third access.
-+ */
-+ unsigned long ras_consecutive_pages;
-+ /*
-+         * number of read requests after the last read-ahead window reset.
-+         * As window is reset on each seek, this is effectively the number
-+         * of consecutive read requests and is used to trigger read-ahead.
-+ */
-+ unsigned long ras_consecutive_requests;
-+ /*
-+ * Parameters of current read-ahead window. Handled by
-+ * ras_update(). On the initial access to the file or after a seek,
-+ * window is reset to 0. After 3 consecutive accesses, window is
-+ * expanded to PTLRPC_MAX_BRW_PAGES. Afterwards, window is enlarged by
-+ * PTLRPC_MAX_BRW_PAGES chunks up to ->ra_max_pages.
-+ */
-+ unsigned long ras_window_start, ras_window_len;
-+ /*
-+ * Where next read-ahead should start at. This lies within read-ahead
-+ * window. Read-ahead window is read in pieces rather than at once
-+ * because: 1. lustre limits total number of pages under read-ahead by
-+ * ->ra_max_pages (see ll_ra_count_get()), 2. client cannot read pages
-+ * not covered by DLM lock.
-+ */
-+ unsigned long ras_next_readahead;
-+ /*
-+ * Total number of ll_file_read requests issued, reads originating
-+ * due to mmap are not counted in this total. This value is used to
-+ * trigger full file read-ahead after multiple reads to a small file.
-+ */
-+ unsigned long ras_requests;
-+ /*
-+         * Page index with respect to the current request; this value
-+ * will not be accurate when dealing with reads issued via mmap.
-+ */
-+ unsigned long ras_request_index;
-+ /*
-+         * list of struct ll_ra_read's, one per read(2) call currently in
-+ * progress against this file descriptor. Used by read-ahead code,
-+ * protected by ->ras_lock.
-+ */
-+ struct list_head ras_read_beads;
-+ /*
-+ * The following 3 items are used for detecting the stride I/O
-+ * mode.
-+ * In stride I/O mode,
-+ * ...............|-----data-----|****gap*****|--------|******|....
-+ * offset |-stride_pages-|-stride_gap-|
-+ * ras_stride_offset = offset;
-+ * ras_stride_length = stride_pages + stride_gap;
-+ * ras_stride_pages = stride_pages;
-+ * Note: all these three items are counted by pages.
-+ */
-+ unsigned long ras_stride_length;
-+ unsigned long ras_stride_pages;
-+ pgoff_t ras_stride_offset;
-+ /*
-+         * number of consecutive stride requests; similar to
-+         * ras_consecutive_requests, but used for stride I/O mode.
-+         * Note: stride read-ahead is enabled only once more than 2
-+         * consecutive stride requests have been detected.
-+ */
-+ unsigned long ras_consecutive_stride_requests;
-+};
-+
-+extern cfs_mem_cache_t *ll_file_data_slab;
-+struct lustre_handle;
-+struct ll_file_data {
-+ struct ll_readahead_state fd_ras;
-+ int fd_omode;
-+ struct lustre_handle fd_cwlockh;
-+ unsigned long fd_gid;
-+ __u32 fd_flags;
-+};
-+
-+struct lov_stripe_md;
-+
-+extern spinlock_t inode_lock;
-+
-+extern struct proc_dir_entry *proc_lustre_fs_root;
-+
-+static inline struct inode *ll_info2i(struct ll_inode_info *lli)
-+{
-+ return &lli->lli_vfs_inode;
-+}
-+
-+struct it_cb_data {
-+ struct inode *icbd_parent;
-+ struct dentry **icbd_childp;
-+ obd_id hash;
-+};
-+
-+void ll_i2gids(__u32 *suppgids, struct inode *i1,struct inode *i2);
-+
-+#define LLAP_MAGIC 98764321
-+
-+extern cfs_mem_cache_t *ll_async_page_slab;
-+extern size_t ll_async_page_slab_size;
-+struct ll_async_page {
-+ int llap_magic;
-+ /* only trust these if the page lock is providing exclusion */
-+ unsigned int llap_write_queued:1,
-+ llap_defer_uptodate:1,
-+ llap_origin:3,
-+ llap_ra_used:1,
-+ llap_ignore_quota:1,
-+ llap_nocache:1,
-+ llap_lockless_io_page:1;
-+ void *llap_cookie;
-+ struct page *llap_page;
-+ struct list_head llap_pending_write;
-+ struct list_head llap_pglist_item;
-+ /* checksum for paranoid I/O debugging */
-+ __u32 llap_checksum;
-+};
-+
-+/*
-+ * enumeration of llap_from_page() call-sites. Used to export statistics in
-+ * /proc/fs/lustre/llite/fsN/dump_page_cache.
-+ */
-+enum {
-+ LLAP_ORIGIN_UNKNOWN = 0,
-+ LLAP_ORIGIN_READPAGE,
-+ LLAP_ORIGIN_READAHEAD,
-+ LLAP_ORIGIN_COMMIT_WRITE,
-+ LLAP_ORIGIN_WRITEPAGE,
-+ LLAP_ORIGIN_REMOVEPAGE,
-+ LLAP_ORIGIN_LOCKLESS_IO,
-+ LLAP__ORIGIN_MAX,
-+};
-+extern char *llap_origins[];
-+
-+#ifdef HAVE_REGISTER_CACHE
-+#define ll_register_cache(cache) register_cache(cache)
-+#define ll_unregister_cache(cache) unregister_cache(cache)
-+#else
-+#define ll_register_cache(cache) do {} while (0)
-+#define ll_unregister_cache(cache) do {} while (0)
-+#endif
-+
-+void ll_ra_read_in(struct file *f, struct ll_ra_read *rar);
-+void ll_ra_read_ex(struct file *f, struct ll_ra_read *rar);
-+struct ll_ra_read *ll_ra_read_get(struct file *f);
-+
-+/* llite/lproc_llite.c */
-+#ifdef LPROCFS
-+int lprocfs_register_mountpoint(struct proc_dir_entry *parent,
-+ struct super_block *sb, char *osc, char *mdc);
-+void lprocfs_unregister_mountpoint(struct ll_sb_info *sbi);
-+void ll_stats_ops_tally(struct ll_sb_info *sbi, int op, int count);
-+void lprocfs_llite_init_vars(struct lprocfs_static_vars *lvars);
-+#else
-+static inline int lprocfs_register_mountpoint(struct proc_dir_entry *parent,
-+ struct super_block *sb, char *osc, char *mdc){return 0;}
-+static inline void lprocfs_unregister_mountpoint(struct ll_sb_info *sbi) {}
-+static void ll_stats_ops_tally(struct ll_sb_info *sbi, int op, int count) {}
-+static void lprocfs_llite_init_vars(struct lprocfs_static_vars *lvars)
-+{
-+ memset(lvars, 0, sizeof(*lvars));
-+}
-+#endif
-+
-+
-+/* llite/dir.c */
-+extern struct file_operations ll_dir_operations;
-+extern struct inode_operations ll_dir_inode_operations;
-+
-+struct page *ll_get_dir_page(struct inode *dir, unsigned long n);
-+
-+static inline unsigned ll_dir_rec_len(unsigned name_len)
-+{
-+ return (name_len + 8 + LL_DIR_PAD - 1) & ~(LL_DIR_PAD - 1);
-+}
-+
-+static inline struct ll_dir_entry *ll_entry_at(void *base, unsigned offset)
-+{
-+ return (struct ll_dir_entry *)((char *)base + offset);
-+}
-+
-+/*
-+ * p is at least 6 bytes before the end of page
-+ */
-+static inline struct ll_dir_entry *ll_dir_next_entry(struct ll_dir_entry *p)
-+{
-+ return ll_entry_at(p, le16_to_cpu(p->lde_rec_len));
-+}
-+
-+static inline void ll_put_page(struct page *page)
-+{
-+ kunmap(page);
-+ page_cache_release(page);
-+}
-+
-+static inline unsigned long dir_pages(struct inode *inode)
-+{
-+ return (inode->i_size + CFS_PAGE_SIZE - 1) >> CFS_PAGE_SHIFT;
-+}
-+
-+int ll_objects_destroy(struct ptlrpc_request *request, struct inode *dir);
-+struct inode *ll_iget(struct super_block *sb, ino_t hash,
-+ struct lustre_md *lic);
-+int ll_mdc_cancel_unused(struct lustre_handle *, struct inode *, int flags,
-+ void *opaque);
-+int ll_mdc_blocking_ast(struct ldlm_lock *, struct ldlm_lock_desc *,
-+ void *data, int flag);
-+int ll_prepare_mdc_op_data(struct mdc_op_data *,
-+ struct inode *i1, struct inode *i2,
-+ const char *name, int namelen, int mode, void *data);
-+#ifndef HAVE_VFS_INTENT_PATCHES
-+struct lookup_intent *ll_convert_intent(struct open_intent *oit,
-+ int lookup_flags);
-+#endif
-+void ll_pin_extent_cb(void *data);
-+int ll_page_removal_cb(void *data, int discard);
-+int ll_extent_lock_cancel_cb(struct ldlm_lock *lock, struct ldlm_lock_desc *new,
-+ void *data, int flag);
-+int lookup_it_finish(struct ptlrpc_request *request, int offset,
-+ struct lookup_intent *it, void *data);
-+void ll_lookup_finish_locks(struct lookup_intent *it, struct dentry *dentry);
-+
-+/* llite/rw.c */
-+int ll_prepare_write(struct file *, struct page *, unsigned from, unsigned to);
-+int ll_commit_write(struct file *, struct page *, unsigned from, unsigned to);
-+int ll_writepage(struct page *page);
-+void ll_inode_fill_obdo(struct inode *inode, int cmd, struct obdo *oa);
-+int ll_ap_completion(void *data, int cmd, struct obdo *oa, int rc);
-+int llap_shrink_cache(struct ll_sb_info *sbi, int shrink_fraction);
-+extern struct cache_definition ll_cache_definition;
-+void ll_removepage(struct page *page);
-+int ll_readpage(struct file *file, struct page *page);
-+struct ll_async_page *llap_cast_private(struct page *page);
-+void ll_readahead_init(struct inode *inode, struct ll_readahead_state *ras);
-+void ll_ra_accounting(struct ll_async_page *llap,struct address_space *mapping);
-+void ll_truncate(struct inode *inode);
-+int ll_file_punch(struct inode *, loff_t, int);
-+ssize_t ll_file_lockless_io(struct file *, const struct iovec *,
-+ unsigned long, loff_t *, int, ssize_t);
-+void ll_clear_file_contended(struct inode*);
-+int ll_sync_page_range(struct inode *, struct address_space *, loff_t, size_t);
-+
-+/* llite/file.c */
-+extern struct file_operations ll_file_operations;
-+extern struct file_operations ll_file_operations_flock;
-+extern struct file_operations ll_file_operations_noflock;
-+extern struct inode_operations ll_file_inode_operations;
-+extern int ll_inode_revalidate_it(struct dentry *, struct lookup_intent *);
-+extern int ll_have_md_lock(struct inode *inode, __u64 bits);
-+int ll_region_mapped(unsigned long addr, size_t count);
-+int ll_extent_lock(struct ll_file_data *, struct inode *,
-+ struct lov_stripe_md *, int mode, ldlm_policy_data_t *,
-+ struct lustre_handle *, int ast_flags);
-+int ll_extent_unlock(struct ll_file_data *, struct inode *,
-+ struct lov_stripe_md *, int mode, struct lustre_handle *);
-+int ll_file_open(struct inode *inode, struct file *file);
-+int ll_file_release(struct inode *inode, struct file *file);
-+int ll_lsm_getattr(struct obd_export *, struct lov_stripe_md *, struct obdo *);
-+int ll_glimpse_ioctl(struct ll_sb_info *sbi,
-+ struct lov_stripe_md *lsm, lstat_t *st);
-+int ll_glimpse_size(struct inode *inode, int ast_flags);
-+int ll_local_open(struct file *file,
-+ struct lookup_intent *it, struct ll_file_data *fd,
-+ struct obd_client_handle *och);
-+int ll_release_openhandle(struct dentry *, struct lookup_intent *);
-+int ll_mdc_close(struct obd_export *mdc_exp, struct inode *inode,
-+ struct file *file);
-+int ll_mdc_real_close(struct inode *inode, int flags);
-+extern void ll_rw_stats_tally(struct ll_sb_info *sbi, pid_t pid, struct file
-+ *file, size_t count, int rw);
-+int ll_getattr_it(struct vfsmount *mnt, struct dentry *de,
-+ struct lookup_intent *it, struct kstat *stat);
-+int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat);
-+struct ll_file_data *ll_file_data_get(void);
-+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
-+int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd);
-+#else
-+int ll_inode_permission(struct inode *inode, int mask);
-+#endif
-+int ll_lov_setstripe_ea_info(struct inode *inode, struct file *file,
-+ int flags, struct lov_user_md *lum,
-+ int lum_size);
-+int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename,
-+ struct lov_mds_md **lmm, int *lmm_size,
-+ struct ptlrpc_request **request);
-+int ll_dir_setstripe(struct inode *inode, struct lov_user_md *lump,
-+ int set_default);
-+int ll_dir_getstripe(struct inode *inode, struct lov_mds_md **lmm,
-+ int *lmm_size, struct ptlrpc_request **request);
-+int ll_fsync(struct file *file, struct dentry *dentry, int data);
-+int ll_fiemap(struct inode *inode, struct ll_user_fiemap *fiemap,
-+ int num_bytes);
-+
-+/* llite/dcache.c */
-+/* llite/namei.c */
-+/**
-+ * protect race ll_find_aliases vs ll_revalidate_it vs ll_unhash_aliases
-+ */
-+extern spinlock_t ll_lookup_lock;
-+extern struct dentry_operations ll_d_ops;
-+void ll_intent_drop_lock(struct lookup_intent *);
-+void ll_intent_release(struct lookup_intent *);
-+extern void ll_set_dd(struct dentry *de);
-+int ll_drop_dentry(struct dentry *dentry);
-+void ll_unhash_aliases(struct inode *);
-+void ll_frob_intent(struct lookup_intent **itp, struct lookup_intent *deft);
-+void ll_lookup_finish_locks(struct lookup_intent *it, struct dentry *dentry);
-+int ll_dcompare(struct dentry *parent, struct qstr *d_name, struct qstr *name);
-+int revalidate_it_finish(struct ptlrpc_request *request, int offset,
-+ struct lookup_intent *it, struct dentry *de);
-+
-+/* llite/llite_lib.c */
-+extern struct super_operations lustre_super_operations;
-+
-+char *ll_read_opt(const char *opt, char *data);
-+void ll_lli_init(struct ll_inode_info *lli);
-+int ll_fill_super(struct super_block *sb);
-+void ll_put_super(struct super_block *sb);
-+void ll_kill_super(struct super_block *sb);
-+struct inode *ll_inode_from_lock(struct ldlm_lock *lock);
-+void ll_clear_inode(struct inode *inode);
-+int ll_setattr_raw(struct inode *inode, struct iattr *attr);
-+int ll_setattr(struct dentry *de, struct iattr *attr);
-+#ifndef HAVE_STATFS_DENTRY_PARAM
-+int ll_statfs(struct super_block *sb, struct kstatfs *sfs);
-+#else
-+int ll_statfs(struct dentry *de, struct kstatfs *sfs);
-+#endif
-+int ll_statfs_internal(struct super_block *sb, struct obd_statfs *osfs,
-+ __u64 max_age, __u32 flags);
-+void ll_update_inode(struct inode *inode, struct lustre_md *md);
-+void ll_read_inode2(struct inode *inode, void *opaque);
-+int ll_iocontrol(struct inode *inode, struct file *file,
-+ unsigned int cmd, unsigned long arg);
-+#ifdef HAVE_UMOUNTBEGIN_VFSMOUNT
-+void ll_umount_begin(struct vfsmount *vfsmnt, int flags);
-+#else
-+void ll_umount_begin(struct super_block *sb);
-+#endif
-+int ll_remount_fs(struct super_block *sb, int *flags, char *data);
-+int ll_show_options(struct seq_file *seq, struct vfsmount *vfs);
-+int ll_prep_inode(struct obd_export *exp, struct inode **inode,
-+ struct ptlrpc_request *req, int offset, struct super_block *);
-+void lustre_dump_dentry(struct dentry *, int recur);
-+void lustre_dump_inode(struct inode *);
-+struct ll_async_page *llite_pglist_next_llap(struct ll_sb_info *sbi,
-+ struct list_head *list);
-+int ll_obd_statfs(struct inode *inode, void *arg);
-+int ll_get_max_mdsize(struct ll_sb_info *sbi, int *max_mdsize);
-+int ll_process_config(struct lustre_cfg *lcfg);
-+
-+/* llite/llite_nfs.c */
-+extern struct export_operations lustre_export_operations;
-+__u32 get_uuid2int(const char *name, int len);
-+struct dentry *ll_fh_to_dentry(struct super_block *sb, __u32 *data, int len,
-+ int fhtype, int parent);
-+int ll_dentry_to_fh(struct dentry *, __u32 *datap, int *lenp, int need_parent);
-+
-+/* llite/special.c */
-+extern struct inode_operations ll_special_inode_operations;
-+extern struct file_operations ll_special_chr_inode_fops;
-+extern struct file_operations ll_special_chr_file_fops;
-+extern struct file_operations ll_special_blk_inode_fops;
-+extern struct file_operations ll_special_fifo_inode_fops;
-+extern struct file_operations ll_special_fifo_file_fops;
-+extern struct file_operations ll_special_sock_inode_fops;
-+
-+/* llite/symlink.c */
-+extern struct inode_operations ll_fast_symlink_inode_operations;
-+
-+/* llite/llite_close.c */
-+struct ll_close_queue {
-+ spinlock_t lcq_lock;
-+ struct list_head lcq_list;
-+ wait_queue_head_t lcq_waitq;
-+ struct completion lcq_comp;
-+};
-+
-+#ifdef HAVE_CLOSE_THREAD
-+void llap_write_pending(struct inode *inode, struct ll_async_page *llap);
-+void llap_write_complete(struct inode *inode, struct ll_async_page *llap);
-+void ll_open_complete(struct inode *inode);
-+int ll_is_inode_dirty(struct inode *inode);
-+void ll_try_done_writing(struct inode *inode);
-+void ll_queue_done_writing(struct inode *inode);
-+#else
-+static inline void llap_write_pending(struct inode *inode,
-+ struct ll_async_page *llap) { return; };
-+static inline void llap_write_complete(struct inode *inode,
-+ struct ll_async_page *llap) { return; };
-+static inline void ll_open_complete(struct inode *inode) { return; };
-+static inline int ll_is_inode_dirty(struct inode *inode) { return 0; };
-+static inline void ll_try_done_writing(struct inode *inode) { return; };
-+static inline void ll_queue_done_writing(struct inode *inode) { return; };
-+//static inline void ll_close_thread_shutdown(struct ll_close_queue *lcq) { return; };
-+//static inline int ll_close_thread_start(struct ll_close_queue **lcq_ret) { return 0; };
-+#endif
-+void ll_close_thread_shutdown(struct ll_close_queue *lcq);
-+int ll_close_thread_start(struct ll_close_queue **lcq_ret);
-+
-+/* llite/llite_mmap.c */
-+typedef struct rb_root rb_root_t;
-+typedef struct rb_node rb_node_t;
-+
-+struct ll_lock_tree_node;
-+struct ll_lock_tree {
-+ rb_root_t lt_root;
-+ struct list_head lt_locked_list;
-+ struct ll_file_data *lt_fd;
-+};
-+
-+int ll_teardown_mmaps(struct address_space *mapping, __u64 first, __u64 last);
-+int ll_file_mmap(struct file * file, struct vm_area_struct * vma);
-+struct ll_lock_tree_node * ll_node_from_inode(struct inode *inode, __u64 start,
-+ __u64 end, ldlm_mode_t mode);
-+int ll_tree_lock(struct ll_lock_tree *tree,
-+ struct ll_lock_tree_node *first_node,
-+ const char *buf, size_t count, int ast_flags);
-+int ll_tree_lock_iov(struct ll_lock_tree *tree,
-+ struct ll_lock_tree_node *first_node,
-+ const struct iovec *iov, unsigned long nr_segs,
-+ int ast_flags);
-+int ll_tree_unlock(struct ll_lock_tree *tree);
-+
-+#define ll_s2sbi(sb) (s2lsi(sb)->lsi_llsbi)
-+
-+static inline __u64 ll_ts2u64(struct timespec *time)
-+{
-+ __u64 t = time->tv_sec;
-+ return t;
-+}
-+
-+/* don't need an addref as the sb_info should be holding one */
-+static inline struct obd_export *ll_s2obdexp(struct super_block *sb)
-+{
-+ return ll_s2sbi(sb)->ll_osc_exp;
-+}
-+
-+/* don't need an addref as the sb_info should be holding one */
-+static inline struct obd_export *ll_s2mdcexp(struct super_block *sb)
-+{
-+ return ll_s2sbi(sb)->ll_mdc_exp;
-+}
-+
-+static inline struct client_obd *sbi2mdc(struct ll_sb_info *sbi)
-+{
-+ struct obd_device *obd = sbi->ll_mdc_exp->exp_obd;
-+ if (obd == NULL)
-+ LBUG();
-+ return &obd->u.cli;
-+}
-+
-+// FIXME: replace the name of this with LL_SB to conform to kernel stuff
-+static inline struct ll_sb_info *ll_i2sbi(struct inode *inode)
-+{
-+ return ll_s2sbi(inode->i_sb);
-+}
-+
-+static inline struct obd_export *ll_i2obdexp(struct inode *inode)
-+{
-+ return ll_s2obdexp(inode->i_sb);
-+}
-+
-+static inline struct obd_export *ll_i2mdcexp(struct inode *inode)
-+{
-+ return ll_s2mdcexp(inode->i_sb);
-+}
-+
-+static inline void ll_inode2fid(struct ll_fid *fid, struct inode *inode)
-+{
-+ mdc_pack_fid(fid, inode->i_ino, inode->i_generation,
-+ inode->i_mode & S_IFMT);
-+}
-+
-+static inline int ll_mds_max_easize(struct super_block *sb)
-+{
-+ return sbi2mdc(ll_s2sbi(sb))->cl_max_mds_easize;
-+}
-+
-+static inline __u64 ll_file_maxbytes(struct inode *inode)
-+{
-+ return ll_i2info(inode)->lli_maxbytes;
-+}
-+
-+/* llite/xattr.c */
-+int ll_setxattr(struct dentry *dentry, const char *name,
-+ const void *value, size_t size, int flags);
-+ssize_t ll_getxattr(struct dentry *dentry, const char *name,
-+ void *buffer, size_t size);
-+ssize_t ll_listxattr(struct dentry *dentry, char *buffer, size_t size);
-+int ll_removexattr(struct dentry *dentry, const char *name);
-+
-+/* statahead.c */
-+
-+#define LL_SA_RPC_MIN 2
-+#define LL_SA_RPC_DEF 32
-+#define LL_SA_RPC_MAX 8192
-+
-+/* per inode struct, for dir only */
-+struct ll_statahead_info {
-+ struct inode *sai_inode;
-+ unsigned int sai_generation; /* generation for statahead */
-+ atomic_t sai_refcount; /* when access this struct, hold
-+ * refcount */
-+ unsigned int sai_sent; /* stat requests sent count */
-+ unsigned int sai_replied; /* stat requests which received
-+ * reply */
-+ unsigned int sai_max; /* max ahead of lookup */
-+ unsigned int sai_index; /* index of statahead entry */
-+ unsigned int sai_index_next; /* index for the next statahead
-+ * entry to be stated */
-+ unsigned int sai_hit; /* hit count */
-+ unsigned int sai_miss; /* miss count:
-+ * for "ls -al" case, it includes
-+ * hidden dentry miss;
-+ * for "ls -l" case, it does not
-+ * include hidden dentry miss.
-+ * "sai_miss_hidden" is used for
-+ * the latter case.
-+ */
-+ unsigned int sai_consecutive_miss; /* consecutive miss */
-+ unsigned int sai_miss_hidden;/* "ls -al", but first dentry
-+ * is not a hidden one */
-+ unsigned int sai_skip_hidden;/* skipped hidden dentry count */
-+ unsigned int sai_ls_all:1; /* "ls -al", do stat-ahead for
-+ * hidden entries */
-+ cfs_waitq_t sai_waitq; /* stat-ahead wait queue */
-+ struct ptlrpc_thread sai_thread; /* stat-ahead thread */
-+ struct list_head sai_entries_sent; /* entries sent out */
-+ struct list_head sai_entries_received; /* entries returned */
-+ struct list_head sai_entries_stated; /* entries stated */
-+};
-+
-+int do_statahead_enter(struct inode *dir, struct dentry **dentry, int lookup);
-+void ll_statahead_exit(struct dentry *dentry, int result);
-+void ll_stop_statahead(struct inode *inode, void *key);
-+
-+static inline
-+void ll_statahead_mark(struct dentry *dentry)
-+{
-+ struct ll_inode_info *lli = ll_i2info(dentry->d_parent->d_inode);
-+ struct ll_dentry_data *ldd = ll_d2d(dentry);
-+
-+ /* not the same process, don't mark */
-+ if (lli->lli_opendir_pid != cfs_curproc_pid())
-+ return;
-+
-+ spin_lock(&lli->lli_lock);
-+ if (likely(lli->lli_sai != NULL && ldd != NULL))
-+ ldd->lld_sa_generation = lli->lli_sai->sai_generation;
-+ spin_unlock(&lli->lli_lock);
-+}
-+
-+static inline
-+int ll_statahead_enter(struct inode *dir, struct dentry **dentryp, int lookup)
-+{
-+ struct ll_sb_info *sbi = ll_i2sbi(dir);
-+ struct ll_inode_info *lli = ll_i2info(dir);
-+ struct ll_dentry_data *ldd = ll_d2d(*dentryp);
-+
-+ if (sbi->ll_sa_max == 0)
-+ return -ENOTSUPP;
-+
-+ /* not the same process, don't statahead */
-+ if (lli->lli_opendir_pid != cfs_curproc_pid())
-+ return -EBADF;
-+
-+ /*
-+ * When "ls" is run on a dentry, the system triggers "revalidate" or
-+ * "lookup" more than once: for "getattr", for "getxattr", and maybe
-+ * for others. Under patchless client mode the operation intent is not
-+ * accurate, which may mislead the statahead thread. For example, the
-+ * "revalidate" calls for "getattr" and "getxattr" of a dentry may
-+ * carry the same operation intent -- "IT_GETATTR".
-+ * In fact, a dentry should interact with the statahead thread only
-+ * once, otherwise the statahead window becomes confused.
-+ * The solution is as follows:
-+ * Assign "lld_sa_generation" from "sai_generation" when a dentry is
-+ * handled by "IT_GETATTR" for the first time; subsequent "IT_GETATTR"
-+ * calls bypass the statahead thread by checking
-+ * "lld_sa_generation == lli_sai->sai_generation".
-+ */
-+ if (ldd && lli->lli_sai &&
-+ ldd->lld_sa_generation == lli->lli_sai->sai_generation)
-+ return -EAGAIN;
-+
-+ return do_statahead_enter(dir, dentryp, lookup);
-+}
-+
-+static inline void ll_dops_init(struct dentry *de, int block)
-+{
-+ struct ll_dentry_data *lld = ll_d2d(de);
-+
-+ if (lld == NULL && block != 0) {
-+ ll_set_dd(de);
-+ lld = ll_d2d(de);
-+ }
-+
-+ if (lld != NULL)
-+ lld->lld_sa_generation = 0;
-+
-+ de->d_op = &ll_d_ops;
-+}
-+
-+/* llite ioctl register support routine */
-+#ifdef __KERNEL__
-+enum llioc_iter {
-+ LLIOC_CONT = 0,
-+ LLIOC_STOP
-+};
-+
-+#define LLIOC_MAX_CMD 256
-+
-+/*
-+ * Rules to write a callback function:
-+ *
-+ * Parameters:
-+ * @magic: The dynamic ioctl call routine will feed this value with the
-+ * pointer returned by ll_iocontrol_register. Callback functions should
-+ * use this data to check for potential collisions of ioctl cmds. If a
-+ * collision is found, the callback function should return LLIOC_CONT.
-+ * @rcp: The result of the ioctl command.
-+ *
-+ * Return values:
-+ * If @magic matches the pointer returned by ll_iocontrol_register, the
-+ * callback should return LLIOC_STOP; return LLIOC_CONT otherwise.
-+ */
-+typedef enum llioc_iter (*llioc_callback_t)(struct inode *inode,
-+ struct file *file, unsigned int cmd, unsigned long arg,
-+ void *magic, int *rcp);
-+
-+enum llioc_iter ll_iocontrol_call(struct inode *inode, struct file *file,
-+ unsigned int cmd, unsigned long arg, int *rcp);
-+
-+/* export functions */
-+/* Register an ioctl block dynamically for a regular file.
-+ *
-+ * @cmd: the array of ioctl commands
-+ * @count: number of commands in @cmd
-+ * @cb: callback function; it will be called if an ioctl command is found to
-+ * belong to the command list @cmd.
-+ *
-+ * Return value:
-+ * A magic pointer will be returned on success;
-+ * otherwise, NULL will be returned.
-+ */
-+void *ll_iocontrol_register(llioc_callback_t cb, int count, unsigned int *cmd);
-+void ll_iocontrol_unregister(void *magic);
-+
-+#endif
-+
-+#endif /* LLITE_INTERNAL_H */
-diff -urNad lustre~/lustre/llite/llite_lib.c lustre/lustre/llite/llite_lib.c
---- lustre~/lustre/llite/llite_lib.c 2009-03-12 10:32:27.000000000 +0100
-+++ lustre/lustre/llite/llite_lib.c 2009-03-13 09:45:03.000000000 +0100
-@@ -1346,7 +1346,7 @@
- rc = vmtruncate(inode, new_size);
- clear_bit(LLI_F_SRVLOCK, &lli->lli_flags);
- if (rc != 0) {
-- LASSERT(atomic_read(&lli->lli_size_sem.count) <= 0);
-+ LASSERT(SEM_COUNT(&lli->lli_size_sem) <= 0);
- ll_inode_size_unlock(inode, 0);
- }
- }
-diff -urNad lustre~/lustre/llite/llite_lib.c.orig lustre/lustre/llite/llite_lib.c.orig
---- lustre~/lustre/llite/llite_lib.c.orig 1970-01-01 00:00:00.000000000 +0000
-+++ lustre/lustre/llite/llite_lib.c.orig 2009-03-13 09:45:03.000000000 +0100
-@@ -0,0 +1,2232 @@
-+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-+ * vim:expandtab:shiftwidth=8:tabstop=8:
-+ *
-+ * GPL HEADER START
-+ *
-+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
-+ *
-+ * This program is free software; you can redistribute it and/or modify
-+ * it under the terms of the GNU General Public License version 2 only,
-+ * as published by the Free Software Foundation.
-+ *
-+ * This program is distributed in the hope that it will be useful, but
-+ * WITHOUT ANY WARRANTY; without even the implied warranty of
-+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-+ * General Public License version 2 for more details (a copy is included
-+ * in the LICENSE file that accompanied this code).
-+ *
-+ * You should have received a copy of the GNU General Public License
-+ * version 2 along with this program; If not, see
-+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
-+ *
-+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
-+ * CA 95054 USA or visit www.sun.com if you need additional information or
-+ * have any questions.
-+ *
-+ * GPL HEADER END
-+ */
-+/*
-+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved
-+ * Use is subject to license terms.
-+ */
-+/*
-+ * This file is part of Lustre, http://www.lustre.org/
-+ * Lustre is a trademark of Sun Microsystems, Inc.
-+ *
-+ * lustre/llite/llite_lib.c
-+ *
-+ * Lustre Light Super operations
-+ */
-+
-+#define DEBUG_SUBSYSTEM S_LLITE
-+
-+#include <linux/module.h>
-+#include <linux/types.h>
-+#include <linux/random.h>
-+#include <linux/version.h>
-+#include <linux/mm.h>
-+
-+#include <lustre_lite.h>
-+#include <lustre_ha.h>
-+#include <lustre_dlm.h>
-+#include <lprocfs_status.h>
-+#include <lustre_disk.h>
-+#include <lustre_param.h>
-+#include <lustre_cache.h>
-+#include "llite_internal.h"
-+
-+cfs_mem_cache_t *ll_file_data_slab;
-+
-+LIST_HEAD(ll_super_blocks);
-+spinlock_t ll_sb_lock = SPIN_LOCK_UNLOCKED;
-+
-+extern struct address_space_operations ll_aops;
-+extern struct address_space_operations ll_dir_aops;
-+
-+#ifndef log2
-+#define log2(n) ffz(~(n))
-+#endif
-+
-+
-+static struct ll_sb_info *ll_init_sbi(void)
-+{
-+ struct ll_sb_info *sbi = NULL;
-+ unsigned long pages;
-+ struct sysinfo si;
-+ class_uuid_t uuid;
-+ int i;
-+ ENTRY;
-+
-+ OBD_ALLOC(sbi, sizeof(*sbi));
-+ if (!sbi)
-+ RETURN(NULL);
-+
-+ spin_lock_init(&sbi->ll_lock);
-+ spin_lock_init(&sbi->ll_lco.lco_lock);
-+ spin_lock_init(&sbi->ll_pp_extent_lock);
-+ spin_lock_init(&sbi->ll_process_lock);
-+ sbi->ll_rw_stats_on = 0;
-+ INIT_LIST_HEAD(&sbi->ll_pglist);
-+
-+ si_meminfo(&si);
-+ pages = si.totalram - si.totalhigh;
-+ if (pages >> (20 - CFS_PAGE_SHIFT) < 512) {
-+#ifdef HAVE_BGL_SUPPORT
-+ sbi->ll_async_page_max = pages / 4;
-+#else
-+ sbi->ll_async_page_max = pages / 2;
-+#endif
-+ } else {
-+ sbi->ll_async_page_max = (pages / 4) * 3;
-+ }
-+ sbi->ll_ra_info.ra_max_pages = min(pages / 32,
-+ SBI_DEFAULT_READAHEAD_MAX);
-+ sbi->ll_ra_info.ra_max_read_ahead_whole_pages =
-+ SBI_DEFAULT_READAHEAD_WHOLE_MAX;
-+ sbi->ll_contention_time = SBI_DEFAULT_CONTENTION_SECONDS;
-+ sbi->ll_lockless_truncate_enable = SBI_DEFAULT_LOCKLESS_TRUNCATE_ENABLE;
-+ INIT_LIST_HEAD(&sbi->ll_conn_chain);
-+ INIT_LIST_HEAD(&sbi->ll_orphan_dentry_list);
-+
-+ ll_generate_random_uuid(uuid);
-+ class_uuid_unparse(uuid, &sbi->ll_sb_uuid);
-+ CDEBUG(D_CONFIG, "generated uuid: %s\n", sbi->ll_sb_uuid.uuid);
-+
-+ spin_lock(&ll_sb_lock);
-+ list_add_tail(&sbi->ll_list, &ll_super_blocks);
-+ spin_unlock(&ll_sb_lock);
-+
-+#ifdef ENABLE_CHECKSUM
-+ sbi->ll_flags |= LL_SBI_DATA_CHECKSUM;
-+#endif
-+#ifdef ENABLE_LLITE_CHECKSUM
-+ sbi->ll_flags |= LL_SBI_LLITE_CHECKSUM;
-+#endif
-+
-+#ifdef HAVE_LRU_RESIZE_SUPPORT
-+ sbi->ll_flags |= LL_SBI_LRU_RESIZE;
-+#endif
-+
-+#ifdef HAVE_EXPORT___IGET
-+ INIT_LIST_HEAD(&sbi->ll_deathrow);
-+ spin_lock_init(&sbi->ll_deathrow_lock);
-+#endif
-+ for (i = 0; i <= LL_PROCESS_HIST_MAX; i++) {
-+ spin_lock_init(&sbi->ll_rw_extents_info.pp_extents[i].pp_r_hist.oh_lock);
-+ spin_lock_init(&sbi->ll_rw_extents_info.pp_extents[i].pp_w_hist.oh_lock);
-+ }
-+
-+ /* metadata statahead is enabled by default */
-+ sbi->ll_sa_max = LL_SA_RPC_DEF;
-+
-+ RETURN(sbi);
-+}
-+
-+void ll_free_sbi(struct super_block *sb)
-+{
-+ struct ll_sb_info *sbi = ll_s2sbi(sb);
-+ ENTRY;
-+
-+ if (sbi != NULL) {
-+ spin_lock(&ll_sb_lock);
-+ list_del(&sbi->ll_list);
-+ spin_unlock(&ll_sb_lock);
-+ OBD_FREE(sbi, sizeof(*sbi));
-+ }
-+ EXIT;
-+}
-+
-+static struct dentry_operations ll_d_root_ops = {
-+#ifdef DCACHE_LUSTRE_INVALID
-+ .d_compare = ll_dcompare,
-+#endif
-+};
-+
-+static int client_common_fill_super(struct super_block *sb,
-+ char *mdc, char *osc)
-+{
-+ struct inode *root = 0;
-+ struct ll_sb_info *sbi = ll_s2sbi(sb);
-+ struct obd_device *obd;
-+ struct ll_fid rootfid;
-+ struct obd_statfs osfs;
-+ struct ptlrpc_request *request = NULL;
-+ struct lustre_handle osc_conn = {0, };
-+ struct lustre_handle mdc_conn = {0, };
-+ struct lustre_md md;
-+ struct obd_connect_data *data = NULL;
-+ int err, checksum;
-+ ENTRY;
-+
-+ obd = class_name2obd(mdc);
-+ if (!obd) {
-+ CERROR("MDC %s: not setup or attached\n", mdc);
-+ RETURN(-EINVAL);
-+ }
-+
-+ OBD_ALLOC(data, sizeof(*data));
-+ if (data == NULL)
-+ RETURN(-ENOMEM);
-+
-+ if (proc_lustre_fs_root) {
-+ err = lprocfs_register_mountpoint(proc_lustre_fs_root, sb,
-+ osc, mdc);
-+ if (err < 0)
-+ CERROR("could not register mount in /proc/fs/lustre\n");
-+ }
-+
-+ /* indicate the features supported by this client */
-+ data->ocd_connect_flags = OBD_CONNECT_VERSION | OBD_CONNECT_IBITS |
-+ OBD_CONNECT_JOIN | OBD_CONNECT_ATTRFID | OBD_CONNECT_NODEVOH |
-+ OBD_CONNECT_CANCELSET | OBD_CONNECT_AT;
-+#ifdef HAVE_LRU_RESIZE_SUPPORT
-+ if (sbi->ll_flags & LL_SBI_LRU_RESIZE)
-+ data->ocd_connect_flags |= OBD_CONNECT_LRU_RESIZE;
-+#endif
-+#ifdef CONFIG_FS_POSIX_ACL
-+ data->ocd_connect_flags |= OBD_CONNECT_ACL;
-+#endif
-+ data->ocd_ibits_known = MDS_INODELOCK_FULL;
-+ data->ocd_version = LUSTRE_VERSION_CODE;
-+
-+ if (sb->s_flags & MS_RDONLY)
-+ data->ocd_connect_flags |= OBD_CONNECT_RDONLY;
-+ if (sbi->ll_flags & LL_SBI_USER_XATTR)
-+ data->ocd_connect_flags |= OBD_CONNECT_XATTR;
-+
-+#ifdef HAVE_MS_FLOCK_LOCK
-+ /* force vfs to use lustre handler for flock() calls - bug 10743 */
-+ sb->s_flags |= MS_FLOCK_LOCK;
-+#endif
-+
-+ if (sbi->ll_flags & LL_SBI_FLOCK)
-+ sbi->ll_fop = &ll_file_operations_flock;
-+ else if (sbi->ll_flags & LL_SBI_LOCALFLOCK)
-+ sbi->ll_fop = &ll_file_operations;
-+ else
-+ sbi->ll_fop = &ll_file_operations_noflock;
-+
-+
-+ err = obd_connect(&mdc_conn, obd, &sbi->ll_sb_uuid, data, &sbi->ll_mdc_exp);
-+ if (err == -EBUSY) {
-+ LCONSOLE_ERROR_MSG(0x14f, "An MDT (mdc %s) is performing "
-+ "recovery, of which this client is not a "
-+ "part. Please wait for recovery to complete,"
-+ " abort, or time out.\n", mdc);
-+ GOTO(out, err);
-+ } else if (err) {
-+ CERROR("cannot connect to %s: rc = %d\n", mdc, err);
-+ GOTO(out, err);
-+ }
-+
-+ err = obd_statfs(obd, &osfs, cfs_time_current_64() - HZ, 0);
-+ if (err)
-+ GOTO(out_mdc, err);
-+
-+ /* MDC connect is surely finished by now because we actually sent
-+ * a statfs RPC, otherwise obd_connect() is asynchronous. */
-+ *data = class_exp2cliimp(sbi->ll_mdc_exp)->imp_connect_data;
-+
-+ LASSERT(osfs.os_bsize);
-+ sb->s_blocksize = osfs.os_bsize;
-+ sb->s_blocksize_bits = log2(osfs.os_bsize);
-+ sb->s_magic = LL_SUPER_MAGIC;
-+
-+ /* for bug 11559. in $LINUX/fs/read_write.c, function do_sendfile():
-+ * retval = in_file->f_op->sendfile(...);
-+ * if (*ppos > max)
-+ * retval = -EOVERFLOW;
-+ *
-+ * it will check if *ppos is greater than max. However, max equals
-+ * s_maxbytes, which is a negative integer on an x86_64 box since loff_t
-+ * has been defined as a signed long long integer in the Linux kernel. */
-+#if BITS_PER_LONG == 64
-+ sb->s_maxbytes = PAGE_CACHE_MAXBYTES >> 1;
-+#else
-+ sb->s_maxbytes = PAGE_CACHE_MAXBYTES;
-+#endif
-+ sbi->ll_namelen = osfs.os_namelen;
-+ sbi->ll_max_rw_chunk = LL_DEFAULT_MAX_RW_CHUNK;
-+
-+ if ((sbi->ll_flags & LL_SBI_USER_XATTR) &&
-+ !(data->ocd_connect_flags & OBD_CONNECT_XATTR)) {
-+ LCONSOLE_INFO("Disabling user_xattr feature because "
-+ "it is not supported on the server\n");
-+ sbi->ll_flags &= ~LL_SBI_USER_XATTR;
-+ }
-+
-+ if (data->ocd_connect_flags & OBD_CONNECT_ACL) {
-+#ifdef MS_POSIXACL
-+ sb->s_flags |= MS_POSIXACL;
-+#endif
-+ sbi->ll_flags |= LL_SBI_ACL;
-+ } else
-+ sbi->ll_flags &= ~LL_SBI_ACL;
-+
-+ if (data->ocd_connect_flags & OBD_CONNECT_JOIN)
-+ sbi->ll_flags |= LL_SBI_JOIN;
-+
-+ obd = class_name2obd(osc);
-+ if (!obd) {
-+ CERROR("OSC %s: not setup or attached\n", osc);
-+ GOTO(out_mdc, err = -ENODEV);
-+ }
-+
-+ data->ocd_connect_flags = OBD_CONNECT_VERSION | OBD_CONNECT_GRANT |
-+ OBD_CONNECT_REQPORTAL | OBD_CONNECT_BRW_SIZE |
-+ OBD_CONNECT_SRVLOCK | OBD_CONNECT_CANCELSET | OBD_CONNECT_AT |
-+ OBD_CONNECT_TRUNCLOCK;
-+
-+ if (!OBD_FAIL_CHECK(OBD_FAIL_OSC_CONNECT_CKSUM)) {
-+ /* OBD_CONNECT_CKSUM should always be set, even if checksums are
-+ * disabled by default, because it can still be enabled on the
-+ * fly via /proc. As a consequence, we still need to come to an
-+ * agreement on the supported algorithms at connect time */
-+ data->ocd_connect_flags |= OBD_CONNECT_CKSUM;
-+
-+ if (OBD_FAIL_CHECK(OBD_FAIL_OSC_CKSUM_ADLER_ONLY))
-+ data->ocd_cksum_types = OBD_CKSUM_ADLER;
-+ else
-+ /* send the list of supported checksum types */
-+ data->ocd_cksum_types = OBD_CKSUM_ALL;
-+ }
-+
-+#ifdef HAVE_LRU_RESIZE_SUPPORT
-+ if (sbi->ll_flags & LL_SBI_LRU_RESIZE)
-+ data->ocd_connect_flags |= OBD_CONNECT_LRU_RESIZE;
-+#endif
-+
-+ CDEBUG(D_RPCTRACE, "ocd_connect_flags: "LPX64" ocd_version: %d "
-+ "ocd_grant: %d\n", data->ocd_connect_flags,
-+ data->ocd_version, data->ocd_grant);
-+
-+ obd->obd_upcall.onu_owner = &sbi->ll_lco;
-+ obd->obd_upcall.onu_upcall = ll_ocd_update;
-+ data->ocd_brw_size = PTLRPC_MAX_BRW_PAGES << CFS_PAGE_SHIFT;
-+
-+ obd_register_lock_cancel_cb(obd, ll_extent_lock_cancel_cb);
-+ obd_register_page_removal_cb(obd, ll_page_removal_cb, ll_pin_extent_cb);
-+
-+
-+ err = obd_connect(&osc_conn, obd, &sbi->ll_sb_uuid, data, &sbi->ll_osc_exp);
-+ if (err == -EBUSY) {
-+ LCONSOLE_ERROR_MSG(0x150, "An OST (osc %s) is performing "
-+ "recovery, of which this client is not a "
-+ "part. Please wait for recovery to "
-+ "complete, abort, or time out.\n", osc);
-+ GOTO(out, err); // need clear cb?
-+ } else if (err) {
-+ CERROR("cannot connect to %s: rc = %d\n", osc, err);
-+ GOTO(out_cb, err);
-+ }
-+ spin_lock(&sbi->ll_lco.lco_lock);
-+ sbi->ll_lco.lco_flags = data->ocd_connect_flags;
-+ sbi->ll_lco.lco_mdc_exp = sbi->ll_mdc_exp;
-+ sbi->ll_lco.lco_osc_exp = sbi->ll_osc_exp;
-+ spin_unlock(&sbi->ll_lco.lco_lock);
-+
-+ err = mdc_init_ea_size(sbi->ll_mdc_exp, sbi->ll_osc_exp);
-+ if (err) {
-+ CERROR("cannot set max EA and cookie sizes: rc = %d\n", err);
-+ GOTO(out_osc, err);
-+ }
-+
-+ err = obd_prep_async_page(sbi->ll_osc_exp, NULL, NULL, NULL,
-+ 0, NULL, NULL, NULL, 0, NULL);
-+ if (err < 0) {
-+ LCONSOLE_ERROR_MSG(0x151, "There are no OST's in this "
-+ "filesystem. There must be at least one "
-+ "active OST for a client to start.\n");
-+ GOTO(out_osc, err);
-+ }
-+
-+ if (!ll_async_page_slab) {
-+ ll_async_page_slab_size =
-+ size_round(sizeof(struct ll_async_page)) + err;
-+ ll_async_page_slab = cfs_mem_cache_create("ll_async_page",
-+ ll_async_page_slab_size,
-+ 0, 0);
-+ if (!ll_async_page_slab)
-+ GOTO(out_osc, err = -ENOMEM);
-+ }
-+
-+ err = mdc_getstatus(sbi->ll_mdc_exp, &rootfid);
-+ if (err) {
-+ CERROR("cannot mds_connect: rc = %d\n", err);
-+ GOTO(out_osc, err);
-+ }
-+ CDEBUG(D_SUPER, "rootfid "LPU64"\n", rootfid.id);
-+ sbi->ll_rootino = rootfid.id;
-+
-+ sb->s_op = &lustre_super_operations;
-+#if THREAD_SIZE >= 8192
-+ /* Disable the NFS export because of stack overflow
-+ * when THREAD_SIZE < 8192. Please refer to bug 17630. */
-+ sb->s_export_op = &lustre_export_operations;
-+#endif
-+
-+ /* make root inode
-+ * XXX: move this to after cbd setup? */
-+ err = mdc_getattr(sbi->ll_mdc_exp, &rootfid,
-+ OBD_MD_FLGETATTR | OBD_MD_FLBLOCKS |
-+ (sbi->ll_flags & LL_SBI_ACL ? OBD_MD_FLACL : 0),
-+ 0, &request);
-+ if (err) {
-+ CERROR("mdc_getattr failed for root: rc = %d\n", err);
-+ GOTO(out_osc, err);
-+ }
-+
-+ err = mdc_req2lustre_md(request, REPLY_REC_OFF, sbi->ll_osc_exp, &md);
-+ if (err) {
-+ CERROR("failed to understand root inode md: rc = %d\n",err);
-+ ptlrpc_req_finished (request);
-+ GOTO(out_osc, err);
-+ }
-+
-+ LASSERT(sbi->ll_rootino != 0);
-+ root = ll_iget(sb, sbi->ll_rootino, &md);
-+
-+ ptlrpc_req_finished(request);
-+
-+ if (root == NULL || is_bad_inode(root)) {
-+ mdc_free_lustre_md(sbi->ll_osc_exp, &md);
-+ CERROR("lustre_lite: bad iget4 for root\n");
-+ GOTO(out_root, err = -EBADF);
-+ }
-+
-+ err = ll_close_thread_start(&sbi->ll_lcq);
-+ if (err) {
-+ CERROR("cannot start close thread: rc %d\n", err);
-+ GOTO(out_root, err);
-+ }
-+
-+ checksum = sbi->ll_flags & LL_SBI_DATA_CHECKSUM;
-+ err = obd_set_info_async(sbi->ll_osc_exp, sizeof(KEY_CHECKSUM),
-+ KEY_CHECKSUM, sizeof(checksum),
-+ &checksum, NULL);
-+
-+ /* making vm readahead 0 for 2.4.x. In the case of 2.6.x,
-+ backing dev info assigned to inode mapping is used for
-+ determining maximal readahead. */
-+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)) && \
-+ !defined(KERNEL_HAS_AS_MAX_READAHEAD)
-+ /* bug 2805 - set VM readahead to zero */
-+ vm_max_readahead = vm_min_readahead = 0;
-+#endif
-+
-+ sb->s_root = d_alloc_root(root);
-+ if (data != NULL)
-+ OBD_FREE(data, sizeof(*data));
-+ sb->s_root->d_op = &ll_d_root_ops;
-+
-+ sbi->ll_sdev_orig = sb->s_dev;
-+ /* We set sb->s_dev equal on all lustre clients in order to support
-+ * NFS export clustering. NFSD requires that the FSID be the same
-+ * on all clients. */
-+ /* s_dev is also used in lt_compare() to compare two fs, but that is
-+ * only a node-local comparison. */
-+ sb->s_dev = get_uuid2int(sbi2mdc(sbi)->cl_target_uuid.uuid,
-+ strlen(sbi2mdc(sbi)->cl_target_uuid.uuid));
-+
-+ RETURN(err);
-+
-+out_root:
-+ if (root)
-+ iput(root);
-+out_osc:
-+ obd_disconnect(sbi->ll_osc_exp);
-+ sbi->ll_osc_exp = NULL;
-+out_cb:
-+ obd = class_name2obd(osc);
-+ obd_unregister_lock_cancel_cb(obd, ll_extent_lock_cancel_cb);
-+ obd_unregister_page_removal_cb(obd, ll_page_removal_cb);
-+out_mdc:
-+ obd_disconnect(sbi->ll_mdc_exp);
-+ sbi->ll_mdc_exp = NULL;
-+out:
-+ if (data != NULL)
-+ OBD_FREE(data, sizeof(*data));
-+ lprocfs_unregister_mountpoint(sbi);
-+ RETURN(err);
-+}
-+
-+int ll_get_max_mdsize(struct ll_sb_info *sbi, int *lmmsize)
-+{
-+ int size, rc;
-+
-+ *lmmsize = obd_size_diskmd(sbi->ll_osc_exp, NULL);
-+ size = sizeof(int);
-+ rc = obd_get_info(sbi->ll_mdc_exp, sizeof(KEY_MAX_EASIZE),
-+ KEY_MAX_EASIZE, &size, lmmsize, NULL);
-+ if (rc)
-+ CERROR("Get max mdsize error rc %d \n", rc);
-+
-+ RETURN(rc);
-+}
-+
-+void ll_dump_inode(struct inode *inode)
-+{
-+ struct list_head *tmp;
-+ int dentry_count = 0;
-+
-+ LASSERT(inode != NULL);
-+
-+ list_for_each(tmp, &inode->i_dentry)
-+ dentry_count++;
-+
-+ CERROR("inode %p dump: dev=%s ino=%lu mode=%o count=%u, %d dentries\n",
-+ inode, ll_i2mdcexp(inode)->exp_obd->obd_name, inode->i_ino,
-+ inode->i_mode, atomic_read(&inode->i_count), dentry_count);
-+}
-+
-+void lustre_dump_dentry(struct dentry *dentry, int recur)
-+{
-+ struct list_head *tmp;
-+ int subdirs = 0;
-+
-+ LASSERT(dentry != NULL);
-+
-+ list_for_each(tmp, &dentry->d_subdirs)
-+ subdirs++;
-+
-+ CERROR("dentry %p dump: name=%.*s parent=%.*s (%p), inode=%p, count=%u,"
-+ " flags=0x%x, fsdata=%p, %d subdirs\n", dentry,
-+ dentry->d_name.len, dentry->d_name.name,
-+ dentry->d_parent->d_name.len, dentry->d_parent->d_name.name,
-+ dentry->d_parent, dentry->d_inode, atomic_read(&dentry->d_count),
-+ dentry->d_flags, dentry->d_fsdata, subdirs);
-+ if (dentry->d_inode != NULL)
-+ ll_dump_inode(dentry->d_inode);
-+
-+ if (recur == 0)
-+ return;
-+
-+ list_for_each(tmp, &dentry->d_subdirs) {
-+ struct dentry *d = list_entry(tmp, struct dentry, d_child);
-+ lustre_dump_dentry(d, recur - 1);
-+ }
-+}
-+
-+#ifdef HAVE_EXPORT___IGET
-+static void prune_dir_dentries(struct inode *inode)
-+{
-+ struct dentry *dentry, *prev = NULL;
-+
-+ /* due to Lustre-specific logic, a directory
-+ * can have several dentries - a bug from the VFS POV */
-+restart:
-+ spin_lock(&dcache_lock);
-+ if (!list_empty(&inode->i_dentry)) {
-+ dentry = list_entry(inode->i_dentry.prev,
-+ struct dentry, d_alias);
-+ /* in order to prevent infinite loops we
-+ * break if previous dentry is busy */
-+ if (dentry != prev) {
-+ prev = dentry;
-+ dget_locked(dentry);
-+ spin_unlock(&dcache_lock);
-+
-+ /* try to kill all child dentries */
-+ shrink_dcache_parent(dentry);
-+ dput(dentry);
-+
-+ /* now try to get rid of current dentry */
-+ d_prune_aliases(inode);
-+ goto restart;
-+ }
-+ }
-+ spin_unlock(&dcache_lock);
-+}
-+
-+static void prune_deathrow_one(struct ll_inode_info *lli)
-+{
-+ struct inode *inode = ll_info2i(lli);
-+
-+ /* first, try to drop any dentries - they hold a ref on the inode */
-+ if (S_ISDIR(inode->i_mode))
-+ prune_dir_dentries(inode);
-+ else
-+ d_prune_aliases(inode);
-+
-+
-+ /* if somebody still uses it, leave it */
-+ LASSERT(atomic_read(&inode->i_count) > 0);
-+ if (atomic_read(&inode->i_count) > 1)
-+ goto out;
-+
-+ CDEBUG(D_INODE, "inode %lu/%u(%d) looks a good candidate for prune\n",
-+ inode->i_ino,inode->i_generation, atomic_read(&inode->i_count));
-+
-+ /* seems nobody uses it anymore */
-+ inode->i_nlink = 0;
-+
-+out:
-+ iput(inode);
-+ return;
-+}
-+
-+static void prune_deathrow(struct ll_sb_info *sbi, int try)
-+{
-+ struct ll_inode_info *lli;
-+ int empty;
-+
-+ do {
-+ if (need_resched() && try)
-+ break;
-+
-+ if (try) {
-+ if (!spin_trylock(&sbi->ll_deathrow_lock))
-+ break;
-+ } else {
-+ spin_lock(&sbi->ll_deathrow_lock);
-+ }
-+
-+ empty = 1;
-+ lli = NULL;
-+ if (!list_empty(&sbi->ll_deathrow)) {
-+ lli = list_entry(sbi->ll_deathrow.next,
-+ struct ll_inode_info,
-+ lli_dead_list);
-+ list_del_init(&lli->lli_dead_list);
-+ if (!list_empty(&sbi->ll_deathrow))
-+ empty = 0;
-+ }
-+ spin_unlock(&sbi->ll_deathrow_lock);
-+
-+ if (lli)
-+ prune_deathrow_one(lli);
-+
-+ } while (empty == 0);
-+}
-+#else /* !HAVE_EXPORT___IGET */
-+#define prune_deathrow(sbi, try) do {} while (0)
-+#endif /* HAVE_EXPORT___IGET */
-+
-+void client_common_put_super(struct super_block *sb)
-+{
-+ struct ll_sb_info *sbi = ll_s2sbi(sb);
-+ ENTRY;
-+
-+ ll_close_thread_shutdown(sbi->ll_lcq);
-+
-+ lprocfs_unregister_mountpoint(sbi);
-+
-+ /* destroy inodes in deathrow */
-+ prune_deathrow(sbi, 0);
-+
-+ list_del(&sbi->ll_conn_chain);
-+
-+ /* callbacks is cleared after disconnect each target */
-+ obd_disconnect(sbi->ll_osc_exp);
-+ sbi->ll_osc_exp = NULL;
-+
-+ obd_disconnect(sbi->ll_mdc_exp);
-+ sbi->ll_mdc_exp = NULL;
-+
-+ EXIT;
-+}
-+
-+void ll_kill_super(struct super_block *sb)
-+{
-+ struct ll_sb_info *sbi;
-+
-+ ENTRY;
-+
-+ /* sb not initialized? */
-+ if (!(sb->s_flags & MS_ACTIVE))
-+ return;
-+
-+ sbi = ll_s2sbi(sb);
-+ /* we need to restore s_dev, which was changed for clustered NFS, before
-+ * put_super, because new kernels have cached s_dev and changing
-+ * sb->s_dev in put_super does not affect the real device removal */
-+ if (sbi)
-+ sb->s_dev = sbi->ll_sdev_orig;
-+ EXIT;
-+}
-+
-+char *ll_read_opt(const char *opt, char *data)
-+{
-+ char *value;
-+ char *retval;
-+ ENTRY;
-+
-+ CDEBUG(D_SUPER, "option: %s, data %s\n", opt, data);
-+ if (strncmp(opt, data, strlen(opt)))
-+ RETURN(NULL);
-+ if ((value = strchr(data, '=')) == NULL)
-+ RETURN(NULL);
-+
-+ value++;
-+ OBD_ALLOC(retval, strlen(value) + 1);
-+ if (!retval) {
-+ CERROR("out of memory!\n");
-+ RETURN(NULL);
-+ }
-+
-+ memcpy(retval, value, strlen(value)+1);
-+ CDEBUG(D_SUPER, "Assigned option: %s, value %s\n", opt, retval);
-+ RETURN(retval);
-+}
-+
-+static inline int ll_set_opt(const char *opt, char *data, int fl)
-+{
-+ if (strncmp(opt, data, strlen(opt)) != 0)
-+ return(0);
-+ else
-+ return(fl);
-+}
-+
-+/* non-client-specific mount options are parsed in lmd_parse */
-+static int ll_options(char *options, int *flags)
-+{
-+ int tmp;
-+ char *s1 = options, *s2;
-+ ENTRY;
-+
-+ if (!options)
-+ RETURN(0);
-+
-+ CDEBUG(D_CONFIG, "Parsing opts %s\n", options);
-+
-+ while (*s1) {
-+ CDEBUG(D_SUPER, "next opt=%s\n", s1);
-+ tmp = ll_set_opt("nolock", s1, LL_SBI_NOLCK);
-+ if (tmp) {
-+ *flags |= tmp;
-+ goto next;
-+ }
-+ tmp = ll_set_opt("flock", s1, LL_SBI_FLOCK);
-+ if (tmp) {
-+ *flags |= tmp;
-+ goto next;
-+ }
-+ tmp = ll_set_opt("localflock", s1, LL_SBI_LOCALFLOCK);
-+ if (tmp) {
-+ *flags |= tmp;
-+ goto next;
-+ }
-+ tmp = ll_set_opt("noflock", s1, LL_SBI_FLOCK|LL_SBI_LOCALFLOCK);
-+ if (tmp) {
-+ *flags &= ~tmp;
-+ goto next;
-+ }
-+ tmp = ll_set_opt("user_xattr", s1, LL_SBI_USER_XATTR);
-+ if (tmp) {
-+ *flags |= tmp;
-+ goto next;
-+ }
-+ tmp = ll_set_opt("nouser_xattr", s1, LL_SBI_USER_XATTR);
-+ if (tmp) {
-+ *flags &= ~tmp;
-+ goto next;
-+ }
-+ tmp = ll_set_opt("acl", s1, LL_SBI_ACL);
-+ if (tmp) {
-+ /* Ignore deprecated mount option. The client will
-+ * always try to mount with ACL support; whether it
-+ * is used depends on whether the server supports it. */
-+ LCONSOLE_ERROR_MSG(0x152, "Ignoring deprecated "
-+ "mount option 'acl'.\n");
-+ goto next;
-+ }
-+ tmp = ll_set_opt("noacl", s1, LL_SBI_ACL);
-+ if (tmp) {
-+ LCONSOLE_ERROR_MSG(0x152, "Ignoring deprecated "
-+ "mount option 'noacl'.\n");
-+ goto next;
-+ }
-+
-+ tmp = ll_set_opt("checksum", s1, LL_SBI_DATA_CHECKSUM);
-+ if (tmp) {
-+ *flags |= tmp;
-+ goto next;
-+ }
-+ tmp = ll_set_opt("nochecksum", s1, LL_SBI_DATA_CHECKSUM);
-+ if (tmp) {
-+ *flags &= ~tmp;
-+ goto next;
-+ }
-+
-+ tmp = ll_set_opt("lruresize", s1, LL_SBI_LRU_RESIZE);
-+ if (tmp) {
-+ *flags |= tmp;
-+ goto next;
-+ }
-+ tmp = ll_set_opt("nolruresize", s1, LL_SBI_LRU_RESIZE);
-+ if (tmp) {
-+ *flags &= ~tmp;
-+ goto next;
-+ }
-+ LCONSOLE_ERROR_MSG(0x152, "Unknown option '%s', won't mount.\n",
-+ s1);
-+ RETURN(-EINVAL);
-+
-+next:
-+ /* Find next opt */
-+ s2 = strchr(s1, ',');
-+ if (s2 == NULL)
-+ break;
-+ s1 = s2 + 1;
-+ }
-+ RETURN(0);
-+}
-+
-+void ll_lli_init(struct ll_inode_info *lli)
-+{
-+ lli->lli_inode_magic = LLI_INODE_MAGIC;
-+ sema_init(&lli->lli_size_sem, 1);
-+ sema_init(&lli->lli_write_sem, 1);
-+ lli->lli_flags = 0;
-+ lli->lli_maxbytes = PAGE_CACHE_MAXBYTES;
-+ spin_lock_init(&lli->lli_lock);
-+ sema_init(&lli->lli_och_sem, 1);
-+ lli->lli_mds_read_och = lli->lli_mds_write_och = NULL;
-+ lli->lli_mds_exec_och = NULL;
-+ lli->lli_open_fd_read_count = lli->lli_open_fd_write_count = 0;
-+ lli->lli_open_fd_exec_count = 0;
-+ INIT_LIST_HEAD(&lli->lli_dead_list);
-+#ifdef HAVE_CLOSE_THREAD
-+ INIT_LIST_HEAD(&lli->lli_pending_write_llaps);
-+#endif
-+}
-+
-+/* COMPAT_146 */
-+#define MDCDEV "mdc_dev"
-+static int old_lustre_process_log(struct super_block *sb, char *newprofile,
-+ struct config_llog_instance *cfg)
-+{
-+ struct lustre_sb_info *lsi = s2lsi(sb);
-+ struct obd_device *obd;
-+ struct lustre_handle mdc_conn = {0, };
-+ struct obd_export *exp;
-+ char *ptr, *mdt, *profile;
-+ char niduuid[10] = "mdtnid0";
-+ class_uuid_t uuid;
-+ struct obd_uuid mdc_uuid;
-+ struct llog_ctxt *ctxt;
-+ struct obd_connect_data ocd = { 0 };
-+ lnet_nid_t nid;
-+ int i, rc = 0, recov_bk = 1, failnodes = 0;
-+ ENTRY;
-+
-+ ll_generate_random_uuid(uuid);
-+ class_uuid_unparse(uuid, &mdc_uuid);
-+ CDEBUG(D_HA, "generated uuid: %s\n", mdc_uuid.uuid);
-+
-+ /* Figure out the old mdt and profile name from new-style profile
-+ ("lustre" from "mds/lustre-client") */
-+ mdt = newprofile;
-+ profile = strchr(mdt, '/');
-+ if (profile == NULL) {
-+ CDEBUG(D_CONFIG, "Can't find MDT name in %s\n", newprofile);
-+ GOTO(out, rc = -EINVAL);
-+ }
-+ *profile = '\0';
-+ profile++;
-+ ptr = strrchr(profile, '-');
-+ if (ptr == NULL) {
-+ CDEBUG(D_CONFIG, "Can't find client name in %s\n", newprofile);
-+ GOTO(out, rc = -EINVAL);
-+ }
-+ *ptr = '\0';
-+
-+ LCONSOLE_WARN("This looks like an old mount command; I will try to "
-+ "contact MDT '%s' for profile '%s'\n", mdt, profile);
-+
-+ /* Use nids from mount line: uml1,1 at elan:uml2,2 at elan:/lustre */
-+ i = 0;
-+ ptr = lsi->lsi_lmd->lmd_dev;
-+ while (class_parse_nid(ptr, &nid, &ptr) == 0) {
-+ rc = do_lcfg(MDCDEV, nid, LCFG_ADD_UUID, niduuid, 0,0,0);
-+ i++;
-+ /* Stop at the first failover nid */
-+ if (*ptr == ':')
-+ break;
-+ }
-+ if (i == 0) {
-+ CERROR("No valid MDT nids found.\n");
-+ GOTO(out, rc = -EINVAL);
-+ }
-+ failnodes++;
-+
-+ rc = do_lcfg(MDCDEV, 0, LCFG_ATTACH, LUSTRE_MDC_NAME,mdc_uuid.uuid,0,0);
-+ if (rc < 0)
-+ GOTO(out_del_uuid, rc);
-+
-+ rc = do_lcfg(MDCDEV, 0, LCFG_SETUP, mdt, niduuid, 0, 0);
-+ if (rc < 0) {
-+ LCONSOLE_ERROR_MSG(0x153, "I couldn't establish a connection "
-+ "with the MDT. Check that the MDT host NID "
-+ "is correct and the networks are up.\n");
-+ GOTO(out_detach, rc);
-+ }
-+
-+ obd = class_name2obd(MDCDEV);
-+ if (obd == NULL)
-+ GOTO(out_cleanup, rc = -EINVAL);
-+
-+ /* Add any failover nids */
-+ while (*ptr == ':') {
-+ /* New failover node */
-+ sprintf(niduuid, "mdtnid%d", failnodes);
-+ i = 0;
-+ while (class_parse_nid(ptr, &nid, &ptr) == 0) {
-+ i++;
-+ rc = do_lcfg(MDCDEV, nid, LCFG_ADD_UUID, niduuid,0,0,0);
-+ if (rc)
-+ CERROR("Add uuid for %s failed %d\n",
-+ libcfs_nid2str(nid), rc);
-+ if (*ptr == ':')
-+ break;
-+ }
-+ if (i > 0) {
-+ rc = do_lcfg(MDCDEV, 0, LCFG_ADD_CONN, niduuid, 0, 0,0);
-+ if (rc)
-+ CERROR("Add conn for %s failed %d\n",
-+ libcfs_nid2str(nid), rc);
-+ failnodes++;
-+ } else {
-+ /* at ":/fsname" */
-+ break;
-+ }
-+ }
-+
-+ /* Try all connections, but only once. */
-+ rc = obd_set_info_async(obd->obd_self_export,
-+ sizeof(KEY_INIT_RECOV_BACKUP), KEY_INIT_RECOV_BACKUP,
-+ sizeof(recov_bk), &recov_bk, NULL);
-+ if (rc)
-+ GOTO(out_cleanup, rc);
-+
-+ /* If we don't have this then an ACL MDS will refuse the connection */
-+ ocd.ocd_connect_flags = OBD_CONNECT_ACL;
-+
-+ rc = obd_connect(&mdc_conn, obd, &mdc_uuid, &ocd, &exp);
-+ if (rc) {
-+ CERROR("cannot connect to %s: rc = %d\n", mdt, rc);
-+ GOTO(out_cleanup, rc);
-+ }
-+
-+ ctxt = llog_get_context(exp->exp_obd, LLOG_CONFIG_REPL_CTXT);
-+
-+ cfg->cfg_flags |= CFG_F_COMPAT146;
-+
-+#if 1
-+ rc = class_config_parse_llog(ctxt, profile, cfg);
-+#else
-+ /*
-+ * For debugging, it's useful to just dump the log
-+ */
-+ rc = class_config_dump_llog(ctxt, profile, cfg);
-+#endif
-+ llog_ctxt_put(ctxt);
-+ switch (rc) {
-+ case 0: {
-+ /* Set the caller's profile name to the old-style */
-+ memcpy(newprofile, profile, strlen(profile) + 1);
-+ break;
-+ }
-+ case -EINVAL:
-+ LCONSOLE_ERROR_MSG(0x154, "%s: The configuration '%s' could not"
-+ " be read from the MDT '%s'. Make sure this"
-+ " client and the MDT are running compatible "
-+ "versions of Lustre.\n",
-+ obd->obd_name, profile, mdt);
-+ /* fall through */
-+ default:
-+ LCONSOLE_ERROR_MSG(0x155, "%s: The configuration '%s' could not"
-+ " be read from the MDT '%s'. This may be "
-+ "the result of communication errors between "
-+ "the client and the MDT, or if the MDT is "
-+ "not running.\n", obd->obd_name, profile,
-+ mdt);
-+ break;
-+ }
-+
-+ /* We don't so much care about errors in cleaning up the config llog
-+ * connection, as we have already read the config by this point. */
-+ obd_disconnect(exp);
-+
-+out_cleanup:
-+ do_lcfg(MDCDEV, 0, LCFG_CLEANUP, 0, 0, 0, 0);
-+
-+out_detach:
-+ do_lcfg(MDCDEV, 0, LCFG_DETACH, 0, 0, 0, 0);
-+
-+out_del_uuid:
-+ /* class_add_uuid adds a nid even if the same uuid already exists; the
-+ delete here might remove any copy, so they had all better match. */
-+ for (i = 0; i < failnodes; i++) {
-+ sprintf(niduuid, "mdtnid%d", i);
-+ do_lcfg(MDCDEV, 0, LCFG_DEL_UUID, niduuid, 0, 0, 0);
-+ }
-+ /* class_import_put will get rid of the additional connections */
-+out:
-+ RETURN(rc);
-+}
-+/* end COMPAT_146 */
-+
-+int ll_fill_super(struct super_block *sb)
-+{
-+ struct lustre_profile *lprof;
-+ struct lustre_sb_info *lsi = s2lsi(sb);
-+ struct ll_sb_info *sbi;
-+ char *osc = NULL, *mdc = NULL;
-+ char *profilenm = get_profile_name(sb);
-+ struct config_llog_instance cfg = {0, };
-+ char ll_instance[sizeof(sb) * 2 + 1];
-+ int err;
-+ ENTRY;
-+
-+ CDEBUG(D_VFSTRACE, "VFS Op: sb %p\n", sb);
-+
-+ cfs_module_get();
-+
-+ /* client additional sb info */
-+ lsi->lsi_llsbi = sbi = ll_init_sbi();
-+ if (!sbi) {
-+ cfs_module_put();
-+ RETURN(-ENOMEM);
-+ }
-+
-+ err = ll_options(lsi->lsi_lmd->lmd_opts, &sbi->ll_flags);
-+ if (err)
-+ GOTO(out_free, err);
-+
-+ /* Generate a string unique to this super, in case some joker tries
-+ to mount the same fs at two mount points.
-+ Use the address of the super itself.*/
-+ sprintf(ll_instance, "%p", sb);
-+ cfg.cfg_instance = ll_instance;
-+ cfg.cfg_uuid = lsi->lsi_llsbi->ll_sb_uuid;
-+ cfg.cfg_sb = sb;
-+
-+ /* set up client obds */
-+ if (strchr(profilenm, '/') != NULL) /* COMPAT_146 */
-+ err = -EINVAL; /* skip error messages, use old config code */
-+ else
-+ err = lustre_process_log(sb, profilenm, &cfg);
-+ /* COMPAT_146 */
-+ if (err < 0) {
-+ char *oldname;
-+ int rc, oldnamelen;
-+ oldnamelen = strlen(profilenm) + 1;
-+ /* Temp storage for 1.4.6 profile name */
-+ OBD_ALLOC(oldname, oldnamelen);
-+ if (oldname) {
-+ memcpy(oldname, profilenm, oldnamelen);
-+ rc = old_lustre_process_log(sb, oldname, &cfg);
-+ if (rc >= 0) {
-+ /* That worked - update the profile name
-+ permanently */
-+ err = rc;
-+ OBD_FREE(lsi->lsi_lmd->lmd_profile,
-+ strlen(lsi->lsi_lmd->lmd_profile) + 1);
-+ OBD_ALLOC(lsi->lsi_lmd->lmd_profile,
-+ strlen(oldname) + 1);
-+ if (!lsi->lsi_lmd->lmd_profile) {
-+ OBD_FREE(oldname, oldnamelen);
-+ GOTO(out_free, err = -ENOMEM);
-+ }
-+ memcpy(lsi->lsi_lmd->lmd_profile, oldname,
-+ strlen(oldname) + 1);
-+ profilenm = get_profile_name(sb);
-+ /* Don't ever try to recover the MGS */
-+ rc = ptlrpc_set_import_active(
-+ lsi->lsi_mgc->u.cli.cl_import, 0);
-+ }
-+ OBD_FREE(oldname, oldnamelen);
-+ }
-+ }
-+ /* end COMPAT_146 */
-+ if (err < 0) {
-+ CERROR("Unable to process log: %d\n", err);
-+ GOTO(out_free, err);
-+ }
-+
-+ lprof = class_get_profile(profilenm);
-+ if (lprof == NULL) {
-+ LCONSOLE_ERROR_MSG(0x156, "The client profile '%s' could not be"
-+ " read from the MGS. Does that filesystem "
-+ "exist?\n", profilenm);
-+ GOTO(out_free, err = -EINVAL);
-+ }
-+ CDEBUG(D_CONFIG, "Found profile %s: mdc=%s osc=%s\n", profilenm,
-+ lprof->lp_mdc, lprof->lp_osc);
-+
-+ OBD_ALLOC(osc, strlen(lprof->lp_osc) +
-+ strlen(ll_instance) + 2);
-+ if (!osc)
-+ GOTO(out_free, err = -ENOMEM);
-+ sprintf(osc, "%s-%s", lprof->lp_osc, ll_instance);
-+
-+ OBD_ALLOC(mdc, strlen(lprof->lp_mdc) +
-+ strlen(ll_instance) + 2);
-+ if (!mdc)
-+ GOTO(out_free, err = -ENOMEM);
-+ sprintf(mdc, "%s-%s", lprof->lp_mdc, ll_instance);
-+
-+ /* connections, registrations, sb setup */
-+ err = client_common_fill_super(sb, mdc, osc);
-+
-+out_free:
-+ if (mdc)
-+ OBD_FREE(mdc, strlen(mdc) + 1);
-+ if (osc)
-+ OBD_FREE(osc, strlen(osc) + 1);
-+ if (err)
-+ ll_put_super(sb);
-+ else
-+ LCONSOLE_WARN("Client %s has started\n", profilenm);
-+
-+ RETURN(err);
-+} /* ll_fill_super */
-+
-+
-+void ll_put_super(struct super_block *sb)
-+{
-+ struct config_llog_instance cfg;
-+ char ll_instance[sizeof(sb) * 2 + 1];
-+ struct obd_device *obd;
-+ struct lustre_sb_info *lsi = s2lsi(sb);
-+ struct ll_sb_info *sbi = ll_s2sbi(sb);
-+ char *profilenm = get_profile_name(sb);
-+ int force = 1, next;
-+ ENTRY;
-+
-+ CDEBUG(D_VFSTRACE, "VFS Op: sb %p - %s\n", sb, profilenm);
-+
-+ sprintf(ll_instance, "%p", sb);
-+ cfg.cfg_instance = ll_instance;
-+ lustre_end_log(sb, NULL, &cfg);
-+
-+ if (sbi->ll_mdc_exp) {
-+ obd = class_exp2obd(sbi->ll_mdc_exp);
-+ if (obd)
-+ force = obd->obd_force;
-+ }
-+
-+ /* We need to set force before the lov_disconnect in
-+ lustre_common_put_super, since l_d cleans up osc's as well. */
-+ if (force) {
-+ next = 0;
-+ while ((obd = class_devices_in_group(&sbi->ll_sb_uuid,
-+ &next)) != NULL) {
-+ obd->obd_force = force;
-+ }
-+ }
-+
-+ if (sbi->ll_lcq) {
-+ /* Only if client_common_fill_super succeeded */
-+ client_common_put_super(sb);
-+ }
-+
-+ next = 0;
-+ while ((obd = class_devices_in_group(&sbi->ll_sb_uuid, &next)) !=NULL) {
-+ class_manual_cleanup(obd);
-+ }
-+
-+ if (profilenm)
-+ class_del_profile(profilenm);
-+
-+ ll_free_sbi(sb);
-+ lsi->lsi_llsbi = NULL;
-+
-+ lustre_common_put_super(sb);
-+
-+ LCONSOLE_WARN("client %s umount complete\n", ll_instance);
-+
-+ cfs_module_put();
-+
-+ EXIT;
-+} /* client_put_super */
-+
-+#ifdef HAVE_REGISTER_CACHE
-+#include <linux/cache_def.h>
-+#ifdef HAVE_CACHE_RETURN_INT
-+static int
-+#else
-+static void
-+#endif
-+ll_shrink_cache(int priority, unsigned int gfp_mask)
-+{
-+ struct ll_sb_info *sbi;
-+ int count = 0;
-+
-+ list_for_each_entry(sbi, &ll_super_blocks, ll_list)
-+ count += llap_shrink_cache(sbi, priority);
-+
-+#ifdef HAVE_CACHE_RETURN_INT
-+ return count;
-+#endif
-+}
-+
-+struct cache_definition ll_cache_definition = {
-+ .name = "llap_cache",
-+ .shrink = ll_shrink_cache
-+};
-+#endif /* HAVE_REGISTER_CACHE */
-+
-+struct inode *ll_inode_from_lock(struct ldlm_lock *lock)
-+{
-+ struct inode *inode = NULL;
-+ /* NOTE: we depend on atomic igrab() -bzzz */
-+ lock_res_and_lock(lock);
-+ if (lock->l_ast_data) {
-+ struct ll_inode_info *lli = ll_i2info(lock->l_ast_data);
-+ if (lli->lli_inode_magic == LLI_INODE_MAGIC) {
-+ inode = igrab(lock->l_ast_data);
-+ } else {
-+ inode = lock->l_ast_data;
-+ ldlm_lock_debug(NULL, inode->i_state & I_FREEING ?
-+ D_INFO : D_WARNING,
-+ lock, __FILE__, __func__, __LINE__,
-+ "l_ast_data %p is bogus: magic %08x",
-+ lock->l_ast_data, lli->lli_inode_magic);
-+ inode = NULL;
-+ }
-+ }
-+ unlock_res_and_lock(lock);
-+ return inode;
-+}
-+
-+static int null_if_equal(struct ldlm_lock *lock, void *data)
-+{
-+ if (data == lock->l_ast_data) {
-+ lock->l_ast_data = NULL;
-+
-+ if (lock->l_req_mode != lock->l_granted_mode)
-+ LDLM_ERROR(lock,"clearing inode with ungranted lock");
-+ }
-+
-+ return LDLM_ITER_CONTINUE;
-+}
-+
-+void ll_clear_inode(struct inode *inode)
-+{
-+ struct ll_fid fid;
-+ struct ll_inode_info *lli = ll_i2info(inode);
-+ struct ll_sb_info *sbi = ll_i2sbi(inode);
-+ ENTRY;
-+
-+ CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
-+ inode->i_generation, inode);
-+
-+ if (S_ISDIR(inode->i_mode)) {
-+ /* these should have been cleared in ll_file_release */
-+ LASSERT(lli->lli_sai == NULL);
-+ LASSERT(lli->lli_opendir_key == NULL);
-+ LASSERT(lli->lli_opendir_pid == 0);
-+ }
-+
-+ ll_inode2fid(&fid, inode);
-+ clear_bit(LLI_F_HAVE_MDS_SIZE_LOCK, &lli->lli_flags);
-+ mdc_change_cbdata(sbi->ll_mdc_exp, &fid, null_if_equal, inode);
-+
-+ LASSERT(!lli->lli_open_fd_write_count);
-+ LASSERT(!lli->lli_open_fd_read_count);
-+ LASSERT(!lli->lli_open_fd_exec_count);
-+
-+ if (lli->lli_mds_write_och)
-+ ll_mdc_real_close(inode, FMODE_WRITE);
-+ if (lli->lli_mds_exec_och) {
-+ if (!FMODE_EXEC)
-+ CERROR("No FMODE exec, but exec och is present for "
-+ "inode %ld\n", inode->i_ino);
-+ ll_mdc_real_close(inode, FMODE_EXEC);
-+ }
-+ if (lli->lli_mds_read_och)
-+ ll_mdc_real_close(inode, FMODE_READ);
-+
-+
-+ if (lli->lli_smd) {
-+ obd_change_cbdata(sbi->ll_osc_exp, lli->lli_smd,
-+ null_if_equal, inode);
-+
-+ obd_free_memmd(sbi->ll_osc_exp, &lli->lli_smd);
-+ lli->lli_smd = NULL;
-+ }
-+
-+ if (lli->lli_symlink_name) {
-+ OBD_FREE(lli->lli_symlink_name,
-+ strlen(lli->lli_symlink_name) + 1);
-+ lli->lli_symlink_name = NULL;
-+ }
-+
-+#ifdef CONFIG_FS_POSIX_ACL
-+ if (lli->lli_posix_acl) {
-+ LASSERT(atomic_read(&lli->lli_posix_acl->a_refcount) == 1);
-+ posix_acl_release(lli->lli_posix_acl);
-+ lli->lli_posix_acl = NULL;
-+ }
-+#endif
-+
-+ lli->lli_inode_magic = LLI_INODE_DEAD;
-+
-+#ifdef HAVE_EXPORT___IGET
-+ spin_lock(&sbi->ll_deathrow_lock);
-+ list_del_init(&lli->lli_dead_list);
-+ spin_unlock(&sbi->ll_deathrow_lock);
-+#endif
-+
-+ EXIT;
-+}
-+static int ll_setattr_do_truncate(struct inode *inode, loff_t new_size)
-+{
-+ struct ll_sb_info *sbi = ll_i2sbi(inode);
-+ struct ll_inode_info *lli = ll_i2info(inode);
-+ struct lov_stripe_md *lsm = lli->lli_smd;
-+ int rc;
-+ ldlm_policy_data_t policy = { .l_extent = {new_size,
-+ OBD_OBJECT_EOF } };
-+ struct lustre_handle lockh = { 0 };
-+ int local_lock = 0; /* 0 - no local lock;
-+ * 1 - lock taken by lock_extent;
-+ * 2 - by obd_match*/
-+ int ast_flags;
-+ int err;
-+ ENTRY;
-+
-+ UNLOCK_INODE_MUTEX(inode);
-+ UP_WRITE_I_ALLOC_SEM(inode);
-+
-+ if (sbi->ll_lockless_truncate_enable &&
-+ (sbi->ll_lco.lco_flags & OBD_CONNECT_TRUNCLOCK)) {
-+ ast_flags = LDLM_FL_BLOCK_GRANTED;
-+ rc = obd_match(sbi->ll_osc_exp, lsm, LDLM_EXTENT,
-+ &policy, LCK_PW, &ast_flags, inode, &lockh);
-+ if (rc > 0) {
-+ local_lock = 2;
-+ rc = 0;
-+ } else if (rc == 0) {
-+ rc = ll_file_punch(inode, new_size, 1);
-+ }
-+ } else {
-+ /* XXX when we fix the AST intents to pass the discard-range
-+ * XXX extent, make ast_flags always LDLM_AST_DISCARD_DATA
-+ * XXX here. */
-+ ast_flags = (new_size == 0) ? LDLM_AST_DISCARD_DATA : 0;
-+ rc = ll_extent_lock(NULL, inode, lsm, LCK_PW, &policy,
-+ &lockh, ast_flags);
-+ if (likely(rc == 0))
-+ local_lock = 1;
-+ }
-+
-+ LOCK_INODE_MUTEX(inode);
-+ DOWN_WRITE_I_ALLOC_SEM(inode);
-+ if (likely(rc == 0)) {
-+ /* Only ll_inode_size_lock is taken at this level.
-+ * lov_stripe_lock() is grabbed by ll_truncate() only over
-+ * call to obd_adjust_kms(). If vmtruncate returns 0, then
-+ * ll_truncate dropped ll_inode_size_lock() */
-+ ll_inode_size_lock(inode, 0);
-+ if (!local_lock)
-+ set_bit(LLI_F_SRVLOCK, &lli->lli_flags);
-+ rc = vmtruncate(inode, new_size);
-+ clear_bit(LLI_F_SRVLOCK, &lli->lli_flags);
-+ if (rc != 0) {
-+ LASSERT(atomic_read(&lli->lli_size_sem.count) <= 0);
-+ ll_inode_size_unlock(inode, 0);
-+ }
-+ }
-+ if (local_lock) {
-+ if (local_lock == 2)
-+ err = obd_cancel(sbi->ll_osc_exp, lsm, LCK_PW, &lockh);
-+ else
-+ err = ll_extent_unlock(NULL, inode, lsm, LCK_PW, &lockh);
-+ if (unlikely(err != 0)){
-+ CERROR("extent unlock failed: err=%d,"
-+ " unlock method =%d\n", err, local_lock);
-+ if (rc == 0)
-+ rc = err;
-+ }
-+ }
-+ RETURN(rc);
-+}
-+
-+/* If this inode has objects allocated to it (lsm != NULL), then the OST
-+ * object(s) determine the file size and mtime. Otherwise, the MDS will
-+ * keep these values until such a time that objects are allocated for it.
-+ * We do the MDS operations first, as it is checking permissions for us.
-+ * We don't do the MDS RPC if there is nothing that we want to store there;
-+ * otherwise there is no harm in updating mtime/atime on the MDS if we are
-+ * going to do an RPC anyway.
-+ *
-+ * If we are doing a truncate, we will send the mtime and ctime updates
-+ * to the OST with the punch RPC, otherwise we do an explicit setattr RPC.
-+ * I don't believe it is possible to get e.g. ATTR_MTIME_SET and ATTR_SIZE
-+ * at the same time.
-+ */
-+int ll_setattr_raw(struct inode *inode, struct iattr *attr)
-+{
-+ struct ll_inode_info *lli = ll_i2info(inode);
-+ struct lov_stripe_md *lsm = lli->lli_smd;
-+ struct ll_sb_info *sbi = ll_i2sbi(inode);
-+ struct ptlrpc_request *request = NULL;
-+ struct mdc_op_data op_data;
-+ struct lustre_md md;
-+ int ia_valid = attr->ia_valid;
-+ int rc = 0;
-+ ENTRY;
-+
-+ CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu valid %x\n", inode->i_ino,
-+ attr->ia_valid);
-+ ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_SETATTR, 1);
-+
-+ if (ia_valid & ATTR_SIZE) {
-+ if (attr->ia_size > ll_file_maxbytes(inode)) {
-+ CDEBUG(D_INODE, "file too large %llu > "LPU64"\n",
-+ attr->ia_size, ll_file_maxbytes(inode));
-+ RETURN(-EFBIG);
-+ }
-+
-+ attr->ia_valid |= ATTR_MTIME | ATTR_CTIME;
-+ }
-+
-+ /* POSIX: check before ATTR_*TIME_SET set (from inode_change_ok) */
-+ if (ia_valid & (ATTR_MTIME_SET | ATTR_ATIME_SET)) {
-+ if (current->fsuid != inode->i_uid &&
-+ !cfs_capable(CFS_CAP_FOWNER))
-+ RETURN(-EPERM);
-+ }
-+
-+ /* We mark all of the fields "set" so MDS/OST does not re-set them */
-+ if (attr->ia_valid & ATTR_CTIME) {
-+ attr->ia_ctime = CURRENT_TIME;
-+ attr->ia_valid |= ATTR_CTIME_SET;
-+ }
-+ if (!(ia_valid & ATTR_ATIME_SET) && (attr->ia_valid & ATTR_ATIME)) {
-+ attr->ia_atime = CURRENT_TIME;
-+ attr->ia_valid |= ATTR_ATIME_SET;
-+ }
-+ if (!(ia_valid & ATTR_MTIME_SET) && (attr->ia_valid & ATTR_MTIME)) {
-+ attr->ia_mtime = CURRENT_TIME;
-+ attr->ia_valid |= ATTR_MTIME_SET;
-+ }
-+ if ((attr->ia_valid & ATTR_CTIME) && !(attr->ia_valid & ATTR_MTIME)) {
-+ /* To avoid stale mtime on mds, obtain it from ost and send
-+ to mds. */
-+ rc = ll_glimpse_size(inode, 0);
-+ if (rc)
-+ RETURN(rc);
-+
-+ attr->ia_valid |= ATTR_MTIME_SET | ATTR_MTIME;
-+ attr->ia_mtime = inode->i_mtime;
-+ }
-+
-+ if (attr->ia_valid & (ATTR_MTIME | ATTR_CTIME))
-+ CDEBUG(D_INODE, "setting mtime %lu, ctime %lu, now = %lu\n",
-+ LTIME_S(attr->ia_mtime), LTIME_S(attr->ia_ctime),
-+ CURRENT_SECONDS);
-+
-+ /* NB: ATTR_SIZE will only be set after this point if the size
-+ * resides on the MDS, ie, this file has no objects. */
-+ if (lsm)
-+ attr->ia_valid &= ~ATTR_SIZE;
-+
-+ /* We always do an MDS RPC, even if we're only changing the size;
-+ * only the MDS knows whether truncate() should fail with -ETXTBUSY */
-+ ll_prepare_mdc_op_data(&op_data, inode, NULL, NULL, 0, 0, NULL);
-+
-+ rc = mdc_setattr(sbi->ll_mdc_exp, &op_data,
-+ attr, NULL, 0, NULL, 0, &request);
-+
-+ if (rc) {
-+ ptlrpc_req_finished(request);
-+ if (rc == -ENOENT) {
-+ inode->i_nlink = 0;
-+ /* Unlinked special device node? Or just a race?
-+ * Pretend we've done everything. */
-+ if (!S_ISREG(inode->i_mode) &&
-+ !S_ISDIR(inode->i_mode))
-+ rc = inode_setattr(inode, attr);
-+ } else if (rc != -EPERM && rc != -EACCES && rc != -ETXTBSY)
-+ CERROR("mdc_setattr fails: rc = %d\n", rc);
-+ RETURN(rc);
-+ }
-+
-+ rc = mdc_req2lustre_md(request, REPLY_REC_OFF, sbi->ll_osc_exp, &md);
-+ if (rc) {
-+ ptlrpc_req_finished(request);
-+ RETURN(rc);
-+ }
-+
-+ /* We call inode_setattr to adjust timestamps.
-+ * If there is at least some data in file, we cleared ATTR_SIZE above to
-+ * avoid invoking vmtruncate, otherwise it is important to call
-+ * vmtruncate in inode_setattr to update inode->i_size (bug 6196) */
-+ rc = inode_setattr(inode, attr);
-+
-+ ll_update_inode(inode, &md);
-+ ptlrpc_req_finished(request);
-+
-+ if (!lsm || !S_ISREG(inode->i_mode)) {
-+ CDEBUG(D_INODE, "no lsm: not setting attrs on OST\n");
-+ RETURN(rc);
-+ }
-+
-+ /* We really need to get our PW lock before we change inode->i_size.
-+ * If we don't, we can race with other i_size updaters on our node, like
-+ * ll_file_read. We can also race with i_size propagation to other
-+ * nodes through dirtying and writeback of final cached pages. This
-+ * last one is especially bad for racing o_append users on other
-+ * nodes. */
-+ if (ia_valid & ATTR_SIZE) {
-+ rc = ll_setattr_do_truncate(inode, attr->ia_size);
-+ } else if (ia_valid & (ATTR_MTIME | ATTR_MTIME_SET)) {
-+ obd_flag flags;
-+ struct obd_info oinfo = { { { 0 } } };
-+ struct obdo *oa;
-+ OBDO_ALLOC(oa);
-+
-+ CDEBUG(D_INODE, "set mtime on OST inode %lu to %lu\n",
-+ inode->i_ino, LTIME_S(attr->ia_mtime));
-+
-+ if (oa) {
-+ oa->o_id = lsm->lsm_object_id;
-+ oa->o_valid = OBD_MD_FLID;
-+
-+ flags = OBD_MD_FLTYPE | OBD_MD_FLATIME |
-+ OBD_MD_FLMTIME | OBD_MD_FLCTIME |
-+ OBD_MD_FLFID | OBD_MD_FLGENER;
-+
-+ obdo_from_inode(oa, inode, flags);
-+
-+ oinfo.oi_oa = oa;
-+ oinfo.oi_md = lsm;
-+
-+ rc = obd_setattr_rqset(sbi->ll_osc_exp, &oinfo, NULL);
-+ if (rc)
-+ CERROR("obd_setattr_async fails: rc=%d\n", rc);
-+ OBDO_FREE(oa);
-+ } else {
-+ rc = -ENOMEM;
-+ }
-+ }
-+ RETURN(rc);
-+}
-+
-+int ll_setattr(struct dentry *de, struct iattr *attr)
-+{
-+ int mode;
-+
-+ if ((attr->ia_valid & (ATTR_CTIME|ATTR_SIZE|ATTR_MODE)) ==
-+ (ATTR_CTIME|ATTR_SIZE|ATTR_MODE))
-+ attr->ia_valid |= MDS_OPEN_OWNEROVERRIDE;
-+ if ((attr->ia_valid & (ATTR_MODE|ATTR_FORCE|ATTR_SIZE)) ==
-+ (ATTR_SIZE|ATTR_MODE)) {
-+ mode = de->d_inode->i_mode;
-+ if (((mode & S_ISUID) && (!(attr->ia_mode & S_ISUID))) ||
-+ ((mode & S_ISGID) && (mode & S_IXGRP) &&
-+ (!(attr->ia_mode & S_ISGID))))
-+ attr->ia_valid |= ATTR_FORCE;
-+ }
-+
-+ return ll_setattr_raw(de->d_inode, attr);
-+}
-+
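A standalone sketch, not part of the patch, of the setuid/setgid condition ll_setattr() applies above: ATTR_FORCE is added only when a combined size+mode change clears the setuid bit, or the setgid bit on a group-executable file, so that the implicit mode change is accepted. needs_attr_force() is a hypothetical helper introduced here only for illustration.

#include <stdio.h>
#include <sys/stat.h>

/* Same predicate as in ll_setattr() above, lifted into a testable helper. */
static int needs_attr_force(mode_t cur, mode_t new)
{
        return ((cur & S_ISUID) && !(new & S_ISUID)) ||
               ((cur & S_ISGID) && (cur & S_IXGRP) && !(new & S_ISGID));
}

int main(void)
{
        printf("%d\n", needs_attr_force(S_ISUID | 0755, 0755)); /* 1: setuid bit dropped */
        printf("%d\n", needs_attr_force(0755, 0755));           /* 0: nothing to clear */
        return 0;
}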
-+int ll_statfs_internal(struct super_block *sb, struct obd_statfs *osfs,
-+ __u64 max_age, __u32 flags)
-+{
-+ struct ll_sb_info *sbi = ll_s2sbi(sb);
-+ struct obd_statfs obd_osfs;
-+ int rc;
-+ ENTRY;
-+
-+ rc = obd_statfs(class_exp2obd(sbi->ll_mdc_exp), osfs, max_age, flags);
-+ if (rc) {
-+ CERROR("mdc_statfs fails: rc = %d\n", rc);
-+ RETURN(rc);
-+ }
-+
-+ osfs->os_type = sb->s_magic;
-+
-+ CDEBUG(D_SUPER, "MDC blocks "LPU64"/"LPU64" objects "LPU64"/"LPU64"\n",
-+ osfs->os_bavail, osfs->os_blocks, osfs->os_ffree,osfs->os_files);
-+
-+ rc = obd_statfs_rqset(class_exp2obd(sbi->ll_osc_exp),
-+ &obd_osfs, max_age, flags);
-+ if (rc) {
-+ CERROR("obd_statfs fails: rc = %d\n", rc);
-+ RETURN(rc);
-+ }
-+
-+ CDEBUG(D_SUPER, "OSC blocks "LPU64"/"LPU64" objects "LPU64"/"LPU64"\n",
-+ obd_osfs.os_bavail, obd_osfs.os_blocks, obd_osfs.os_ffree,
-+ obd_osfs.os_files);
-+
-+ osfs->os_bsize = obd_osfs.os_bsize;
-+ osfs->os_blocks = obd_osfs.os_blocks;
-+ osfs->os_bfree = obd_osfs.os_bfree;
-+ osfs->os_bavail = obd_osfs.os_bavail;
-+
-+ /* If we don't have as many objects free on the OST as inodes
-+ * on the MDS, we reduce the total number of inodes to
-+ * compensate, so that the "inodes in use" number is correct.
-+ */
-+ if (obd_osfs.os_ffree < osfs->os_ffree) {
-+ osfs->os_files = (osfs->os_files - osfs->os_ffree) +
-+ obd_osfs.os_ffree;
-+ osfs->os_ffree = obd_osfs.os_ffree;
-+ }
-+
-+ RETURN(rc);
-+}
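A minimal standalone sketch, with made-up numbers, of the inode-count compensation done at the end of ll_statfs_internal() above: when the OSTs have fewer free objects than the MDS has free inodes, the reported totals shrink so that "files - ffree" (inodes in use) stays accurate.

#include <stdio.h>

int main(void)
{
        unsigned long long files = 1000, ffree = 400;   /* hypothetical MDS statfs */
        unsigned long long ost_ffree = 100;             /* hypothetical free OST objects */

        if (ost_ffree < ffree) {
                files = (files - ffree) + ost_ffree;    /* 600 in use + 100 creatable */
                ffree = ost_ffree;
        }
        printf("files=%llu ffree=%llu\n", files, ffree); /* files=700 ffree=100 */
        return 0;
}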
-+#ifndef HAVE_STATFS_DENTRY_PARAM
-+int ll_statfs(struct super_block *sb, struct kstatfs *sfs)
-+{
-+#else
-+int ll_statfs(struct dentry *de, struct kstatfs *sfs)
-+{
-+ struct super_block *sb = de->d_sb;
-+#endif
-+ struct obd_statfs osfs;
-+ int rc;
-+
-+ CDEBUG(D_VFSTRACE, "VFS Op: at "LPU64" jiffies\n", get_jiffies_64());
-+ ll_stats_ops_tally(ll_s2sbi(sb), LPROC_LL_STAFS, 1);
-+
-+ /* For now we will always get up-to-date statfs values, but in the
-+ * future we may allow some amount of caching on the client (e.g.
-+ * from QOS or lprocfs updates). */
-+ rc = ll_statfs_internal(sb, &osfs, cfs_time_current_64() - 1, 0);
-+ if (rc)
-+ return rc;
-+
-+ statfs_unpack(sfs, &osfs);
-+
-+ /* We need to downshift for all 32-bit kernels, because we can't
-+ * tell if the kernel is being called via sys_statfs64() or not.
-+ * Stop before overflowing f_bsize - in which case it is better
-+ * to just risk EOVERFLOW if caller is using old sys_statfs(). */
-+ if (sizeof(long) < 8) {
-+ while (osfs.os_blocks > ~0UL && sfs->f_bsize < 0x40000000) {
-+ sfs->f_bsize <<= 1;
-+
-+ osfs.os_blocks >>= 1;
-+ osfs.os_bfree >>= 1;
-+ osfs.os_bavail >>= 1;
-+ }
-+ }
-+
-+ sfs->f_blocks = osfs.os_blocks;
-+ sfs->f_bfree = osfs.os_bfree;
-+ sfs->f_bavail = osfs.os_bavail;
-+
-+ return 0;
-+}
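A standalone sketch, not from the patch, of the 32-bit downshift loop in ll_statfs() above: block counts that do not fit in an unsigned long are halved while the block size is doubled, so the total byte count reported to userspace is preserved. The 32-bit limit is written out explicitly here because this sketch may be compiled on a 64-bit host.

#include <stdio.h>

int main(void)
{
        unsigned long long blocks = 0x300000000ULL; /* hypothetical: ~13 billion 4KB blocks (~48TB) */
        unsigned long bsize = 4096;

        while (blocks > 0xffffffffULL && bsize < 0x40000000) {
                bsize <<= 1;
                blocks >>= 1;
        }
        /* 4096 -> 16384 bytes per block, block count shrunk by the same factor */
        printf("bsize=%lu blocks=%llu\n", bsize, blocks);
        return 0;
}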
-+
-+void ll_inode_size_lock(struct inode *inode, int lock_lsm)
-+{
-+ struct ll_inode_info *lli;
-+ struct lov_stripe_md *lsm;
-+
-+ lli = ll_i2info(inode);
-+ LASSERT(lli->lli_size_sem_owner != current);
-+ down(&lli->lli_size_sem);
-+ LASSERT(lli->lli_size_sem_owner == NULL);
-+ lli->lli_size_sem_owner = current;
-+ lsm = lli->lli_smd;
-+ LASSERTF(lsm != NULL || lock_lsm == 0, "lsm %p, lock_lsm %d\n",
-+ lsm, lock_lsm);
-+ if (lock_lsm)
-+ lov_stripe_lock(lsm);
-+}
-+
-+void ll_inode_size_unlock(struct inode *inode, int unlock_lsm)
-+{
-+ struct ll_inode_info *lli;
-+ struct lov_stripe_md *lsm;
-+
-+ lli = ll_i2info(inode);
-+ lsm = lli->lli_smd;
-+ LASSERTF(lsm != NULL || unlock_lsm == 0, "lsm %p, lock_lsm %d\n",
-+ lsm, unlock_lsm);
-+ if (unlock_lsm)
-+ lov_stripe_unlock(lsm);
-+ LASSERT(lli->lli_size_sem_owner == current);
-+ lli->lli_size_sem_owner = NULL;
-+ up(&lli->lli_size_sem);
-+}
-+
-+static void ll_replace_lsm(struct inode *inode, struct lov_stripe_md *lsm)
-+{
-+ struct ll_inode_info *lli = ll_i2info(inode);
-+
-+ dump_lsm(D_INODE, lsm);
-+ dump_lsm(D_INODE, lli->lli_smd);
-+ LASSERTF(lsm->lsm_magic == LOV_MAGIC_JOIN,
-+ "lsm must be joined lsm %p\n", lsm);
-+ obd_free_memmd(ll_i2obdexp(inode), &lli->lli_smd);
-+ CDEBUG(D_INODE, "replace lsm %p to lli_smd %p for inode %lu%u(%p)\n",
-+ lsm, lli->lli_smd, inode->i_ino, inode->i_generation, inode);
-+ lli->lli_smd = lsm;
-+ lli->lli_maxbytes = lsm->lsm_maxbytes;
-+ if (lli->lli_maxbytes > PAGE_CACHE_MAXBYTES)
-+ lli->lli_maxbytes = PAGE_CACHE_MAXBYTES;
-+}
-+
-+void ll_update_inode(struct inode *inode, struct lustre_md *md)
-+{
-+ struct ll_inode_info *lli = ll_i2info(inode);
-+ struct mds_body *body = md->body;
-+ struct lov_stripe_md *lsm = md->lsm;
-+
-+ LASSERT ((lsm != NULL) == ((body->valid & OBD_MD_FLEASIZE) != 0));
-+ if (lsm != NULL) {
-+ if (lli->lli_smd == NULL) {
-+ if (lsm->lsm_magic != LOV_MAGIC &&
-+ lsm->lsm_magic != LOV_MAGIC_JOIN) {
-+ dump_lsm(D_ERROR, lsm);
-+ LBUG();
-+ }
-+ CDEBUG(D_INODE, "adding lsm %p to inode %lu/%u(%p)\n",
-+ lsm, inode->i_ino, inode->i_generation, inode);
-+ /* ll_inode_size_lock() requires it is only called
-+ * with lli_smd != NULL or lock_lsm == 0 or we can
-+ * race between lock/unlock. bug 9547 */
-+ lli->lli_smd = lsm;
-+ lli->lli_maxbytes = lsm->lsm_maxbytes;
-+ if (lli->lli_maxbytes > PAGE_CACHE_MAXBYTES)
-+ lli->lli_maxbytes = PAGE_CACHE_MAXBYTES;
-+ } else {
-+ if (lli->lli_smd->lsm_magic == lsm->lsm_magic &&
-+ lli->lli_smd->lsm_stripe_count ==
-+ lsm->lsm_stripe_count) {
-+ if (lov_stripe_md_cmp(lli->lli_smd, lsm)) {
-+ CERROR("lsm mismatch for inode %ld\n",
-+ inode->i_ino);
-+ CERROR("lli_smd:\n");
-+ dump_lsm(D_ERROR, lli->lli_smd);
-+ CERROR("lsm:\n");
-+ dump_lsm(D_ERROR, lsm);
-+ LBUG();
-+ }
-+ } else
-+ ll_replace_lsm(inode, lsm);
-+ }
-+ if (lli->lli_smd != lsm)
-+ obd_free_memmd(ll_i2obdexp(inode), &lsm);
-+ }
-+
-+#ifdef CONFIG_FS_POSIX_ACL
-+ LASSERT(!md->posix_acl || (body->valid & OBD_MD_FLACL));
-+ if (body->valid & OBD_MD_FLACL) {
-+ spin_lock(&lli->lli_lock);
-+ if (lli->lli_posix_acl)
-+ posix_acl_release(lli->lli_posix_acl);
-+ lli->lli_posix_acl = md->posix_acl;
-+ spin_unlock(&lli->lli_lock);
-+ }
-+#endif
-+
-+ if (body->valid & OBD_MD_FLID)
-+ inode->i_ino = body->ino;
-+ if (body->valid & OBD_MD_FLATIME &&
-+ body->atime > LTIME_S(inode->i_atime))
-+ LTIME_S(inode->i_atime) = body->atime;
-+
-+ /* mtime is always updated with ctime, but can be set in the past.
-+ Since a write and utime(2) may happen within the same second, and
-+ utime's mtime takes priority over the write's, take the mtime from
-+ the MDS when the ctimes are equal. */
-+ if (body->valid & OBD_MD_FLCTIME &&
-+ body->ctime >= LTIME_S(inode->i_ctime)) {
-+ LTIME_S(inode->i_ctime) = body->ctime;
-+ if (body->valid & OBD_MD_FLMTIME) {
-+ CDEBUG(D_INODE, "setting ino %lu mtime "
-+ "from %lu to "LPU64"\n", inode->i_ino,
-+ LTIME_S(inode->i_mtime), body->mtime);
-+ LTIME_S(inode->i_mtime) = body->mtime;
-+ }
-+ }
-+ if (body->valid & OBD_MD_FLMODE)
-+ inode->i_mode = (inode->i_mode & S_IFMT)|(body->mode & ~S_IFMT);
-+ if (body->valid & OBD_MD_FLTYPE)
-+ inode->i_mode = (inode->i_mode & ~S_IFMT)|(body->mode & S_IFMT);
-+ if (S_ISREG(inode->i_mode)) {
-+ inode->i_blkbits = min(PTLRPC_MAX_BRW_BITS+1, LL_MAX_BLKSIZE_BITS);
-+ } else {
-+ inode->i_blkbits = inode->i_sb->s_blocksize_bits;
-+ }
-+#ifdef HAVE_INODE_BLKSIZE
-+ inode->i_blksize = 1<<inode->i_blkbits;
-+#endif
-+ if (body->valid & OBD_MD_FLUID)
-+ inode->i_uid = body->uid;
-+ if (body->valid & OBD_MD_FLGID)
-+ inode->i_gid = body->gid;
-+ if (body->valid & OBD_MD_FLFLAGS)
-+ inode->i_flags = ll_ext_to_inode_flags(body->flags);
-+
-+ if (body->valid & OBD_MD_FLNLINK)
-+ inode->i_nlink = body->nlink;
-+ if (body->valid & OBD_MD_FLGENER)
-+ inode->i_generation = body->generation;
-+ if (body->valid & OBD_MD_FLRDEV)
-+ inode->i_rdev = old_decode_dev(body->rdev);
-+ if (body->valid & OBD_MD_FLSIZE) {
-+#if 0 /* Can't block ll_test_inode->ll_update_inode, b=14326*/
-+ ll_inode_size_lock(inode, 0);
-+ i_size_write(inode, body->size);
-+ ll_inode_size_unlock(inode, 0);
-+#else
-+ inode->i_size = body->size;
-+#endif
-+ }
-+ if (body->valid & OBD_MD_FLBLOCKS)
-+ inode->i_blocks = body->blocks;
-+
-+ if (body->valid & OBD_MD_FLSIZE)
-+ set_bit(LLI_F_HAVE_MDS_SIZE_LOCK, &lli->lli_flags);
-+}
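A small standalone sketch of the mtime/ctime merge rule ll_update_inode() applies above: the MDS mtime is taken only when the MDS ctime is not older than the locally cached one, so a utime(2) issued elsewhere (same or newer ctime, possibly older mtime) may legitimately move mtime backwards, while a stale MDS reply cannot. merge_times() is a hypothetical helper used only for illustration.

#include <stdio.h>

/* Hypothetical helper mirroring the OBD_MD_FLCTIME/OBD_MD_FLMTIME branch above. */
static void merge_times(long *ct, long *mt, long mds_ct, long mds_mt)
{
        if (mds_ct >= *ct) {    /* MDS view is at least as recent */
                *ct = mds_ct;
                *mt = mds_mt;   /* may legitimately set mtime into the past */
        }
}

int main(void)
{
        long ct = 100, mt = 100;

        merge_times(&ct, &mt, 100, 50);           /* utime(2) done on another client */
        printf("ctime=%ld mtime=%ld\n", ct, mt);  /* ctime=100 mtime=50 */
        return 0;
}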
-+
-+static struct backing_dev_info ll_backing_dev_info = {
-+ .ra_pages = 0, /* No readahead */
-+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,12))
-+ .capabilities = 0, /* Does contribute to dirty memory */
-+#else
-+ .memory_backed = 0, /* Does contribute to dirty memory */
-+#endif
-+};
-+
-+void ll_read_inode2(struct inode *inode, void *opaque)
-+{
-+ struct lustre_md *md = opaque;
-+ struct ll_inode_info *lli = ll_i2info(inode);
-+ ENTRY;
-+
-+ CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
-+ inode->i_generation, inode);
-+
-+ ll_lli_init(lli);
-+
-+ LASSERT(!lli->lli_smd);
-+
-+ /* Core attributes from the MDS first. This is a new inode, and
-+ * the VFS doesn't zero times in the core inode so we have to do
-+ * it ourselves. They will be overwritten by either MDS or OST
-+ * attributes - we just need to make sure they aren't newer. */
-+ LTIME_S(inode->i_mtime) = 0;
-+ LTIME_S(inode->i_atime) = 0;
-+ LTIME_S(inode->i_ctime) = 0;
-+ inode->i_rdev = 0;
-+ ll_update_inode(inode, md);
-+
-+ /* OIDEBUG(inode); */
-+
-+ if (S_ISREG(inode->i_mode)) {
-+ struct ll_sb_info *sbi = ll_i2sbi(inode);
-+ inode->i_op = &ll_file_inode_operations;
-+ inode->i_fop = sbi->ll_fop;
-+ inode->i_mapping->a_ops = &ll_aops;
-+ EXIT;
-+ } else if (S_ISDIR(inode->i_mode)) {
-+ inode->i_op = &ll_dir_inode_operations;
-+ inode->i_fop = &ll_dir_operations;
-+ inode->i_mapping->a_ops = &ll_dir_aops;
-+ EXIT;
-+ } else if (S_ISLNK(inode->i_mode)) {
-+ inode->i_op = &ll_fast_symlink_inode_operations;
-+ EXIT;
-+ } else {
-+ inode->i_op = &ll_special_inode_operations;
-+ init_special_inode(inode, inode->i_mode,
-+ kdev_t_to_nr(inode->i_rdev));
-+ /* initializing backing dev info. */
-+ inode->i_mapping->backing_dev_info = &ll_backing_dev_info;
-+ EXIT;
-+ }
-+}
-+
-+int ll_iocontrol(struct inode *inode, struct file *file,
-+ unsigned int cmd, unsigned long arg)
-+{
-+ struct ll_sb_info *sbi = ll_i2sbi(inode);
-+ struct ptlrpc_request *req = NULL;
-+ int rc, flags = 0;
-+ ENTRY;
-+
-+ switch(cmd) {
-+ case EXT3_IOC_GETFLAGS: {
-+ struct ll_fid fid;
-+ struct mds_body *body;
-+
-+ ll_inode2fid(&fid, inode);
-+ rc = mdc_getattr(sbi->ll_mdc_exp, &fid, OBD_MD_FLFLAGS,0,&req);
-+ if (rc) {
-+ CERROR("failure %d inode %lu\n", rc, inode->i_ino);
-+ RETURN(-abs(rc));
-+ }
-+
-+ body = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF,
-+ sizeof(*body));
-+
-+ /* We want to return EXT3_*_FL flags to the caller via this
-+ * ioctl. An older MDS may be sending S_* flags, fix it up. */
-+ flags = ll_inode_to_ext_flags(body->flags,
-+ body->flags &MDS_BFLAG_EXT_FLAGS);
-+ ptlrpc_req_finished (req);
-+
-+ RETURN(put_user(flags, (int *)arg));
-+ }
-+ case EXT3_IOC_SETFLAGS: {
-+ struct mdc_op_data op_data;
-+ struct ll_iattr_struct attr;
-+ struct obd_info oinfo = { { { 0 } } };
-+ struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
-+
-+ if (get_user(flags, (int *)arg))
-+ RETURN(-EFAULT);
-+
-+ oinfo.oi_md = lsm;
-+ OBDO_ALLOC(oinfo.oi_oa);
-+ if (!oinfo.oi_oa)
-+ RETURN(-ENOMEM);
-+
-+ ll_prepare_mdc_op_data(&op_data, inode, NULL, NULL, 0, 0, NULL);
-+
-+ memset(&attr, 0, sizeof(attr));
-+ attr.ia_attr_flags = flags;
-+ ((struct iattr *)&attr)->ia_valid |= ATTR_ATTR_FLAG;
-+
-+ rc = mdc_setattr(sbi->ll_mdc_exp, &op_data,
-+ (struct iattr *)&attr, NULL, 0, NULL, 0, &req);
-+ ptlrpc_req_finished(req);
-+ if (rc || lsm == NULL) {
-+ OBDO_FREE(oinfo.oi_oa);
-+ RETURN(rc);
-+ }
-+
-+ oinfo.oi_oa->o_id = lsm->lsm_object_id;
-+ oinfo.oi_oa->o_flags = flags;
-+ oinfo.oi_oa->o_valid = OBD_MD_FLID | OBD_MD_FLFLAGS;
-+
-+ obdo_from_inode(oinfo.oi_oa, inode,
-+ OBD_MD_FLFID | OBD_MD_FLGENER);
-+ rc = obd_setattr_rqset(sbi->ll_osc_exp, &oinfo, NULL);
-+ OBDO_FREE(oinfo.oi_oa);
-+ if (rc) {
-+ if (rc != -EPERM && rc != -EACCES)
-+ CERROR("mdc_setattr_async fails: rc = %d\n", rc);
-+ RETURN(rc);
-+ }
-+
-+ inode->i_flags = ll_ext_to_inode_flags(flags |
-+ MDS_BFLAG_EXT_FLAGS);
-+ RETURN(0);
-+ }
-+ default:
-+ RETURN(-ENOSYS);
-+ }
-+
-+ RETURN(0);
-+}
-+
-+/* umount -f client means force down, don't save state */
-+#ifdef HAVE_UMOUNTBEGIN_VFSMOUNT
-+void ll_umount_begin(struct vfsmount *vfsmnt, int flags)
-+{
-+ struct super_block *sb = vfsmnt->mnt_sb;
-+#else
-+void ll_umount_begin(struct super_block *sb)
-+{
-+#endif
-+ struct lustre_sb_info *lsi = s2lsi(sb);
-+ struct ll_sb_info *sbi = ll_s2sbi(sb);
-+ struct obd_device *obd;
-+ struct obd_ioctl_data ioc_data = { 0 };
-+ ENTRY;
-+
-+#ifdef HAVE_UMOUNTBEGIN_VFSMOUNT
-+ if (!(flags & MNT_FORCE)) {
-+ EXIT;
-+ return;
-+ }
-+#endif
-+
-+ /* Tell the MGC we got umount -f */
-+ lsi->lsi_flags |= LSI_UMOUNT_FORCE;
-+
-+ CDEBUG(D_VFSTRACE, "VFS Op: superblock %p count %d active %d\n", sb,
-+ sb->s_count, atomic_read(&sb->s_active));
-+
-+ obd = class_exp2obd(sbi->ll_mdc_exp);
-+ if (obd == NULL) {
-+ CERROR("Invalid MDC connection handle "LPX64"\n",
-+ sbi->ll_mdc_exp->exp_handle.h_cookie);
-+ EXIT;
-+ return;
-+ }
-+ obd->obd_force = 1;
-+ obd_iocontrol(IOC_OSC_SET_ACTIVE, sbi->ll_mdc_exp, sizeof ioc_data,
-+ &ioc_data, NULL);
-+
-+ obd = class_exp2obd(sbi->ll_osc_exp);
-+ if (obd == NULL) {
-+ CERROR("Invalid LOV connection handle "LPX64"\n",
-+ sbi->ll_osc_exp->exp_handle.h_cookie);
-+ EXIT;
-+ return;
-+ }
-+
-+ obd->obd_force = 1;
-+ obd_iocontrol(IOC_OSC_SET_ACTIVE, sbi->ll_osc_exp, sizeof ioc_data,
-+ &ioc_data, NULL);
-+
-+ /* Really, we'd like to wait until there are no requests outstanding,
-+ * and then continue. For now, we just invalidate the requests,
-+ * schedule() and sleep one second if needed, and hope.
-+ */
-+ schedule();
-+#ifdef HAVE_UMOUNTBEGIN_VFSMOUNT
-+ if (atomic_read(&vfsmnt->mnt_count) > 2) {
-+ cfs_schedule_timeout(CFS_TASK_INTERRUPTIBLE,
-+ cfs_time_seconds(1));
-+ if (atomic_read(&vfsmnt->mnt_count) > 2)
-+ LCONSOLE_WARN("Mount still busy with %d refs! You "
-+ "may try to umount it a bit later\n",
-+ atomic_read(&vfsmnt->mnt_count));
-+ }
-+#endif
-+
-+ EXIT;
-+}
-+
-+int ll_remount_fs(struct super_block *sb, int *flags, char *data)
-+{
-+ struct ll_sb_info *sbi = ll_s2sbi(sb);
-+ int err;
-+ __u32 read_only;
-+
-+ if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY)) {
-+ read_only = *flags & MS_RDONLY;
-+ err = obd_set_info_async(sbi->ll_mdc_exp, sizeof(KEY_READONLY),
-+ KEY_READONLY, sizeof(read_only),
-+ &read_only, NULL);
-+
-+ /* MDS might have expected a different ro key value, b=17493 */
-+ if (err == -EINVAL) {
-+ CDEBUG(D_CONFIG, "Retrying remount with 1.6.6 ro key\n");
-+ err = obd_set_info_async(sbi->ll_mdc_exp,
-+ sizeof(KEY_READONLY_166COMPAT),
-+ KEY_READONLY_166COMPAT,
-+ sizeof(read_only),
-+ &read_only, NULL);
-+ }
-+
-+ if (err) {
-+ CERROR("Failed to change the read-only flag during "
-+ "remount: %d\n", err);
-+ return err;
-+ }
-+
-+ if (read_only)
-+ sb->s_flags |= MS_RDONLY;
-+ else
-+ sb->s_flags &= ~MS_RDONLY;
-+ }
-+ return 0;
-+}
-+
-+int ll_prep_inode(struct obd_export *exp, struct inode **inode,
-+ struct ptlrpc_request *req, int offset,struct super_block *sb)
-+{
-+ struct lustre_md md;
-+ struct ll_sb_info *sbi = NULL;
-+ int rc = 0;
-+ ENTRY;
-+
-+ LASSERT(*inode || sb);
-+ sbi = sb ? ll_s2sbi(sb) : ll_i2sbi(*inode);
-+ prune_deathrow(sbi, 1);
-+
-+ rc = mdc_req2lustre_md(req, offset, exp, &md);
-+ if (rc)
-+ RETURN(rc);
-+
-+ if (*inode) {
-+ ll_update_inode(*inode, &md);
-+ } else {
-+ LASSERT(sb);
-+ *inode = ll_iget(sb, md.body->ino, &md);
-+ if (*inode == NULL || is_bad_inode(*inode)) {
-+ mdc_free_lustre_md(exp, &md);
-+ rc = -ENOMEM;
-+ CERROR("new_inode -fatal: rc %d\n", rc);
-+ GOTO(out, rc);
-+ }
-+ }
-+
-+ rc = obd_checkmd(exp, ll_i2mdcexp(*inode),
-+ ll_i2info(*inode)->lli_smd);
-+out:
-+ RETURN(rc);
-+}
-+
-+char *llap_origins[] = {
-+ [LLAP_ORIGIN_UNKNOWN] = "--",
-+ [LLAP_ORIGIN_READPAGE] = "rp",
-+ [LLAP_ORIGIN_READAHEAD] = "ra",
-+ [LLAP_ORIGIN_COMMIT_WRITE] = "cw",
-+ [LLAP_ORIGIN_WRITEPAGE] = "wp",
-+ [LLAP_ORIGIN_LOCKLESS_IO] = "ls"
-+};
-+
-+struct ll_async_page *llite_pglist_next_llap(struct ll_sb_info *sbi,
-+ struct list_head *list)
-+{
-+ struct ll_async_page *llap;
-+ struct list_head *pos;
-+
-+ list_for_each(pos, list) {
-+ if (pos == &sbi->ll_pglist)
-+ return NULL;
-+ llap = list_entry(pos, struct ll_async_page, llap_pglist_item);
-+ if (llap->llap_page == NULL)
-+ continue;
-+ return llap;
-+ }
-+ LBUG();
-+ return NULL;
-+}
-+
-+int ll_obd_statfs(struct inode *inode, void *arg)
-+{
-+ struct ll_sb_info *sbi = NULL;
-+ struct obd_device *client_obd = NULL, *lov_obd = NULL;
-+ struct lov_obd *lov = NULL;
-+ struct obd_statfs stat_buf = {0};
-+ char *buf = NULL;
-+ struct obd_ioctl_data *data = NULL;
-+ __u32 type, index;
-+ int len = 0, rc;
-+
-+ if (!inode || !(sbi = ll_i2sbi(inode)))
-+ GOTO(out_statfs, rc = -EINVAL);
-+
-+ rc = obd_ioctl_getdata(&buf, &len, arg);
-+ if (rc)
-+ GOTO(out_statfs, rc);
-+
-+ data = (void*)buf;
-+ if (!data->ioc_inlbuf1 || !data->ioc_inlbuf2 ||
-+ !data->ioc_pbuf1 || !data->ioc_pbuf2)
-+ GOTO(out_statfs, rc = -EINVAL);
-+
-+ memcpy(&type, data->ioc_inlbuf1, sizeof(__u32));
-+ memcpy(&index, data->ioc_inlbuf2, sizeof(__u32));
-+
-+ if (type == LL_STATFS_MDC) {
-+ if (index > 0)
-+ GOTO(out_statfs, rc = -ENODEV);
-+ client_obd = class_exp2obd(sbi->ll_mdc_exp);
-+ } else if (type == LL_STATFS_LOV) {
-+ lov_obd = class_exp2obd(sbi->ll_osc_exp);
-+ lov = &lov_obd->u.lov;
-+
-+ if (index >= lov->desc.ld_tgt_count)
-+ GOTO(out_statfs, rc = -ENODEV);
-+
-+ if (!lov->lov_tgts[index])
-+ /* Try again with the next index */
-+ GOTO(out_statfs, rc = -EAGAIN);
-+
-+ client_obd = class_exp2obd(lov->lov_tgts[index]->ltd_exp);
-+ if (!lov->lov_tgts[index]->ltd_active)
-+ GOTO(out_uuid, rc = -ENODATA);
-+ }
-+
-+ if (!client_obd)
-+ GOTO(out_statfs, rc = -EINVAL);
-+
-+ rc = obd_statfs(client_obd, &stat_buf, cfs_time_current_64() - HZ, 1);
-+ if (rc)
-+ GOTO(out_statfs, rc);
-+
-+ if (copy_to_user(data->ioc_pbuf1, &stat_buf, data->ioc_plen1))
-+ GOTO(out_statfs, rc = -EFAULT);
-+
-+out_uuid:
-+ if (copy_to_user(data->ioc_pbuf2, obd2cli_tgt(client_obd),
-+ data->ioc_plen2))
-+ rc = -EFAULT;
-+
-+out_statfs:
-+ if (buf)
-+ obd_ioctl_freedata(buf, len);
-+ return rc;
-+}
-+
-+int ll_process_config(struct lustre_cfg *lcfg)
-+{
-+ char *ptr;
-+ void *sb;
-+ struct lprocfs_static_vars lvars;
-+ unsigned long x;
-+ int rc = 0;
-+
-+ lprocfs_llite_init_vars(&lvars);
-+
-+ /* The instance name contains the sb: lustre-client-aacfe000 */
-+ ptr = strrchr(lustre_cfg_string(lcfg, 0), '-');
-+ if (!ptr || !*(++ptr))
-+ return -EINVAL;
-+ if (sscanf(ptr, "%lx", &x) != 1)
-+ return -EINVAL;
-+ sb = (void *)x;
-+ /* This better be a real Lustre superblock! */
-+ LASSERT(s2lsi((struct super_block *)sb)->lsi_lmd->lmd_magic == LMD_MAGIC);
-+
-+ /* Note we have not called client_common_fill_super yet, so
-+ proc fns must be able to handle that! */
-+ rc = class_process_proc_param(PARAM_LLITE, lvars.obd_vars,
-+ lcfg, sb);
-+ return(rc);
-+}
-+
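A userspace sketch, not from the patch, of how ll_process_config() above recovers the superblock pointer from a configuration instance name such as "lustre-client-aacfe000": everything after the last '-' is the pointer value printed in hex.

#include <stdio.h>
#include <string.h>

int main(void)
{
        const char *name = "lustre-client-aacfe000"; /* example name from the comment above */
        const char *ptr = strrchr(name, '-');
        unsigned long x;

        if (!ptr || !*(++ptr) || sscanf(ptr, "%lx", &x) != 1)
                return 1;
        printf("sb = 0x%lx\n", x);                   /* sb = 0xaacfe000 */
        return 0;
}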
-+int ll_show_options(struct seq_file *seq, struct vfsmount *vfs)
-+{
-+ struct ll_sb_info *sbi;
-+
-+ LASSERT((seq != NULL) && (vfs != NULL));
-+ sbi = ll_s2sbi(vfs->mnt_sb);
-+
-+ if (sbi->ll_flags & LL_SBI_NOLCK)
-+ seq_puts(seq, ",nolock");
-+
-+ if (sbi->ll_flags & LL_SBI_FLOCK)
-+ seq_puts(seq, ",flock");
-+
-+ if (sbi->ll_flags & LL_SBI_LOCALFLOCK)
-+ seq_puts(seq, ",localflock");
-+
-+ if (sbi->ll_flags & LL_SBI_USER_XATTR)
-+ seq_puts(seq, ",user_xattr");
-+
-+ if (sbi->ll_flags & LL_SBI_ACL)
-+ seq_puts(seq, ",acl");
-+
-+ RETURN(0);
-+}
-diff -urNad lustre~/lustre/llite/llite_mmap.c lustre/lustre/llite/llite_mmap.c
---- lustre~/lustre/llite/llite_mmap.c 2009-03-12 10:32:27.000000000 +0100
-+++ lustre/lustre/llite/llite_mmap.c 2009-03-13 09:45:03.000000000 +0100
-@@ -81,8 +81,7 @@
- int lt_get_mmap_locks(struct ll_lock_tree *tree,
- unsigned long addr, size_t count);
-
--struct page *ll_nopage(struct vm_area_struct *vma, unsigned long address,
-- int *type);
-+static struct vm_operations_struct ll_file_vm_ops;
-
- struct ll_lock_tree_node * ll_node_from_inode(struct inode *inode, __u64 start,
- __u64 end, ldlm_mode_t mode)
-@@ -285,9 +284,19 @@
- return LCK_PR;
- }
-
-+static void policy_from_vma_pgoff(ldlm_policy_data_t *policy,
-+ struct vm_area_struct *vma,
-+ __u64 pgoff, size_t count)
-+{
-+ policy->l_extent.start = pgoff << CFS_PAGE_SHIFT;
-+ policy->l_extent.end = (policy->l_extent.start + count - 1) |
-+ ~CFS_PAGE_MASK;
-+}
-+
- static void policy_from_vma(ldlm_policy_data_t *policy,
- struct vm_area_struct *vma, unsigned long addr,
- size_t count)
-+
- {
- policy->l_extent.start = ((addr - vma->vm_start) & CFS_PAGE_MASK) +
- ((__u64)vma->vm_pgoff << CFS_PAGE_SHIFT);
-@@ -308,7 +317,7 @@
- spin_lock(&mm->page_table_lock);
- for(vma = find_vma(mm, addr);
- vma != NULL && vma->vm_start < (addr + count); vma = vma->vm_next) {
-- if (vma->vm_ops && vma->vm_ops->nopage == ll_nopage &&
-+ if (vma->vm_ops && vma->vm_ops == &ll_file_vm_ops &&
- vma->vm_flags & VM_SHARED) {
- ret = vma;
- break;
-@@ -360,44 +369,30 @@
- }
- RETURN(0);
- }
--/**
-- * Page fault handler.
-- *
-- * \param vma - is virtiual area struct related to page fault
-- * \param address - address when hit fault
-- * \param type - of fault
-- *
-- * \return allocated and filled page for address
-- * \retval NOPAGE_SIGBUS if page not exist on this address
-- * \retval NOPAGE_OOM not have memory for allocate new page
-- */
--struct page *ll_nopage(struct vm_area_struct *vma, unsigned long address,
-- int *type)
-+
-+static int ll_get_extent_lock(struct vm_area_struct *vma, unsigned long pgoff,
-+ int *save_flags, struct lustre_handle *lockh)
- {
- struct file *filp = vma->vm_file;
- struct ll_file_data *fd = LUSTRE_FPRIVATE(filp);
- struct inode *inode = filp->f_dentry->d_inode;
-- struct lustre_handle lockh = { 0 };
- ldlm_policy_data_t policy;
- ldlm_mode_t mode;
-- struct page *page = NULL;
- struct ll_inode_info *lli = ll_i2info(inode);
-- struct lov_stripe_md *lsm;
- struct ost_lvb lvb;
- __u64 kms, old_mtime;
-- unsigned long pgoff, size, rand_read, seq_read;
-- int rc = 0;
-+ unsigned long size;
- ENTRY;
-
- if (lli->lli_smd == NULL) {
- CERROR("No lsm on fault?\n");
-- RETURN(NOPAGE_SIGBUS);
-+ RETURN(0);
- }
-
- ll_clear_file_contended(inode);
-
- /* start and end the lock on the first and last bytes in the page */
-- policy_from_vma(&policy, vma, address, CFS_PAGE_SIZE);
-+ policy_from_vma_pgoff(&policy, vma, pgoff, CFS_PAGE_SIZE);
-
- CDEBUG(D_MMAP, "nopage vma %p inode %lu, locking ["LPU64", "LPU64"]\n",
- vma, inode->i_ino, policy.l_extent.start, policy.l_extent.end);
-@@ -405,26 +400,28 @@
- mode = mode_from_vma(vma);
- old_mtime = LTIME_S(inode->i_mtime);
-
-- lsm = lli->lli_smd;
-- rc = ll_extent_lock(fd, inode, lsm, mode, &policy,
-- &lockh, LDLM_FL_CBPENDING | LDLM_FL_NO_LRU);
-- if (rc != 0)
-- RETURN(NOPAGE_SIGBUS);
-+ if (ll_extent_lock(fd, inode, lli->lli_smd, mode, &policy,
-+ lockh, LDLM_FL_CBPENDING | LDLM_FL_NO_LRU) != 0)
-+ RETURN(0);
-
- if (vma->vm_flags & VM_EXEC && LTIME_S(inode->i_mtime) != old_mtime)
- CWARN("binary changed. inode %lu\n", inode->i_ino);
-
-- lov_stripe_lock(lsm);
-+ lov_stripe_lock(lli->lli_smd);
- inode_init_lvb(inode, &lvb);
-- obd_merge_lvb(ll_i2obdexp(inode), lsm, &lvb, 1);
-+ if (obd_merge_lvb(ll_i2obdexp(inode), lli->lli_smd, &lvb, 1)) {
-+ lov_stripe_unlock(lli->lli_smd);
-+ RETURN(0);
-+ }
- kms = lvb.lvb_size;
-
-- pgoff = ((address - vma->vm_start) >> CFS_PAGE_SHIFT) + vma->vm_pgoff;
- size = (kms + CFS_PAGE_SIZE - 1) >> CFS_PAGE_SHIFT;
-+ CDEBUG(D_INFO, "Kms %lu - %lu\n", size, pgoff);
-
- if (pgoff >= size) {
-- lov_stripe_unlock(lsm);
-+ lov_stripe_unlock(lli->lli_smd);
- ll_glimpse_size(inode, LDLM_FL_BLOCK_GRANTED);
-+ lov_stripe_lock(lli->lli_smd);
- } else {
- /* XXX change inode size without ll_inode_size_lock() held!
- * there is a race condition with truncate path. (see
-@@ -446,29 +443,69 @@
- CDEBUG(D_INODE, "ino=%lu, updating i_size %llu\n",
- inode->i_ino, i_size_read(inode));
- }
-- lov_stripe_unlock(lsm);
- }
-
- /* If mapping is writeable, adjust kms to cover this page,
- * but do not extend kms beyond actual file size.
- * policy.l_extent.end is set to the end of the page by policy_from_vma
- * bug 10919 */
-- lov_stripe_lock(lsm);
- if (mode == LCK_PW)
-- obd_adjust_kms(ll_i2obdexp(inode), lsm,
-+ obd_adjust_kms(ll_i2obdexp(inode), lli->lli_smd,
- min_t(loff_t, policy.l_extent.end + 1,
- i_size_read(inode)), 0);
-- lov_stripe_unlock(lsm);
-+ lov_stripe_unlock(lli->lli_smd);
-
- /* disable VM_SEQ_READ and use VM_RAND_READ to make sure that
- * the kernel will not read other pages not covered by ldlm in
- * filemap_nopage. we do our readahead in ll_readpage.
- */
-- rand_read = vma->vm_flags & VM_RAND_READ;
-- seq_read = vma->vm_flags & VM_SEQ_READ;
-+ *save_flags = vma->vm_flags & (VM_RAND_READ | VM_SEQ_READ);
- vma->vm_flags &= ~ VM_SEQ_READ;
- vma->vm_flags |= VM_RAND_READ;
-
-+ return 1;
-+}
-+
-+static void ll_put_extent_lock(struct vm_area_struct *vma, int save_flags,
-+ struct lustre_handle *lockh)
-+{
-+ struct file *filp = vma->vm_file;
-+ struct ll_file_data *fd = LUSTRE_FPRIVATE(filp);
-+ struct inode *inode = filp->f_dentry->d_inode;
-+ ldlm_mode_t mode;
-+
-+ mode = mode_from_vma(vma);
-+ vma->vm_flags &= ~(VM_RAND_READ | VM_SEQ_READ);
-+ vma->vm_flags |= save_flags;
-+
-+ ll_extent_unlock(fd, inode, ll_i2info(inode)->lli_smd, mode, lockh);
-+}
-+
-+#ifndef HAVE_VM_OP_FAULT
-+/**
-+ * Page fault handler (old ->nopage interface).
-+ *
-+ * \param vma - virtual memory area in which the fault occurred
-+ * \param address - address at which the fault was hit
-+ * \param type - out parameter for the fault type
-+ *
-+ * \return allocated and filled page for the address
-+ * \retval NOPAGE_SIGBUS if no page exists at this address
-+ * \retval NOPAGE_OOM if there is no memory to allocate a new page
-+ */
-+struct page *ll_nopage(struct vm_area_struct *vma, unsigned long address,
-+ int *type)
-+{
-+ struct lustre_handle lockh = { 0 };
-+ int save_flags = 0;
-+ unsigned long pgoff;
-+ struct page *page;
-+ ENTRY;
-+
-+ pgoff = ((address - vma->vm_start) >> CFS_PAGE_SHIFT) + vma->vm_pgoff;
-+ if (!ll_get_extent_lock(vma, pgoff, &save_flags, &lockh))
-+ RETURN(NOPAGE_SIGBUS);
-+
- page = filemap_nopage(vma, address, type);
- if (page != NOPAGE_SIGBUS && page != NOPAGE_OOM)
- LL_CDEBUG_PAGE(D_PAGE, page, "got addr %lu type %lx\n", address,
-@@ -477,13 +514,48 @@
- CDEBUG(D_PAGE, "got addr %lu type %lx - SIGBUS\n", address,
- (long)type);
-
-- vma->vm_flags &= ~VM_RAND_READ;
-- vma->vm_flags |= (rand_read | seq_read);
-+ ll_put_extent_lock(vma, save_flags, &lockh);
-
-- ll_extent_unlock(fd, inode, ll_i2info(inode)->lli_smd, mode, &lockh);
- RETURN(page);
- }
-
-+#else
-+/* New fault() API */
-+/**
-+ * Page fault handler (new ->fault interface).
-+ *
-+ * \param vma - virtual memory area in which the fault occurred
-+ * \param vmf - fault descriptor; vmf->pgoff gives the faulting page
-+ *              offset and vmf->page is filled in on success
-+ *
-+ * \return VM_FAULT_* status as returned by filemap_fault()
-+ * \retval VM_FAULT_SIGBUS if the extent lock could not be obtained
-+ */
-+int ll_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
-+{
-+ struct lustre_handle lockh = { 0 };
-+ int save_flags = 0;
-+ int rc;
-+ ENTRY;
-+
-+ if (!ll_get_extent_lock(vma, vmf->pgoff, &save_flags, &lockh))
-+ RETURN(VM_FAULT_SIGBUS);
-+
-+ rc = filemap_fault(vma, vmf);
-+ if (vmf->page)
-+ LL_CDEBUG_PAGE(D_PAGE, vmf->page, "got addr %p type NOPAGE\n",
-+ vmf->virtual_address);
-+ else
-+ CDEBUG(D_PAGE, "got addr %p - SIGBUS\n",
-+ vmf->virtual_address);
-+
-+ ll_put_extent_lock(vma, save_flags, &lockh);
-+
-+ RETURN(rc);
-+}
-+#endif
-+
- /* To avoid cancel the locks covering mmapped region for lock cache pressure,
- * we track the mapped vma count by lli_mmap_cnt.
- * ll_vm_open(): when first vma is linked, split locks from lru.
-@@ -548,6 +620,7 @@
- }
- }
-
-+#ifndef HAVE_VM_OP_FAULT
- #ifndef HAVE_FILEMAP_POPULATE
- static int (*filemap_populate)(struct vm_area_struct * area, unsigned long address, unsigned long len, pgprot_t prot, unsigned long pgoff, int nonblock);
- #endif
-@@ -562,6 +635,7 @@
- rc = filemap_populate(area, address, len, prot, pgoff, 1);
- RETURN(rc);
- }
-+#endif
-
- /* return the user space pointer that maps to a file offset via a vma */
- static inline unsigned long file_to_user(struct vm_area_struct *vma, __u64 byte)
-@@ -588,10 +662,14 @@
- }
-
- static struct vm_operations_struct ll_file_vm_ops = {
-- .nopage = ll_nopage,
- .open = ll_vm_open,
- .close = ll_vm_close,
-+#ifdef HAVE_VM_OP_FAULT
-+ .fault = ll_fault,
-+#else
-+ .nopage = ll_nopage,
- .populate = ll_populate,
-+#endif
- };
-
- int ll_file_mmap(struct file * file, struct vm_area_struct * vma)
-@@ -602,7 +680,7 @@
- ll_stats_ops_tally(ll_i2sbi(file->f_dentry->d_inode), LPROC_LL_MAP, 1);
- rc = generic_file_mmap(file, vma);
- if (rc == 0) {
--#ifndef HAVE_FILEMAP_POPULATE
-+#if !defined(HAVE_FILEMAP_POPULATE) && !defined(HAVE_VM_OP_FAULT)
- if (!filemap_populate)
- filemap_populate = vma->vm_ops->populate;
- #endif
-diff -urNad lustre~/lustre/llite/llite_nfs.c lustre/lustre/llite/llite_nfs.c
---- lustre~/lustre/llite/llite_nfs.c 2009-03-12 10:32:27.000000000 +0100
-+++ lustre/lustre/llite/llite_nfs.c 2009-03-13 09:45:03.000000000 +0100
-@@ -68,36 +68,30 @@
- }
-
- static struct inode * search_inode_for_lustre(struct super_block *sb,
-- unsigned long ino,
-- unsigned long generation,
-- int mode)
-+ struct ll_fid *iid)
- {
- struct ptlrpc_request *req = NULL;
- struct ll_sb_info *sbi = ll_s2sbi(sb);
-- struct ll_fid fid;
- unsigned long valid = 0;
- int eadatalen = 0, rc;
- struct inode *inode = NULL;
-- struct ll_fid iid = { .id = ino, .generation = generation };
- ENTRY;
-
-- inode = ILOOKUP(sb, ino, ll_nfs_test_inode, &iid);
-+ inode = ILOOKUP(sb, iid->id, ll_nfs_test_inode, iid);
-
- if (inode)
- RETURN(inode);
-- if (S_ISREG(mode)) {
-- rc = ll_get_max_mdsize(sbi, &eadatalen);
-- if (rc)
-- RETURN(ERR_PTR(rc));
-- valid |= OBD_MD_FLEASIZE;
-- }
-- fid.id = (__u64)ino;
-- fid.generation = generation;
-- fid.f_type = mode;
-
-- rc = mdc_getattr(sbi->ll_mdc_exp, &fid, valid, eadatalen, &req);
-+ rc = ll_get_max_mdsize(sbi, &eadatalen);
-+ if (rc)
-+ RETURN(ERR_PTR(rc));
-+
-+ valid |= OBD_MD_FLEASIZE;
-+
-+ /* mds_fid2dentry ignores f_type */
-+ rc = mdc_getattr(sbi->ll_mdc_exp, iid, valid, eadatalen, &req);
- if (rc) {
-- CERROR("failure %d inode %lu\n", rc, ino);
-+ CERROR("failure %d inode "LPU64"\n", rc, iid->id);
- RETURN(ERR_PTR(rc));
- }
-
-@@ -111,27 +105,27 @@
- RETURN(inode);
- }
-
--static struct dentry *ll_iget_for_nfs(struct super_block *sb, unsigned long ino,
-- __u32 generation, umode_t mode)
-+static struct dentry *ll_iget_for_nfs(struct super_block *sb,
-+ struct ll_fid *iid)
- {
- struct inode *inode;
- struct dentry *result;
- ENTRY;
-
-- if (ino == 0)
-+ if (iid->id == 0)
- RETURN(ERR_PTR(-ESTALE));
-
-- inode = search_inode_for_lustre(sb, ino, generation, mode);
-- if (IS_ERR(inode)) {
-+ inode = search_inode_for_lustre(sb, iid);
-+ if (IS_ERR(inode))
- RETURN(ERR_PTR(PTR_ERR(inode)));
-- }
-+
- if (is_bad_inode(inode) ||
-- (generation && inode->i_generation != generation)){
-+ (iid->generation && inode->i_generation != iid->generation)) {
- /* we didn't find the right inode.. */
- CERROR("Inode %lu, Bad count: %lu %d or version %u %u\n",
- inode->i_ino, (unsigned long)inode->i_nlink,
- atomic_read(&inode->i_count), inode->i_generation,
-- generation);
-+ iid->generation);
- iput(inode);
- RETURN(ERR_PTR(-ESTALE));
- }
-@@ -146,57 +140,102 @@
- RETURN(result);
- }
-
--struct dentry *ll_fh_to_dentry(struct super_block *sb, __u32 *data, int len,
-- int fhtype, int parent)
-+#define LUSTRE_NFS_FID 0x94
-+
-+struct lustre_nfs_fid {
-+ struct ll_fid child;
-+ struct ll_fid parent;
-+ umode_t mode;
-+};
-+
-+/* The return value is the file handle type:
-+ * LUSTRE_NFS_FID -- handle contains the child fid, the parent fid and
-+ * the file mode;
-+ * 255 -- error (handle buffer too small).
-+ */
-+static int ll_encode_fh(struct dentry *de, __u32 *fh, int *plen,
-+ int connectable)
- {
-- switch (fhtype) {
-- case 2:
-- if (len < 5)
-- break;
-- if (parent)
-- return ll_iget_for_nfs(sb, data[3], 0, data[4]);
-- case 1:
-- if (len < 3)
-- break;
-- if (parent)
-- break;
-- return ll_iget_for_nfs(sb, data[0], data[1], data[2]);
-- default: break;
-- }
-- return ERR_PTR(-EINVAL);
-+ struct inode *inode = de->d_inode;
-+ struct inode *parent = de->d_parent->d_inode;
-+ struct lustre_nfs_fid *nfs_fid = (void *)fh;
-+ ENTRY;
-+
-+ CDEBUG(D_INFO, "encoding for (%lu) maxlen=%d minlen=%lu\n",
-+ inode->i_ino, *plen,
-+ sizeof(struct lustre_nfs_fid));
-+
-+ if (*plen < sizeof(struct lustre_nfs_fid))
-+ RETURN(255);
-+
-+ ll_inode2fid(&nfs_fid->child, inode);
-+ ll_inode2fid(&nfs_fid->parent, parent);
-+
-+ nfs_fid->mode = (S_IFMT & inode->i_mode);
-+ *plen = sizeof(struct lustre_nfs_fid);
-+
-+ RETURN(LUSTRE_NFS_FID);
- }
-
--int ll_dentry_to_fh(struct dentry *dentry, __u32 *datap, int *lenp,
-- int need_parent)
-+#ifdef HAVE_FH_TO_DENTRY
-+static struct dentry *ll_fh_to_dentry(struct super_block *sb, struct fid *fid,
-+ int fh_len, int fh_type)
- {
-- if (*lenp < 3)
-- return 255;
-- *datap++ = dentry->d_inode->i_ino;
-- *datap++ = dentry->d_inode->i_generation;
-- *datap++ = (__u32)(S_IFMT & dentry->d_inode->i_mode);
-+ struct lustre_nfs_fid *nfs_fid = (struct lustre_nfs_fid *)fid;
-
-- if (*lenp == 3 || S_ISDIR(dentry->d_inode->i_mode)) {
-- *lenp = 3;
-- return 1;
-- }
-- if (dentry->d_parent) {
-- *datap++ = dentry->d_parent->d_inode->i_ino;
-- *datap++ = (__u32)(S_IFMT & dentry->d_parent->d_inode->i_mode);
-+ if (fh_type != LUSTRE_NFS_FID)
-+ RETURN(ERR_PTR(-EINVAL));
-
-- *lenp = 5;
-- return 2;
-- }
-- *lenp = 3;
-- return 1;
-+ RETURN(ll_iget_for_nfs(sb, &nfs_fid->child));
-+}
-+static struct dentry *ll_fh_to_parent(struct super_block *sb, struct fid *fid,
-+ int fh_len, int fh_type)
-+{
-+ struct lustre_nfs_fid *nfs_fid = (struct lustre_nfs_fid *)fid;
-+
-+ if (fh_type != LUSTRE_NFS_FID)
-+ RETURN(ERR_PTR(-EINVAL));
-+ RETURN(ll_iget_for_nfs(sb, &nfs_fid->parent));
- }
-
--#if THREAD_SIZE >= 8192
-+#else
-+/*
-+ * The handle length is counted in __u32 units; the handle itself
-+ * is composed of the child and parent fids plus a mode.
-+ */
-+static struct dentry *ll_decode_fh(struct super_block *sb, __u32 *fh, int fh_len,
-+ int fh_type,
-+ int (*acceptable)(void *, struct dentry *),
-+ void *context)
-+{
-+ struct lustre_nfs_fid *nfs_fid = (void *)fh;
-+ struct dentry *entry;
-+ ENTRY;
-+
-+ CDEBUG(D_INFO, "decoding for "LPU64" fh_len=%d fh_type=%x\n",
-+ nfs_fid->child.id, fh_len, fh_type);
-+
-+ if (fh_type != LUSTRE_NFS_FID)
-+ RETURN(ERR_PTR(-ESTALE));
-+
-+ entry = sb->s_export_op->find_exported_dentry(sb, &nfs_fid->child,
-+ &nfs_fid->parent,
-+ acceptable, context);
-+ RETURN(entry);
-+}
-+
-+
- struct dentry *ll_get_dentry(struct super_block *sb, void *data)
- {
-- __u32 *inump = (__u32*)data;
-- return ll_iget_for_nfs(sb, inump[0], inump[1], S_IFREG);
-+ struct lustre_nfs_fid *fid = data;
-+ ENTRY;
-+
-+ RETURN(ll_iget_for_nfs(sb, &fid->child));
-+
- }
-
-+#endif
-+
- struct dentry *ll_get_parent(struct dentry *dchild)
- {
- struct ptlrpc_request *req = NULL;
-@@ -208,11 +247,11 @@
- char dotdot[] = "..";
- int rc = 0;
- ENTRY;
--
-+
- LASSERT(dir && S_ISDIR(dir->i_mode));
--
-- sbi = ll_s2sbi(dir->i_sb);
--
-+
-+ sbi = ll_s2sbi(dir->i_sb);
-+
- fid.id = (__u64)dir->i_ino;
- fid.generation = dir->i_generation;
- fid.f_type = S_IFDIR;
-@@ -223,11 +262,12 @@
- CERROR("failure %d inode %lu get parent\n", rc, dir->i_ino);
- return ERR_PTR(rc);
- }
-- body = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF, sizeof (*body));
--
-+ body = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF, sizeof (*body));
-+
- LASSERT((body->valid & OBD_MD_FLGENER) && (body->valid & OBD_MD_FLID));
--
-- result = ll_iget_for_nfs(dir->i_sb, body->ino, body->generation, S_IFDIR);
-+ fid.id = body->ino;
-+ fid.generation = body->generation;
-+ result = ll_iget_for_nfs(dir->i_sb, &fid);
-
- if (IS_ERR(result))
- rc = PTR_ERR(result);
-@@ -236,10 +276,18 @@
- if (rc)
- return ERR_PTR(rc);
- RETURN(result);
--}
-+}
-
-+
-+#if THREAD_SIZE >= 8192
- struct export_operations lustre_export_operations = {
-- .get_parent = ll_get_parent,
-- .get_dentry = ll_get_dentry,
-+ .encode_fh = ll_encode_fh,
-+#ifdef HAVE_FH_TO_DENTRY
-+ .fh_to_dentry = ll_fh_to_dentry,
-+ .fh_to_parent = ll_fh_to_parent,
-+#else
-+ .get_dentry = ll_get_dentry,
-+ .decode_fh = ll_decode_fh,
-+#endif
- };
- #endif
-diff -urNad lustre~/lustre/llite/lloop.c lustre/lustre/llite/lloop.c
---- lustre~/lustre/llite/lloop.c 2009-03-12 10:32:27.000000000 +0100
-+++ lustre/lustre/llite/lloop.c 2009-03-13 09:45:45.000000000 +0100
-@@ -152,7 +152,7 @@
- struct semaphore lo_bh_mutex;
- atomic_t lo_pending;
-
-- request_queue_t *lo_queue;
-+ struct request_queue *lo_queue;
-
- /* data to handle bio for lustre. */
- struct lo_request_data {
-@@ -283,7 +283,7 @@
- return bio;
- }
-
--static int loop_make_request(request_queue_t *q, struct bio *old_bio)
-+static int loop_make_request(struct request_queue *q, struct bio *old_bio)
- {
- struct lloop_device *lo = q->queuedata;
- int rw = bio_rw(old_bio);
-@@ -312,7 +312,7 @@
- if (atomic_dec_and_test(&lo->lo_pending))
- up(&lo->lo_bh_mutex);
- out:
-- bio_io_error(old_bio, old_bio->bi_size);
-+ cfs_bio_io_error(old_bio, old_bio->bi_size);
- return 0;
- inactive:
- spin_unlock_irq(&lo->lo_lock);
-@@ -322,7 +322,7 @@
- /*
- * kick off io on the underlying address space
- */
--static void loop_unplug(request_queue_t *q)
-+static void loop_unplug(struct request_queue *q)
- {
- struct lloop_device *lo = q->queuedata;
-
-@@ -334,7 +334,7 @@
- {
- int ret;
- ret = do_bio_filebacked(lo, bio);
-- bio_endio(bio, bio->bi_size, ret);
-+ cfs_bio_endio(bio, bio->bi_size, ret);
- }
-
- /*
-@@ -736,7 +736,7 @@
-
- out_mem4:
- while (i--)
-- blk_put_queue(loop_dev[i].lo_queue);
-+ blk_cleanup_queue(loop_dev[i].lo_queue);
- i = max_loop;
- out_mem3:
- while (i--)
-@@ -758,7 +758,7 @@
- ll_iocontrol_unregister(ll_iocontrol_magic);
- for (i = 0; i < max_loop; i++) {
- del_gendisk(disks[i]);
-- blk_put_queue(loop_dev[i].lo_queue);
-+ blk_cleanup_queue(loop_dev[i].lo_queue);
- put_disk(disks[i]);
- }
- if (ll_unregister_blkdev(lloop_major, "lloop"))
-diff -urNad lustre~/lustre/llite/lloop.c.orig lustre/lustre/llite/lloop.c.orig
---- lustre~/lustre/llite/lloop.c.orig 1970-01-01 00:00:00.000000000 +0000
-+++ lustre/lustre/llite/lloop.c.orig 2009-03-13 09:45:03.000000000 +0100
-@@ -0,0 +1,777 @@
-+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-+ * vim:expandtab:shiftwidth=8:tabstop=8:
-+ *
-+ * GPL HEADER START
-+ *
-+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
-+ *
-+ * This program is free software; you can redistribute it and/or modify
-+ * it under the terms of the GNU General Public License version 2 only,
-+ * as published by the Free Software Foundation.
-+ *
-+ * This program is distributed in the hope that it will be useful, but
-+ * WITHOUT ANY WARRANTY; without even the implied warranty of
-+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-+ * General Public License version 2 for more details (a copy is included
-+ * in the LICENSE file that accompanied this code).
-+ *
-+ * You should have received a copy of the GNU General Public License
-+ * version 2 along with this program; If not, see
-+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
-+ *
-+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
-+ * CA 95054 USA or visit www.sun.com if you need additional information or
-+ * have any questions.
-+ *
-+ * GPL HEADER END
-+ */
-+/*
-+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved
-+ * Use is subject to license terms.
-+ */
-+/*
-+ * This file is part of Lustre, http://www.lustre.org/
-+ * Lustre is a trademark of Sun Microsystems, Inc.
-+ */
-+
-+/*
-+ * linux/drivers/block/loop.c
-+ *
-+ * Written by Theodore Ts'o, 3/29/93
-+ *
-+ * Copyright 1993 by Theodore Ts'o. Redistribution of this file is
-+ * permitted under the GNU General Public License.
-+ *
-+ * DES encryption plus some minor changes by Werner Almesberger, 30-MAY-1993
-+ * more DES encryption plus IDEA encryption by Nicholas J. Leon, June 20, 1996
-+ *
-+ * Modularized and updated for 1.1.16 kernel - Mitch Dsouza 28th May 1994
-+ * Adapted for 1.3.59 kernel - Andries Brouwer, 1 Feb 1996
-+ *
-+ * Fixed do_loop_request() re-entrancy - Vincent.Renardias at waw.com Mar 20, 1997
-+ *
-+ * Added devfs support - Richard Gooch <rgooch at atnf.csiro.au> 16-Jan-1998
-+ *
-+ * Handle sparse backing files correctly - Kenn Humborg, Jun 28, 1998
-+ *
-+ * Loadable modules and other fixes by AK, 1998
-+ *
-+ * Make real block number available to downstream transfer functions, enables
-+ * CBC (and relatives) mode encryption requiring unique IVs per data block.
-+ * Reed H. Petty, rhp at draper.net
-+ *
-+ * Maximum number of loop devices now dynamic via max_loop module parameter.
-+ * Russell Kroll <rkroll at exploits.org> 19990701
-+ *
-+ * Maximum number of loop devices when compiled-in now selectable by passing
-+ * max_loop=<1-255> to the kernel on boot.
-+ * Erik I. Bolsø, <eriki at himolde.no>, Oct 31, 1999
-+ *
-+ * Completely rewrite request handling to be make_request_fn style and
-+ * non blocking, pushing work to a helper thread. Lots of fixes from
-+ * Al Viro too.
-+ * Jens Axboe <axboe at suse.de>, Nov 2000
-+ *
-+ * Support up to 256 loop devices
-+ * Heinz Mauelshagen <mge at sistina.com>, Feb 2002
-+ *
-+ * Support for falling back on the write file operation when the address space
-+ * operations prepare_write and/or commit_write are not available on the
-+ * backing filesystem.
-+ * Anton Altaparmakov, 16 Feb 2005
-+ *
-+ * Still To Fix:
-+ * - Advisory locking is ignored here.
-+ * - Should use an own CAP_* category instead of CAP_SYS_ADMIN
-+ *
-+ */
-+
-+#ifndef AUTOCONF_INCLUDED
-+#include <linux/config.h>
-+#endif
-+#include <linux/module.h>
-+
-+#include <linux/sched.h>
-+#include <linux/fs.h>
-+#include <linux/file.h>
-+#include <linux/stat.h>
-+#include <linux/errno.h>
-+#include <linux/major.h>
-+#include <linux/wait.h>
-+#include <linux/blkdev.h>
-+#include <linux/blkpg.h>
-+#include <linux/init.h>
-+#include <linux/smp_lock.h>
-+#include <linux/swap.h>
-+#include <linux/slab.h>
-+#include <linux/suspend.h>
-+#include <linux/writeback.h>
-+#include <linux/buffer_head.h> /* for invalidate_bdev() */
-+#include <linux/completion.h>
-+#include <linux/highmem.h>
-+#include <linux/gfp.h>
-+#include <linux/swap.h>
-+#include <linux/pagevec.h>
-+
-+#include <asm/uaccess.h>
-+
-+#include <lustre_lib.h>
-+#include <lustre_lite.h>
-+#include "llite_internal.h"
-+
-+#define LLOOP_MAX_SEGMENTS PTLRPC_MAX_BRW_PAGES
-+
-+/* Possible states of device */
-+enum {
-+ LLOOP_UNBOUND,
-+ LLOOP_BOUND,
-+ LLOOP_RUNDOWN,
-+};
-+
-+struct lloop_device {
-+ int lo_number;
-+ int lo_refcnt;
-+ loff_t lo_offset;
-+ loff_t lo_sizelimit;
-+ int lo_flags;
-+ int (*ioctl)(struct lloop_device *, int cmd,
-+ unsigned long arg);
-+
-+ struct file * lo_backing_file;
-+ struct block_device *lo_device;
-+ unsigned lo_blocksize;
-+
-+ int old_gfp_mask;
-+
-+ spinlock_t lo_lock;
-+ struct bio *lo_bio;
-+ struct bio *lo_biotail;
-+ int lo_state;
-+ struct semaphore lo_sem;
-+ struct semaphore lo_ctl_mutex;
-+ struct semaphore lo_bh_mutex;
-+ atomic_t lo_pending;
-+
-+ request_queue_t *lo_queue;
-+
-+ /* data to handle bio for lustre. */
-+ struct lo_request_data {
-+ struct brw_page lrd_pages[LLOOP_MAX_SEGMENTS];
-+ struct obdo lrd_oa;
-+ } lo_requests[1];
-+
-+};
-+
-+/*
-+ * Loop flags
-+ */
-+enum {
-+ LO_FLAGS_READ_ONLY = 1,
-+};
-+
-+static int lloop_major;
-+static int max_loop = 8;
-+static struct lloop_device *loop_dev;
-+static struct gendisk **disks;
-+static struct semaphore lloop_mutex;
-+static void *ll_iocontrol_magic = NULL;
-+
-+static loff_t get_loop_size(struct lloop_device *lo, struct file *file)
-+{
-+ loff_t size, offset, loopsize;
-+
-+ /* Compute loopsize in bytes */
-+ size = i_size_read(file->f_mapping->host);
-+ offset = lo->lo_offset;
-+ loopsize = size - offset;
-+ if (lo->lo_sizelimit > 0 && lo->lo_sizelimit < loopsize)
-+ loopsize = lo->lo_sizelimit;
-+
-+ /*
-+ * Unfortunately, if we want to do I/O on the device,
-+ * the number of 512-byte sectors has to fit into a sector_t.
-+ */
-+ return loopsize >> 9;
-+}
-+
-+static int do_bio_filebacked(struct lloop_device *lo, struct bio *bio)
-+{
-+ struct inode *inode = lo->lo_backing_file->f_dentry->d_inode;
-+ struct ll_inode_info *lli = ll_i2info(inode);
-+ struct lov_stripe_md *lsm = lli->lli_smd;
-+ struct obd_info oinfo = {{{0}}};
-+ struct brw_page *pg = lo->lo_requests[0].lrd_pages;
-+ struct obdo *oa = &lo->lo_requests[0].lrd_oa;
-+ pgoff_t offset;
-+ int ret, cmd, i;
-+ struct bio_vec *bvec;
-+
-+ BUG_ON(bio->bi_hw_segments > LLOOP_MAX_SEGMENTS);
-+
-+ offset = (pgoff_t)(bio->bi_sector << 9) + lo->lo_offset;
-+ bio_for_each_segment(bvec, bio, i) {
-+ BUG_ON(bvec->bv_offset != 0);
-+ BUG_ON(bvec->bv_len != CFS_PAGE_SIZE);
-+
-+ pg->pg = bvec->bv_page;
-+ pg->off = offset;
-+ pg->count = bvec->bv_len;
-+ pg->flag = OBD_BRW_SRVLOCK;
-+
-+ pg++;
-+ offset += bvec->bv_len;
-+ }
-+
-+ oa->o_mode = inode->i_mode;
-+ oa->o_id = lsm->lsm_object_id;
-+ oa->o_valid = OBD_MD_FLID | OBD_MD_FLMODE | OBD_MD_FLTYPE;
-+ obdo_from_inode(oa, inode, OBD_MD_FLFID | OBD_MD_FLGENER);
-+
-+ cmd = OBD_BRW_READ;
-+ if (bio_rw(bio) == WRITE)
-+ cmd = OBD_BRW_WRITE;
-+
-+ if (cmd == OBD_BRW_WRITE)
-+ ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_BRW_WRITE, bio->bi_size);
-+ else
-+ ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_BRW_READ, bio->bi_size);
-+ oinfo.oi_oa = oa;
-+ oinfo.oi_md = lsm;
-+ ret = obd_brw(cmd, ll_i2obdexp(inode), &oinfo,
-+ (obd_count)(i - bio->bi_idx),
-+ lo->lo_requests[0].lrd_pages, NULL);
-+ if (ret == 0)
-+ obdo_to_inode(inode, oa, OBD_MD_FLBLOCKS);
-+ return ret;
-+}
-+
-+
-+/*
-+ * Add bio to back of pending list
-+ */
-+static void loop_add_bio(struct lloop_device *lo, struct bio *bio)
-+{
-+ unsigned long flags;
-+
-+ spin_lock_irqsave(&lo->lo_lock, flags);
-+ if (lo->lo_biotail) {
-+ lo->lo_biotail->bi_next = bio;
-+ lo->lo_biotail = bio;
-+ } else
-+ lo->lo_bio = lo->lo_biotail = bio;
-+ spin_unlock_irqrestore(&lo->lo_lock, flags);
-+
-+ up(&lo->lo_bh_mutex);
-+}
-+
-+/*
-+ * Grab first pending buffer
-+ */
-+static struct bio *loop_get_bio(struct lloop_device *lo)
-+{
-+ struct bio *bio;
-+
-+ spin_lock_irq(&lo->lo_lock);
-+ if ((bio = lo->lo_bio)) {
-+ if (bio == lo->lo_biotail)
-+ lo->lo_biotail = NULL;
-+ lo->lo_bio = bio->bi_next;
-+ bio->bi_next = NULL;
-+ }
-+ spin_unlock_irq(&lo->lo_lock);
-+
-+ return bio;
-+}
-+
-+static int loop_make_request(request_queue_t *q, struct bio *old_bio)
-+{
-+ struct lloop_device *lo = q->queuedata;
-+ int rw = bio_rw(old_bio);
-+
-+ if (!lo)
-+ goto out;
-+
-+ spin_lock_irq(&lo->lo_lock);
-+ if (lo->lo_state != LLOOP_BOUND)
-+ goto inactive;
-+ atomic_inc(&lo->lo_pending);
-+ spin_unlock_irq(&lo->lo_lock);
-+
-+ if (rw == WRITE) {
-+ if (lo->lo_flags & LO_FLAGS_READ_ONLY)
-+ goto err;
-+ } else if (rw == READA) {
-+ rw = READ;
-+ } else if (rw != READ) {
-+ CERROR("lloop: unknown command (%x)\n", rw);
-+ goto err;
-+ }
-+ loop_add_bio(lo, old_bio);
-+ return 0;
-+err:
-+ if (atomic_dec_and_test(&lo->lo_pending))
-+ up(&lo->lo_bh_mutex);
-+out:
-+ bio_io_error(old_bio, old_bio->bi_size);
-+ return 0;
-+inactive:
-+ spin_unlock_irq(&lo->lo_lock);
-+ goto out;
-+}
-+
-+/*
-+ * kick off io on the underlying address space
-+ */
-+static void loop_unplug(request_queue_t *q)
-+{
-+ struct lloop_device *lo = q->queuedata;
-+
-+ clear_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags);
-+ blk_run_address_space(lo->lo_backing_file->f_mapping);
-+}
-+
-+static inline void loop_handle_bio(struct lloop_device *lo, struct bio *bio)
-+{
-+ int ret;
-+ ret = do_bio_filebacked(lo, bio);
-+ bio_endio(bio, bio->bi_size, ret);
-+}
-+
-+/*
-+ * worker thread that handles reads/writes to file backed loop devices,
-+ * to avoid blocking in our make_request_fn. it also does loop decrypting
-+ * on reads for block backed loop, as that is too heavy to do from
-+ * b_end_io context where irqs may be disabled.
-+ */
-+static int loop_thread(void *data)
-+{
-+ struct lloop_device *lo = data;
-+ struct bio *bio;
-+
-+ daemonize("lloop%d", lo->lo_number);
-+
-+ set_user_nice(current, -20);
-+
-+ lo->lo_state = LLOOP_BOUND;
-+ atomic_inc(&lo->lo_pending);
-+
-+ /*
-+ * up sem, we are running
-+ */
-+ up(&lo->lo_sem);
-+
-+ for (;;) {
-+ down_interruptible(&lo->lo_bh_mutex);
-+ /*
-+ * could be upped because of tear-down, not because of
-+ * pending work
-+ */
-+ if (!atomic_read(&lo->lo_pending))
-+ break;
-+
-+ bio = loop_get_bio(lo);
-+ if (!bio) {
-+ CWARN("lloop(minor: %d): missing bio\n", lo->lo_number);
-+ continue;
-+ }
-+ loop_handle_bio(lo, bio);
-+
-+ /*
-+ * upped both for pending work and tear-down, lo_pending
-+ * will hit zero then
-+ */
-+ if (atomic_dec_and_test(&lo->lo_pending))
-+ break;
-+ }
-+
-+ up(&lo->lo_sem);
-+ return 0;
-+}
-+
-+static int loop_set_fd(struct lloop_device *lo, struct file *unused,
-+ struct block_device *bdev, struct file *file)
-+{
-+ struct inode *inode;
-+ struct address_space *mapping;
-+ int lo_flags = 0;
-+ int error;
-+ loff_t size;
-+
-+ if (!try_module_get(THIS_MODULE))
-+ return -ENODEV;
-+
-+ error = -EBUSY;
-+ if (lo->lo_state != LLOOP_UNBOUND)
-+ goto out;
-+
-+ mapping = file->f_mapping;
-+ inode = mapping->host;
-+
-+ error = -EINVAL;
-+ if (!S_ISREG(inode->i_mode) || inode->i_sb->s_magic != LL_SUPER_MAGIC)
-+ goto out;
-+
-+ if (!(file->f_mode & FMODE_WRITE))
-+ lo_flags |= LO_FLAGS_READ_ONLY;
-+
-+ size = get_loop_size(lo, file);
-+
-+ if ((loff_t)(sector_t)size != size) {
-+ error = -EFBIG;
-+ goto out;
-+ }
-+
-+ /* remove all pages from the cache so that no dirty pages are left behind. */
-+ truncate_inode_pages(mapping, 0);
-+
-+ set_device_ro(bdev, (lo_flags & LO_FLAGS_READ_ONLY) != 0);
-+
-+ lo->lo_blocksize = CFS_PAGE_SIZE;
-+ lo->lo_device = bdev;
-+ lo->lo_flags = lo_flags;
-+ lo->lo_backing_file = file;
-+ lo->ioctl = NULL;
-+ lo->lo_sizelimit = 0;
-+ lo->old_gfp_mask = mapping_gfp_mask(mapping);
-+ mapping_set_gfp_mask(mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS));
-+
-+ lo->lo_bio = lo->lo_biotail = NULL;
-+
-+ /*
-+ * set queue make_request_fn, and add limits based on lower level
-+ * device
-+ */
-+ blk_queue_make_request(lo->lo_queue, loop_make_request);
-+ lo->lo_queue->queuedata = lo;
-+ lo->lo_queue->unplug_fn = loop_unplug;
-+
-+ /* queue parameters */
-+ blk_queue_hardsect_size(lo->lo_queue, CFS_PAGE_SIZE);
-+ blk_queue_max_sectors(lo->lo_queue, LLOOP_MAX_SEGMENTS);
-+ blk_queue_max_phys_segments(lo->lo_queue, LLOOP_MAX_SEGMENTS);
-+
-+ set_capacity(disks[lo->lo_number], size);
-+ bd_set_size(bdev, size << 9);
-+
-+ set_blocksize(bdev, lo->lo_blocksize);
-+
-+ kernel_thread(loop_thread, lo, CLONE_KERNEL);
-+ down(&lo->lo_sem);
-+ return 0;
-+
-+ out:
-+ /* This is safe: open() is still holding a reference. */
-+ module_put(THIS_MODULE);
-+ return error;
-+}
-+
-+static int loop_clr_fd(struct lloop_device *lo, struct block_device *bdev,
-+ int count)
-+{
-+ struct file *filp = lo->lo_backing_file;
-+ int gfp = lo->old_gfp_mask;
-+
-+ if (lo->lo_state != LLOOP_BOUND)
-+ return -ENXIO;
-+
-+ if (lo->lo_refcnt > count) /* we needed one fd for the ioctl */
-+ return -EBUSY;
-+
-+ if (filp == NULL)
-+ return -EINVAL;
-+
-+ spin_lock_irq(&lo->lo_lock);
-+ lo->lo_state = LLOOP_RUNDOWN;
-+ if (atomic_dec_and_test(&lo->lo_pending))
-+ up(&lo->lo_bh_mutex);
-+ spin_unlock_irq(&lo->lo_lock);
-+
-+ down(&lo->lo_sem);
-+ lo->lo_backing_file = NULL;
-+ lo->ioctl = NULL;
-+ lo->lo_device = NULL;
-+ lo->lo_offset = 0;
-+ lo->lo_sizelimit = 0;
-+ lo->lo_flags = 0;
-+ ll_invalidate_bdev(bdev, 0);
-+ set_capacity(disks[lo->lo_number], 0);
-+ bd_set_size(bdev, 0);
-+ mapping_set_gfp_mask(filp->f_mapping, gfp);
-+ lo->lo_state = LLOOP_UNBOUND;
-+ fput(filp);
-+ /* This is safe: open() is still holding a reference. */
-+ module_put(THIS_MODULE);
-+ return 0;
-+}
-+
-+static int lo_open(struct inode *inode, struct file *file)
-+{
-+ struct lloop_device *lo = inode->i_bdev->bd_disk->private_data;
-+
-+ down(&lo->lo_ctl_mutex);
-+ lo->lo_refcnt++;
-+ up(&lo->lo_ctl_mutex);
-+
-+ return 0;
-+}
-+
-+static int lo_release(struct inode *inode, struct file *file)
-+{
-+ struct lloop_device *lo = inode->i_bdev->bd_disk->private_data;
-+
-+ down(&lo->lo_ctl_mutex);
-+ --lo->lo_refcnt;
-+ up(&lo->lo_ctl_mutex);
-+
-+ return 0;
-+}
-+
-+/* lloop device node's ioctl function. */
-+static int lo_ioctl(struct inode *inode, struct file *unused,
-+ unsigned int cmd, unsigned long arg)
-+{
-+ struct lloop_device *lo = inode->i_bdev->bd_disk->private_data;
-+ struct block_device *bdev = inode->i_bdev;
-+ int err = 0;
-+
-+ down(&lloop_mutex);
-+ switch (cmd) {
-+ case LL_IOC_LLOOP_DETACH: {
-+ err = loop_clr_fd(lo, bdev, 2);
-+ if (err == 0)
-+ blkdev_put(bdev); /* grabbed in LLOOP_ATTACH */
-+ break;
-+ }
-+
-+ case LL_IOC_LLOOP_INFO: {
-+ __u64 ino = 0;
-+
-+ if (lo->lo_state == LLOOP_BOUND)
-+ ino = lo->lo_backing_file->f_dentry->d_inode->i_ino;
-+
-+ if (put_user(ino, (__u64 *)arg))
-+ err = -EFAULT;
-+ break;
-+ }
-+
-+ default:
-+ err = -EINVAL;
-+ break;
-+ }
-+ up(&lloop_mutex);
-+
-+ return err;
-+}
-+
-+static struct block_device_operations lo_fops = {
-+ .owner = THIS_MODULE,
-+ .open = lo_open,
-+ .release = lo_release,
-+ .ioctl = lo_ioctl,
-+};
-+
-+/* dynamic iocontrol callback.
-+ * This callback is registered in lloop_init and will be called by
-+ * ll_iocontrol_call.
-+ * It is a regular-file ioctl function for llite: it is responsible for
-+ * attaching a file to, and detaching a file from, an lloop device by
-+ * device number.
-+ */
-+static enum llioc_iter lloop_ioctl(struct inode *unused, struct file *file,
-+ unsigned int cmd, unsigned long arg,
-+ void *magic, int *rcp)
-+{
-+ struct lloop_device *lo = NULL;
-+ struct block_device *bdev = NULL;
-+ int err = 0;
-+ dev_t dev;
-+
-+ if (magic != ll_iocontrol_magic)
-+ return LLIOC_CONT;
-+
-+ if (disks == NULL)
-+ GOTO(out1, err = -ENODEV);
-+
-+ down(&lloop_mutex);
-+ switch (cmd) {
-+ case LL_IOC_LLOOP_ATTACH: {
-+ struct lloop_device *lo_free = NULL;
-+ int i;
-+
-+ for (i = 0; i < max_loop; i++, lo = NULL) {
-+ lo = &loop_dev[i];
-+ if (lo->lo_state == LLOOP_UNBOUND) {
-+ if (!lo_free)
-+ lo_free = lo;
-+ continue;
-+ }
-+ if (lo->lo_backing_file->f_dentry->d_inode ==
-+ file->f_dentry->d_inode)
-+ break;
-+ }
-+ if (lo || !lo_free)
-+ GOTO(out, err = -EBUSY);
-+
-+ lo = lo_free;
-+ dev = MKDEV(lloop_major, lo->lo_number);
-+
-+ /* bail out if the user-supplied pointer is not writable */
-+ if (put_user((long)old_encode_dev(dev), (long*)arg))
-+ GOTO(out, err = -EFAULT);
-+
-+ bdev = open_by_devnum(dev, file->f_mode);
-+ if (IS_ERR(bdev))
-+ GOTO(out, err = PTR_ERR(bdev));
-+
-+ get_file(file);
-+ err = loop_set_fd(lo, NULL, bdev, file);
-+ if (err) {
-+ fput(file);
-+ blkdev_put(bdev);
-+ }
-+
-+ break;
-+ }
-+
-+ case LL_IOC_LLOOP_DETACH_BYDEV: {
-+ int minor;
-+
-+ dev = old_decode_dev(arg);
-+ if (MAJOR(dev) != lloop_major)
-+ GOTO(out, err = -EINVAL);
-+
-+ minor = MINOR(dev);
-+ if (minor > max_loop - 1)
-+ GOTO(out, err = -EINVAL);
-+
-+ lo = &loop_dev[minor];
-+ if (lo->lo_state != LLOOP_BOUND)
-+ GOTO(out, err = -EINVAL);
-+
-+ bdev = lo->lo_device;
-+ err = loop_clr_fd(lo, bdev, 1);
-+ if (err == 0)
-+ blkdev_put(bdev); /* grabbed in LLOOP_ATTACH */
-+
-+ break;
-+ }
-+
-+ default:
-+ err = -EINVAL;
-+ break;
-+ }
-+
-+out:
-+ up(&lloop_mutex);
-+out1:
-+ if (rcp)
-+ *rcp = err;
-+ return LLIOC_STOP;
-+}
-+
-+static int __init lloop_init(void)
-+{
-+ int i;
-+ unsigned int cmdlist[] = {
-+ LL_IOC_LLOOP_ATTACH,
-+ LL_IOC_LLOOP_DETACH_BYDEV,
-+ };
-+
-+ if (max_loop < 1 || max_loop > 256) {
-+ CWARN("lloop: invalid max_loop (must be between"
-+ " 1 and 256), using default (8)\n");
-+ max_loop = 8;
-+ }
-+
-+ lloop_major = register_blkdev(0, "lloop");
-+ if (lloop_major < 0)
-+ return -EIO;
-+
-+ ll_iocontrol_magic = ll_iocontrol_register(lloop_ioctl, 2, cmdlist);
-+ if (ll_iocontrol_magic == NULL)
-+ goto out_mem1;
-+
-+ loop_dev = kmalloc(max_loop * sizeof(struct lloop_device), GFP_KERNEL);
-+ if (!loop_dev)
-+ goto out_mem1;
-+ memset(loop_dev, 0, max_loop * sizeof(struct lloop_device));
-+
-+ disks = kmalloc(max_loop * sizeof(struct gendisk *), GFP_KERNEL);
-+ if (!disks)
-+ goto out_mem2;
-+
-+ for (i = 0; i < max_loop; i++) {
-+ disks[i] = alloc_disk(1);
-+ if (!disks[i])
-+ goto out_mem3;
-+ }
-+
-+ init_MUTEX(&lloop_mutex);
-+
-+ for (i = 0; i < max_loop; i++) {
-+ struct lloop_device *lo = &loop_dev[i];
-+ struct gendisk *disk = disks[i];
-+
-+ memset(lo, 0, sizeof(*lo));
-+ lo->lo_queue = blk_alloc_queue(GFP_KERNEL);
-+ if (!lo->lo_queue)
-+ goto out_mem4;
-+
-+ init_MUTEX(&lo->lo_ctl_mutex);
-+ init_MUTEX_LOCKED(&lo->lo_sem);
-+ init_MUTEX_LOCKED(&lo->lo_bh_mutex);
-+ lo->lo_number = i;
-+ spin_lock_init(&lo->lo_lock);
-+ disk->major = lloop_major;
-+ disk->first_minor = i;
-+ disk->fops = &lo_fops;
-+ sprintf(disk->disk_name, "lloop%d", i);
-+ disk->private_data = lo;
-+ disk->queue = lo->lo_queue;
-+ }
-+
-+ /* We cannot fail after we call this, so another loop! */
-+ for (i = 0; i < max_loop; i++)
-+ add_disk(disks[i]);
-+ return 0;
-+
-+out_mem4:
-+ while (i--)
-+ blk_put_queue(loop_dev[i].lo_queue);
-+ i = max_loop;
-+out_mem3:
-+ while (i--)
-+ put_disk(disks[i]);
-+ kfree(disks);
-+out_mem2:
-+ kfree(loop_dev);
-+out_mem1:
-+ unregister_blkdev(lloop_major, "lloop");
-+ ll_iocontrol_unregister(ll_iocontrol_magic);
-+ CERROR("lloop: ran out of memory\n");
-+ return -ENOMEM;
-+}
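
lloop_init() unwinds its partial setup with the usual goto ladder: each successfully acquired resource gets a matching label further down, so a failure at step N falls through the labels that release steps N-1 down to 1 in reverse order. A compact user-space sketch of the same pattern, assuming made-up resources a, b and c and a simulated allocation failure:

/* unwind.c - sketch of the goto error-unwinding ladder used in lloop_init().
 * Build: cc -std=c11 unwind.c -o unwind */
#include <stdio.h>
#include <stdlib.h>

static int allocs_left = 2;     /* simulate failure of the third allocation */

static void *xmalloc(size_t sz)
{
        if (allocs_left-- <= 0)
                return NULL;
        return malloc(sz);
}

static int setup(void)
{
        void *a, *b, *c;

        a = xmalloc(64);
        if (!a)
                goto out;
        b = xmalloc(64);
        if (!b)
                goto out_free_a;
        c = xmalloc(64);
        if (!c)
                goto out_free_b;        /* taken in this demo */

        /* success: real code would hand a, b and c to the caller;
         * the demo just releases them again */
        free(c);
        free(b);
        free(a);
        return 0;

out_free_b:
        free(b);
out_free_a:
        free(a);
out:
        fprintf(stderr, "setup: ran out of memory, unwound partial state\n");
        return -1;
}

int main(void)
{
        return setup() ? EXIT_FAILURE : EXIT_SUCCESS;
}
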
-+
-+static void lloop_exit(void)
-+{
-+ int i;
-+
-+ ll_iocontrol_unregister(ll_iocontrol_magic);
-+ for (i = 0; i < max_loop; i++) {
-+ del_gendisk(disks[i]);
-+ blk_put_queue(loop_dev[i].lo_queue);
-+ put_disk(disks[i]);
-+ }
-+ if (ll_unregister_blkdev(lloop_major, "lloop"))
-+ CWARN("lloop: cannot unregister blkdev\n");
-+
-+ kfree(disks);
-+ kfree(loop_dev);
-+}
-+
-+module_init(lloop_init);
-+module_exit(lloop_exit);
-+
-+CFS_MODULE_PARM(max_loop, "i", int, 0444, "maximum number of lloop devices");
-+MODULE_AUTHOR("Sun Microsystems, Inc. <http://www.lustre.org/>");
-+MODULE_DESCRIPTION("Lustre virtual block device");
-+MODULE_LICENSE("GPL");
-diff -urNad lustre~/lustre/llite/rw.c lustre/lustre/llite/rw.c
---- lustre~/lustre/llite/rw.c 2009-03-12 10:32:27.000000000 +0100
-+++ lustre/lustre/llite/rw.c 2009-03-13 09:45:03.000000000 +0100
-@@ -61,6 +61,8 @@
-
- #define DEBUG_SUBSYSTEM S_LLITE
-
-+#include <linux/page-flags.h>
-+
- #include <lustre_lite.h>
- #include "llite_internal.h"
- #include <linux/lustre_compat25.h>
-@@ -186,7 +188,7 @@
- GOTO(out_unlock, 0);
- }
-
-- LASSERT(atomic_read(&lli->lli_size_sem.count) <= 0);
-+ LASSERT(SEM_COUNT(&lli->lli_size_sem) <= 0);
-
- if (!srvlock) {
- struct ost_lvb lvb;
-@@ -2122,7 +2124,7 @@
- rc = generic_write_checks(file, ppos, &count, 0);
- if (rc)
- GOTO(out, rc);
-- rc = ll_remove_suid(file->f_dentry, file->f_vfsmnt);
-+ rc = ll_remove_suid(file, file->f_vfsmnt);
- if (rc)
- GOTO(out, rc);
- }
-diff -urNad lustre~/lustre/llite/rw.c.orig lustre/lustre/llite/rw.c.orig
---- lustre~/lustre/llite/rw.c.orig 1970-01-01 00:00:00.000000000 +0000
-+++ lustre/lustre/llite/rw.c.orig 2009-03-13 09:45:03.000000000 +0100
-@@ -0,0 +1,2215 @@
-+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-+ * vim:expandtab:shiftwidth=8:tabstop=8:
-+ *
-+ * GPL HEADER START
-+ *
-+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
-+ *
-+ * This program is free software; you can redistribute it and/or modify
-+ * it under the terms of the GNU General Public License version 2 only,
-+ * as published by the Free Software Foundation.
-+ *
-+ * This program is distributed in the hope that it will be useful, but
-+ * WITHOUT ANY WARRANTY; without even the implied warranty of
-+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-+ * General Public License version 2 for more details (a copy is included
-+ * in the LICENSE file that accompanied this code).
-+ *
-+ * You should have received a copy of the GNU General Public License
-+ * version 2 along with this program; If not, see
-+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
-+ *
-+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
-+ * CA 95054 USA or visit www.sun.com if you need additional information or
-+ * have any questions.
-+ *
-+ * GPL HEADER END
-+ */
-+/*
-+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved
-+ * Use is subject to license terms.
-+ */
-+/*
-+ * This file is part of Lustre, http://www.lustre.org/
-+ * Lustre is a trademark of Sun Microsystems, Inc.
-+ *
-+ * lustre/llite/rw.c
-+ *
-+ * Lustre Lite I/O page cache routines shared by different kernel revs
-+ */
-+
-+#ifndef AUTOCONF_INCLUDED
-+#include <linux/config.h>
-+#endif
-+#include <linux/kernel.h>
-+#include <linux/mm.h>
-+#include <linux/string.h>
-+#include <linux/stat.h>
-+#include <linux/errno.h>
-+#include <linux/smp_lock.h>
-+#include <linux/unistd.h>
-+#include <linux/version.h>
-+#include <asm/system.h>
-+#include <asm/uaccess.h>
-+
-+#include <linux/fs.h>
-+#include <linux/stat.h>
-+#include <asm/uaccess.h>
-+#include <linux/mm.h>
-+#include <linux/pagemap.h>
-+#include <linux/smp_lock.h>
-+
-+#define DEBUG_SUBSYSTEM S_LLITE
-+
-+#include <lustre_lite.h>
-+#include "llite_internal.h"
-+#include <linux/lustre_compat25.h>
-+
-+#ifndef list_for_each_prev_safe
-+#define list_for_each_prev_safe(pos, n, head) \
-+ for (pos = (head)->prev, n = pos->prev; pos != (head); \
-+ pos = n, n = pos->prev )
-+#endif
-+
-+cfs_mem_cache_t *ll_async_page_slab = NULL;
-+size_t ll_async_page_slab_size = 0;
-+
-+/* SYNCHRONOUS I/O to object storage for an inode */
-+static int ll_brw(int cmd, struct inode *inode, struct obdo *oa,
-+ struct page *page, int flags)
-+{
-+ struct ll_inode_info *lli = ll_i2info(inode);
-+ struct lov_stripe_md *lsm = lli->lli_smd;
-+ struct obd_info oinfo = { { { 0 } } };
-+ struct brw_page pg;
-+ int rc;
-+ ENTRY;
-+
-+ pg.pg = page;
-+ pg.off = ((obd_off)page->index) << CFS_PAGE_SHIFT;
-+
-+ if ((cmd & OBD_BRW_WRITE) && (pg.off+CFS_PAGE_SIZE>i_size_read(inode)))
-+ pg.count = i_size_read(inode) % CFS_PAGE_SIZE;
-+ else
-+ pg.count = CFS_PAGE_SIZE;
-+
-+ LL_CDEBUG_PAGE(D_PAGE, page, "%s %d bytes ino %lu at "LPU64"/"LPX64"\n",
-+ cmd & OBD_BRW_WRITE ? "write" : "read", pg.count,
-+ inode->i_ino, pg.off, pg.off);
-+ if (pg.count == 0) {
-+ CERROR("ZERO COUNT: ino %lu: size %p:%Lu(%p:%Lu) idx %lu off "
-+ LPU64"\n", inode->i_ino, inode, i_size_read(inode),
-+ page->mapping->host, i_size_read(page->mapping->host),
-+ page->index, pg.off);
-+ }
-+
-+ pg.flag = flags;
-+
-+ if (cmd & OBD_BRW_WRITE)
-+ ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_BRW_WRITE,
-+ pg.count);
-+ else
-+ ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_BRW_READ,
-+ pg.count);
-+ oinfo.oi_oa = oa;
-+ oinfo.oi_md = lsm;
-+ rc = obd_brw(cmd, ll_i2obdexp(inode), &oinfo, 1, &pg, NULL);
-+ if (rc == 0)
-+ obdo_to_inode(inode, oa, OBD_MD_FLBLOCKS);
-+ else if (rc != -EIO)
-+ CERROR("error from obd_brw: rc = %d\n", rc);
-+ RETURN(rc);
-+}
-+
-+int ll_file_punch(struct inode * inode, loff_t new_size, int srvlock)
-+{
-+ struct ll_inode_info *lli = ll_i2info(inode);
-+ struct obd_info oinfo = { { { 0 } } };
-+ struct obdo oa;
-+ int rc;
-+
-+ ENTRY;
-+ CDEBUG(D_INFO, "calling punch for "LPX64" (new size %Lu=%#Lx)\n",
-+ lli->lli_smd->lsm_object_id, new_size, new_size);
-+
-+ oinfo.oi_md = lli->lli_smd;
-+ oinfo.oi_policy.l_extent.start = new_size;
-+ oinfo.oi_policy.l_extent.end = OBD_OBJECT_EOF;
-+ oinfo.oi_oa = &oa;
-+ oa.o_id = lli->lli_smd->lsm_object_id;
-+ oa.o_valid = OBD_MD_FLID;
-+ if (srvlock) {
-+ /* set OBD_MD_FLFLAGS in o_valid, only if we
-+ * set OBD_FL_TRUNCLOCK, otherwise ost_punch
-+ * and filter_setattr get confused, see the comment
-+ * in ost_punch */
-+ oa.o_flags = OBD_FL_TRUNCLOCK;
-+ oa.o_valid |= OBD_MD_FLFLAGS;
-+ }
-+ obdo_from_inode(&oa, inode, OBD_MD_FLTYPE | OBD_MD_FLMODE |OBD_MD_FLFID|
-+ OBD_MD_FLATIME | OBD_MD_FLMTIME | OBD_MD_FLCTIME |
-+ OBD_MD_FLUID | OBD_MD_FLGID | OBD_MD_FLGENER |
-+ OBD_MD_FLBLOCKS);
-+ rc = obd_punch_rqset(ll_i2obdexp(inode), &oinfo, NULL);
-+ if (rc) {
-+ CERROR("obd_truncate fails (%d) ino %lu\n", rc, inode->i_ino);
-+ RETURN(rc);
-+ }
-+ obdo_to_inode(inode, &oa, OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
-+ OBD_MD_FLATIME | OBD_MD_FLMTIME | OBD_MD_FLCTIME);
-+ RETURN(0);
-+}
-+/* this isn't where truncate starts. roughly:
-+ * sys_truncate->ll_setattr_raw->vmtruncate->ll_truncate. setattr_raw grabs
-+ * DLM lock on [size, EOF], i_mutex, ->lli_size_sem, and WRITE_I_ALLOC_SEM to
-+ * avoid races.
-+ *
-+ * must be called under ->lli_size_sem */
-+void ll_truncate(struct inode *inode)
-+{
-+ struct ll_inode_info *lli = ll_i2info(inode);
-+ int srvlock = test_bit(LLI_F_SRVLOCK, &lli->lli_flags);
-+ loff_t new_size;
-+ ENTRY;
-+ CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p) to %Lu=%#Lx\n",inode->i_ino,
-+ inode->i_generation, inode, i_size_read(inode), i_size_read(inode));
-+
-+ ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_TRUNC, 1);
-+ if (lli->lli_size_sem_owner != current) {
-+ EXIT;
-+ return;
-+ }
-+
-+ if (!lli->lli_smd) {
-+ CDEBUG(D_INODE, "truncate on inode %lu with no objects\n",
-+ inode->i_ino);
-+ GOTO(out_unlock, 0);
-+ }
-+
-+ LASSERT(atomic_read(&lli->lli_size_sem.count) <= 0);
-+
-+ if (!srvlock) {
-+ struct ost_lvb lvb;
-+ int rc;
-+
-+ /* XXX I'm pretty sure this is a hack to paper over a more fundamental
-+ * race condition. */
-+ lov_stripe_lock(lli->lli_smd);
-+ inode_init_lvb(inode, &lvb);
-+ rc = obd_merge_lvb(ll_i2obdexp(inode), lli->lli_smd, &lvb, 0);
-+ inode->i_blocks = lvb.lvb_blocks;
-+ if (lvb.lvb_size == i_size_read(inode) && rc == 0) {
-+ CDEBUG(D_VFSTRACE, "skipping punch for obj "LPX64", %Lu=%#Lx\n",
-+ lli->lli_smd->lsm_object_id, i_size_read(inode),
-+ i_size_read(inode));
-+ lov_stripe_unlock(lli->lli_smd);
-+ GOTO(out_unlock, 0);
-+ }
-+
-+ obd_adjust_kms(ll_i2obdexp(inode), lli->lli_smd,
-+ i_size_read(inode), 1);
-+ lov_stripe_unlock(lli->lli_smd);
-+ }
-+
-+ if (unlikely((ll_i2sbi(inode)->ll_flags & LL_SBI_LLITE_CHECKSUM) &&
-+ (i_size_read(inode) & ~CFS_PAGE_MASK))) {
-+ /* If the truncate leaves a partial page, update its checksum */
-+ struct page *page = find_get_page(inode->i_mapping,
-+ i_size_read(inode) >>
-+ CFS_PAGE_SHIFT);
-+ if (page != NULL) {
-+ struct ll_async_page *llap = llap_cast_private(page);
-+ if (llap != NULL) {
-+ char *kaddr = kmap_atomic(page, KM_USER0);
-+ llap->llap_checksum =
-+ init_checksum(OSC_DEFAULT_CKSUM);
-+ llap->llap_checksum =
-+ compute_checksum(llap->llap_checksum,
-+ kaddr, CFS_PAGE_SIZE,
-+ OSC_DEFAULT_CKSUM);
-+ kunmap_atomic(kaddr, KM_USER0);
-+ }
-+ page_cache_release(page);
-+ }
-+ }
-+
-+ new_size = i_size_read(inode);
-+ ll_inode_size_unlock(inode, 0);
-+ if (!srvlock)
-+ ll_file_punch(inode, new_size, 0);
-+ else
-+ ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_LOCKLESS_TRUNC, 1);
-+
-+ EXIT;
-+ return;
-+
-+ out_unlock:
-+ ll_inode_size_unlock(inode, 0);
-+} /* ll_truncate */
-+
-+int ll_prepare_write(struct file *file, struct page *page, unsigned from,
-+ unsigned to)
-+{
-+ struct inode *inode = page->mapping->host;
-+ struct ll_inode_info *lli = ll_i2info(inode);
-+ struct lov_stripe_md *lsm = lli->lli_smd;
-+ obd_off offset = ((obd_off)page->index) << CFS_PAGE_SHIFT;
-+ struct obd_info oinfo = { { { 0 } } };
-+ struct brw_page pga;
-+ struct obdo oa;
-+ struct ost_lvb lvb;
-+ int rc = 0;
-+ ENTRY;
-+
-+ LASSERT(PageLocked(page));
-+ (void)llap_cast_private(page); /* assertion */
-+
-+ /* Check to see if we should return -EIO right away */
-+ pga.pg = page;
-+ pga.off = offset;
-+ pga.count = CFS_PAGE_SIZE;
-+ pga.flag = 0;
-+
-+ oa.o_mode = inode->i_mode;
-+ oa.o_id = lsm->lsm_object_id;
-+ oa.o_valid = OBD_MD_FLID | OBD_MD_FLMODE | OBD_MD_FLTYPE;
-+ obdo_from_inode(&oa, inode, OBD_MD_FLFID | OBD_MD_FLGENER);
-+
-+ oinfo.oi_oa = &oa;
-+ oinfo.oi_md = lsm;
-+ rc = obd_brw(OBD_BRW_CHECK, ll_i2obdexp(inode), &oinfo, 1, &pga, NULL);
-+ if (rc)
-+ RETURN(rc);
-+
-+ if (PageUptodate(page)) {
-+ LL_CDEBUG_PAGE(D_PAGE, page, "uptodate\n");
-+ RETURN(0);
-+ }
-+
-+ /* We're completely overwriting an existing page, so _don't_ set it up
-+ * to date until commit_write */
-+ if (from == 0 && to == CFS_PAGE_SIZE) {
-+ LL_CDEBUG_PAGE(D_PAGE, page, "full page write\n");
-+ POISON_PAGE(page, 0x11);
-+ RETURN(0);
-+ }
-+
-+ /* If we are writing to a new page, there is no need to read old data.
-+ * The extent locking will have updated the KMS, and for our purposes
-+ * here we can treat it like i_size. */
-+ lov_stripe_lock(lsm);
-+ inode_init_lvb(inode, &lvb);
-+ obd_merge_lvb(ll_i2obdexp(inode), lsm, &lvb, 1);
-+ lov_stripe_unlock(lsm);
-+ if (lvb.lvb_size <= offset) {
-+ char *kaddr = kmap_atomic(page, KM_USER0);
-+ LL_CDEBUG_PAGE(D_PAGE, page, "kms "LPU64" <= offset "LPU64"\n",
-+ lvb.lvb_size, offset);
-+ memset(kaddr, 0, CFS_PAGE_SIZE);
-+ kunmap_atomic(kaddr, KM_USER0);
-+ GOTO(prepare_done, rc = 0);
-+ }
-+
-+ /* XXX could be an async ocp read.. read-ahead? */
-+ rc = ll_brw(OBD_BRW_READ, inode, &oa, page, 0);
-+ if (rc == 0) {
-+ /* bug 1598: don't clobber blksize */
-+ oa.o_valid &= ~(OBD_MD_FLSIZE | OBD_MD_FLBLKSZ);
-+ obdo_refresh_inode(inode, &oa, oa.o_valid);
-+ }
-+
-+ EXIT;
-+ prepare_done:
-+ if (rc == 0)
-+ SetPageUptodate(page);
-+
-+ return rc;
-+}
-+
-+/**
-+ * make page ready for ASYNC write
-+ * \param data - pointer to llap cookie
-+ * \param cmd - one of the OBD_BRW_* macros
-+ *
-+ * \retval 0 the page was successfully prepared for sending
-+ * \retval -EAGAIN the page does not need to be sent
-+ */
-+static int ll_ap_make_ready(void *data, int cmd)
-+{
-+ struct ll_async_page *llap;
-+ struct page *page;
-+ ENTRY;
-+
-+ llap = LLAP_FROM_COOKIE(data);
-+ page = llap->llap_page;
-+
-+ /* we're trying to write, but the page is locked.. come back later */
-+ if (TryLockPage(page))
-+ RETURN(-EAGAIN);
-+
-+ LASSERTF(!(cmd & OBD_BRW_READ) || !PageWriteback(page),
-+ "cmd %x page %p ino %lu index %lu fl %lx\n", cmd, page,
-+ page->mapping->host->i_ino, page->index, page->flags);
-+
-+ /* if we left PageDirty we might get another writepage call
-+ * in the future. list walkers are bright enough
-+ * to check page dirty so we can leave it on whatever list
-+ * it's on. XXX also, we're called with the cli list so if
-+ * we got the page cache list we'd create a lock inversion
-+ * with the removepage path which gets the page lock then the
-+ * cli lock */
-+ if(!clear_page_dirty_for_io(page)) {
-+ unlock_page(page);
-+ RETURN(-EAGAIN);
-+ }
-+
-+ /* This actually clears the dirty bit in the radix tree.*/
-+ set_page_writeback(page);
-+
-+ LL_CDEBUG_PAGE(D_PAGE, page, "made ready\n");
-+ page_cache_get(page);
-+
-+ RETURN(0);
-+}
-+
-+/* We have two reasons for giving llite the opportunity to change the
-+ * write length of a given queued page as it builds the RPC containing
-+ * the page:
-+ *
-+ * 1) Further extending writes may have landed in the page cache
-+ * since a partial write first queued this page requiring us
-+ * to write more from the page cache. (No further races are possible, since
-+ * by the time this is called, the page is locked.)
-+ * 2) We might have raced with truncate and want to avoid performing
-+ * write RPCs that are just going to be thrown away by the
-+ * truncate's punch on the storage targets.
-+ *
-+ * The kms serves these purposes as it is set at both truncate and extending
-+ * writes.
-+ */
-+static int ll_ap_refresh_count(void *data, int cmd)
-+{
-+ struct ll_inode_info *lli;
-+ struct ll_async_page *llap;
-+ struct lov_stripe_md *lsm;
-+ struct page *page;
-+ struct inode *inode;
-+ struct ost_lvb lvb;
-+ __u64 kms;
-+ ENTRY;
-+
-+ /* readpage queues with _COUNT_STABLE, shouldn't get here. */
-+ LASSERT(cmd != OBD_BRW_READ);
-+
-+ llap = LLAP_FROM_COOKIE(data);
-+ page = llap->llap_page;
-+ inode = page->mapping->host;
-+ lli = ll_i2info(inode);
-+ lsm = lli->lli_smd;
-+
-+ lov_stripe_lock(lsm);
-+ inode_init_lvb(inode, &lvb);
-+ obd_merge_lvb(ll_i2obdexp(inode), lsm, &lvb, 1);
-+ kms = lvb.lvb_size;
-+ lov_stripe_unlock(lsm);
-+
-+ /* catch race with truncate */
-+ if (((__u64)page->index << CFS_PAGE_SHIFT) >= kms)
-+ return 0;
-+
-+ /* catch sub-page write at end of file */
-+ if (((__u64)page->index << CFS_PAGE_SHIFT) + CFS_PAGE_SIZE > kms)
-+ return kms % CFS_PAGE_SIZE;
-+
-+ return CFS_PAGE_SIZE;
-+}
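
ll_ap_refresh_count() amounts to clamping the write length of a queued page against the known minimum size (kms): a page wholly past kms is dropped, the page containing kms gets a sub-page count, and everything else gets a full page. A small stand-alone sketch of that arithmetic; the page size, helper name and sample values are illustrative:

/* kms_clamp.c - sketch of the kms clamp done by ll_ap_refresh_count().
 * Build: cc -std=c11 kms_clamp.c -o kms_clamp */
#include <stdio.h>

#define PAGE_SZ 4096ULL

/* Return how many bytes of page 'index' should be written when the file's
 * known minimum size is 'kms'. */
static unsigned long write_count(unsigned long index, unsigned long long kms)
{
        unsigned long long page_start = (unsigned long long)index * PAGE_SZ;

        if (page_start >= kms)
                return 0;                 /* raced with truncate: skip the write */
        if (page_start + PAGE_SZ > kms)
                return kms % PAGE_SZ;     /* sub-page write at end of file */
        return PAGE_SZ;                   /* full-page write */
}

int main(void)
{
        printf("%lu\n", write_count(1, 10000)); /* full page: 4096    */
        printf("%lu\n", write_count(2, 10000)); /* partial page: 1808 */
        printf("%lu\n", write_count(3, 10000)); /* past kms: 0        */
        return 0;
}
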
-+
-+void ll_inode_fill_obdo(struct inode *inode, int cmd, struct obdo *oa)
-+{
-+ struct lov_stripe_md *lsm;
-+ obd_flag valid_flags;
-+
-+ lsm = ll_i2info(inode)->lli_smd;
-+
-+ oa->o_id = lsm->lsm_object_id;
-+ oa->o_valid = OBD_MD_FLID;
-+ valid_flags = OBD_MD_FLTYPE | OBD_MD_FLATIME;
-+ if (cmd & OBD_BRW_WRITE) {
-+ oa->o_valid |= OBD_MD_FLEPOCH;
-+ oa->o_easize = ll_i2info(inode)->lli_io_epoch;
-+
-+ valid_flags |= OBD_MD_FLMTIME | OBD_MD_FLCTIME |
-+ OBD_MD_FLUID | OBD_MD_FLGID |
-+ OBD_MD_FLFID | OBD_MD_FLGENER;
-+ }
-+
-+ obdo_from_inode(oa, inode, valid_flags);
-+}
-+
-+static void ll_ap_fill_obdo(void *data, int cmd, struct obdo *oa)
-+{
-+ struct ll_async_page *llap;
-+ ENTRY;
-+
-+ llap = LLAP_FROM_COOKIE(data);
-+ ll_inode_fill_obdo(llap->llap_page->mapping->host, cmd, oa);
-+
-+ EXIT;
-+}
-+
-+static void ll_ap_update_obdo(void *data, int cmd, struct obdo *oa,
-+ obd_valid valid)
-+{
-+ struct ll_async_page *llap;
-+ ENTRY;
-+
-+ llap = LLAP_FROM_COOKIE(data);
-+ obdo_from_inode(oa, llap->llap_page->mapping->host, valid);
-+
-+ EXIT;
-+}
-+
-+static struct obd_async_page_ops ll_async_page_ops = {
-+ .ap_make_ready = ll_ap_make_ready,
-+ .ap_refresh_count = ll_ap_refresh_count,
-+ .ap_fill_obdo = ll_ap_fill_obdo,
-+ .ap_update_obdo = ll_ap_update_obdo,
-+ .ap_completion = ll_ap_completion,
-+};
-+
-+struct ll_async_page *llap_cast_private(struct page *page)
-+{
-+ struct ll_async_page *llap = (struct ll_async_page *)page_private(page);
-+
-+ LASSERTF(llap == NULL || llap->llap_magic == LLAP_MAGIC,
-+ "page %p private %lu gave magic %d which != %d\n",
-+ page, page_private(page), llap->llap_magic, LLAP_MAGIC);
-+
-+ return llap;
-+}
-+
-+/* Try to shrink the page cache for the @sbi filesystem by 1/@shrink_fraction.
-+ *
-+ * There is an llap attached onto every page in lustre, linked off @sbi.
-+ * We add an llap to the list so we don't lose our place during list walking.
-+ * If llaps in the list are being moved they will only move to the end
-+ * of the LRU, and we aren't terribly interested in those pages here (we
-+ * start at the beginning of the list where the least-used llaps are).
-+ */
-+int llap_shrink_cache(struct ll_sb_info *sbi, int shrink_fraction)
-+{
-+ struct ll_async_page *llap, dummy_llap = { .llap_magic = 0xd11ad11a };
-+ unsigned long total, want, count = 0;
-+
-+ total = sbi->ll_async_page_count;
-+
-+ /* There can be a large number of llaps (600k or more in a large
-+ * memory machine) so the VM 1/6 shrink ratio is likely too much.
-+ * Since we are freeing pages also, we don't necessarily want to
-+ * shrink so much. Limit to 40MB of pages + llaps per call. */
-+ if (shrink_fraction == 0)
-+ want = sbi->ll_async_page_count - sbi->ll_async_page_max + 32;
-+ else
-+ want = (total + shrink_fraction - 1) / shrink_fraction;
-+
-+ if (want > 40 << (20 - CFS_PAGE_SHIFT))
-+ want = 40 << (20 - CFS_PAGE_SHIFT);
-+
-+ CDEBUG(D_CACHE, "shrinking %lu of %lu pages (1/%d)\n",
-+ want, total, shrink_fraction);
-+
-+ spin_lock(&sbi->ll_lock);
-+ list_add(&dummy_llap.llap_pglist_item, &sbi->ll_pglist);
-+
-+ while (--total >= 0 && count < want) {
-+ struct page *page;
-+ int keep;
-+
-+ if (unlikely(need_resched())) {
-+ spin_unlock(&sbi->ll_lock);
-+ cond_resched();
-+ spin_lock(&sbi->ll_lock);
-+ }
-+
-+ llap = llite_pglist_next_llap(sbi,&dummy_llap.llap_pglist_item);
-+ list_del_init(&dummy_llap.llap_pglist_item);
-+ if (llap == NULL)
-+ break;
-+
-+ page = llap->llap_page;
-+ LASSERT(page != NULL);
-+
-+ list_add(&dummy_llap.llap_pglist_item, &llap->llap_pglist_item);
-+
-+ /* Page needs/undergoing IO */
-+ if (TryLockPage(page)) {
-+ LL_CDEBUG_PAGE(D_PAGE, page, "can't lock\n");
-+ continue;
-+ }
-+
-+ keep = (llap->llap_write_queued || PageDirty(page) ||
-+ PageWriteback(page) || (!PageUptodate(page) &&
-+ llap->llap_origin != LLAP_ORIGIN_READAHEAD));
-+
-+ LL_CDEBUG_PAGE(D_PAGE, page,"%s LRU page: %s%s%s%s%s origin %s\n",
-+ keep ? "keep" : "drop",
-+ llap->llap_write_queued ? "wq " : "",
-+ PageDirty(page) ? "pd " : "",
-+ PageUptodate(page) ? "" : "!pu ",
-+ PageWriteback(page) ? "wb" : "",
-+ llap->llap_defer_uptodate ? "" : "!du",
-+ llap_origins[llap->llap_origin]);
-+
-+ /* If page is dirty or undergoing IO don't discard it */
-+ if (keep) {
-+ unlock_page(page);
-+ continue;
-+ }
-+
-+ page_cache_get(page);
-+ spin_unlock(&sbi->ll_lock);
-+
-+ if (page->mapping != NULL) {
-+ ll_teardown_mmaps(page->mapping,
-+ (__u64)page->index << CFS_PAGE_SHIFT,
-+ ((__u64)page->index << CFS_PAGE_SHIFT)|
-+ ~CFS_PAGE_MASK);
-+ if (!PageDirty(page) && !page_mapped(page)) {
-+ ll_ra_accounting(llap, page->mapping);
-+ ll_truncate_complete_page(page);
-+ ++count;
-+ } else {
-+ LL_CDEBUG_PAGE(D_PAGE, page, "Not dropping page"
-+ " because it is "
-+ "%s\n",
-+ PageDirty(page)?
-+ "dirty":"mapped");
-+ }
-+ }
-+ unlock_page(page);
-+ page_cache_release(page);
-+
-+ spin_lock(&sbi->ll_lock);
-+ }
-+ list_del(&dummy_llap.llap_pglist_item);
-+ spin_unlock(&sbi->ll_lock);
-+
-+ CDEBUG(D_CACHE, "shrank %lu/%lu and left %lu unscanned\n",
-+ count, want, total);
-+
-+ return count;
-+}
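
llap_shrink_cache() keeps its place in the LRU by inserting a dummy llap right after the entry it is examining before dropping ll_lock, then resuming the walk from that dummy once the lock is re-taken. The same cursor trick in user space, on a tiny circular doubly-linked list guarded by a pthread mutex; the list type, names and per-item work are illustrative:

/* cursor_walk.c - sketch of walking a locked list with a movable cursor node,
 * as llap_shrink_cache() does with its dummy llap.
 * Build: cc -std=c11 -pthread cursor_walk.c -o cursor_walk */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

struct node {
        struct node *prev, *next;
        int value;                  /* -1 marks the cursor (the "dummy llap") */
};

static void list_insert_after(struct node *pos, struct node *n)
{
        n->prev = pos;
        n->next = pos->next;
        pos->next->prev = n;
        pos->next = n;
}

static void list_del(struct node *n)
{
        n->prev->next = n->next;
        n->next->prev = n->prev;
        n->prev = n->next = n;
}

int main(void)
{
        struct node head = { &head, &head, 0 };
        struct node items[4], cursor = { &cursor, &cursor, -1 };
        int i;

        for (i = 0; i < 4; i++) {
                items[i].value = i + 1;
                list_insert_after(head.prev, &items[i]);   /* append */
        }

        pthread_mutex_lock(&lock);
        list_insert_after(&head, &cursor);      /* start scanning at the head */
        while (cursor.next != &head) {
                struct node *cur = cursor.next;

                /* move the cursor past the entry we are about to process,
                 * so it keeps our place while the lock is dropped */
                list_del(&cursor);
                list_insert_after(cur, &cursor);

                pthread_mutex_unlock(&lock);
                printf("processing item %d without holding the lock\n",
                       cur->value);             /* expensive work goes here */
                pthread_mutex_lock(&lock);
        }
        list_del(&cursor);
        pthread_mutex_unlock(&lock);
        return 0;
}
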
-+
-+static struct ll_async_page *llap_from_page_with_lockh(struct page *page,
-+ unsigned origin,
-+ struct lustre_handle *lockh)
-+{
-+ struct ll_async_page *llap;
-+ struct obd_export *exp;
-+ struct inode *inode = page->mapping->host;
-+ struct ll_sb_info *sbi;
-+ int rc;
-+ ENTRY;
-+
-+ if (!inode) {
-+ static int triggered;
-+
-+ if (!triggered) {
-+ LL_CDEBUG_PAGE(D_ERROR, page, "Bug 10047. Wrong anon "
-+ "page received\n");
-+ libcfs_debug_dumpstack(NULL);
-+ triggered = 1;
-+ }
-+ RETURN(ERR_PTR(-EINVAL));
-+ }
-+ sbi = ll_i2sbi(inode);
-+ LASSERT(ll_async_page_slab);
-+ LASSERTF(origin < LLAP__ORIGIN_MAX, "%u\n", origin);
-+
-+ llap = llap_cast_private(page);
-+ if (llap != NULL) {
-+ /* move to end of LRU list, except when page is just about to
-+ * die */
-+ if (origin != LLAP_ORIGIN_REMOVEPAGE) {
-+ spin_lock(&sbi->ll_lock);
-+ sbi->ll_pglist_gen++;
-+ list_del_init(&llap->llap_pglist_item);
-+ list_add_tail(&llap->llap_pglist_item, &sbi->ll_pglist);
-+ spin_unlock(&sbi->ll_lock);
-+ }
-+ GOTO(out, llap);
-+ }
-+
-+ exp = ll_i2obdexp(page->mapping->host);
-+ if (exp == NULL)
-+ RETURN(ERR_PTR(-EINVAL));
-+
-+ /* limit the number of lustre-cached pages */
-+ if (sbi->ll_async_page_count >= sbi->ll_async_page_max)
-+ llap_shrink_cache(sbi, 0);
-+
-+ OBD_SLAB_ALLOC(llap, ll_async_page_slab, CFS_ALLOC_STD,
-+ ll_async_page_slab_size);
-+ if (llap == NULL)
-+ RETURN(ERR_PTR(-ENOMEM));
-+ llap->llap_magic = LLAP_MAGIC;
-+ llap->llap_cookie = (void *)llap + size_round(sizeof(*llap));
-+
-+ /* XXX: for bug 11270 - check for lockless origin here! */
-+ if (origin == LLAP_ORIGIN_LOCKLESS_IO)
-+ llap->llap_nocache = 1;
-+
-+ rc = obd_prep_async_page(exp, ll_i2info(inode)->lli_smd, NULL, page,
-+ (obd_off)page->index << CFS_PAGE_SHIFT,
-+ &ll_async_page_ops, llap, &llap->llap_cookie,
-+ llap->llap_nocache, lockh);
-+ if (rc) {
-+ OBD_SLAB_FREE(llap, ll_async_page_slab,
-+ ll_async_page_slab_size);
-+ RETURN(ERR_PTR(rc));
-+ }
-+
-+ CDEBUG(D_CACHE, "llap %p page %p cookie %p obj off "LPU64"\n", llap,
-+ page, llap->llap_cookie, (obd_off)page->index << CFS_PAGE_SHIFT);
-+ /* also zeroing the PRIVBITS low order bitflags */
-+ __set_page_ll_data(page, llap);
-+ llap->llap_page = page;
-+
-+ spin_lock(&sbi->ll_lock);
-+ sbi->ll_pglist_gen++;
-+ sbi->ll_async_page_count++;
-+ list_add_tail(&llap->llap_pglist_item, &sbi->ll_pglist);
-+ spin_unlock(&sbi->ll_lock);
-+
-+ out:
-+ if (unlikely(sbi->ll_flags & LL_SBI_LLITE_CHECKSUM)) {
-+ __u32 csum;
-+ char *kaddr = kmap_atomic(page, KM_USER0);
-+ csum = init_checksum(OSC_DEFAULT_CKSUM);
-+ csum = compute_checksum(csum, kaddr, CFS_PAGE_SIZE,
-+ OSC_DEFAULT_CKSUM);
-+ kunmap_atomic(kaddr, KM_USER0);
-+ if (origin == LLAP_ORIGIN_READAHEAD ||
-+ origin == LLAP_ORIGIN_READPAGE ||
-+ origin == LLAP_ORIGIN_LOCKLESS_IO) {
-+ llap->llap_checksum = 0;
-+ } else if (origin == LLAP_ORIGIN_COMMIT_WRITE ||
-+ llap->llap_checksum == 0) {
-+ llap->llap_checksum = csum;
-+ CDEBUG(D_PAGE, "page %p cksum %x\n", page, csum);
-+ } else if (llap->llap_checksum == csum) {
-+ /* origin == LLAP_ORIGIN_WRITEPAGE */
-+ CDEBUG(D_PAGE, "page %p cksum %x confirmed\n",
-+ page, csum);
-+ } else {
-+ /* origin == LLAP_ORIGIN_WRITEPAGE */
-+ LL_CDEBUG_PAGE(D_ERROR, page, "old cksum %x != new "
-+ "%x!\n", llap->llap_checksum, csum);
-+ }
-+ }
-+
-+ llap->llap_origin = origin;
-+ RETURN(llap);
-+}
-+
-+static inline struct ll_async_page *llap_from_page(struct page *page,
-+ unsigned origin)
-+{
-+ return llap_from_page_with_lockh(page, origin, NULL);
-+}
-+
-+static int queue_or_sync_write(struct obd_export *exp, struct inode *inode,
-+ struct ll_async_page *llap,
-+ unsigned to, obd_flag async_flags)
-+{
-+ unsigned long size_index = i_size_read(inode) >> CFS_PAGE_SHIFT;
-+ struct obd_io_group *oig;
-+ struct ll_sb_info *sbi = ll_i2sbi(inode);
-+ int rc, noquot = llap->llap_ignore_quota ? OBD_BRW_NOQUOTA : 0;
-+ ENTRY;
-+
-+ /* _make_ready only sees llap once we've unlocked the page */
-+ llap->llap_write_queued = 1;
-+ rc = obd_queue_async_io(exp, ll_i2info(inode)->lli_smd, NULL,
-+ llap->llap_cookie, OBD_BRW_WRITE | noquot,
-+ 0, 0, 0, async_flags);
-+ if (rc == 0) {
-+ LL_CDEBUG_PAGE(D_PAGE, llap->llap_page, "write queued\n");
-+ llap_write_pending(inode, llap);
-+ GOTO(out, 0);
-+ }
-+
-+ llap->llap_write_queued = 0;
-+
-+ rc = oig_init(&oig);
-+ if (rc)
-+ GOTO(out, rc);
-+
-+ /* make full-page requests if we are not at EOF (bug 4410) */
-+ if (to != CFS_PAGE_SIZE && llap->llap_page->index < size_index) {
-+ LL_CDEBUG_PAGE(D_PAGE, llap->llap_page,
-+ "sync write before EOF: size_index %lu, to %d\n",
-+ size_index, to);
-+ to = CFS_PAGE_SIZE;
-+ } else if (to != CFS_PAGE_SIZE && llap->llap_page->index == size_index){
-+ int size_to = i_size_read(inode) & ~CFS_PAGE_MASK;
-+ LL_CDEBUG_PAGE(D_PAGE, llap->llap_page,
-+ "sync write at EOF: size_index %lu, to %d/%d\n",
-+ size_index, to, size_to);
-+ if (to < size_to)
-+ to = size_to;
-+ }
-+
-+ /* compare the checksum once before the page leaves llite */
-+ if (unlikely((sbi->ll_flags & LL_SBI_LLITE_CHECKSUM) &&
-+ llap->llap_checksum != 0)) {
-+ __u32 csum;
-+ struct page *page = llap->llap_page;
-+ char *kaddr = kmap_atomic(page, KM_USER0);
-+ csum = init_checksum(OSC_DEFAULT_CKSUM);
-+ csum = compute_checksum(csum, kaddr, CFS_PAGE_SIZE,
-+ OSC_DEFAULT_CKSUM);
-+ kunmap_atomic(kaddr, KM_USER0);
-+ if (llap->llap_checksum == csum) {
-+ CDEBUG(D_PAGE, "page %p cksum %x confirmed\n",
-+ page, csum);
-+ } else {
-+ CERROR("page %p old cksum %x != new cksum %x!\n",
-+ page, llap->llap_checksum, csum);
-+ }
-+ }
-+
-+ rc = obd_queue_group_io(exp, ll_i2info(inode)->lli_smd, NULL, oig,
-+ llap->llap_cookie, OBD_BRW_WRITE | noquot,
-+ 0, to, 0, ASYNC_READY | ASYNC_URGENT |
-+ ASYNC_COUNT_STABLE | ASYNC_GROUP_SYNC);
-+ if (rc)
-+ GOTO(free_oig, rc);
-+
-+ rc = obd_trigger_group_io(exp, ll_i2info(inode)->lli_smd, NULL, oig);
-+ if (rc)
-+ GOTO(free_oig, rc);
-+
-+ rc = oig_wait(oig);
-+
-+ if (!rc && async_flags & ASYNC_READY) {
-+ unlock_page(llap->llap_page);
-+ if (PageWriteback(llap->llap_page))
-+ end_page_writeback(llap->llap_page);
-+ }
-+
-+ LL_CDEBUG_PAGE(D_PAGE, llap->llap_page, "sync write returned %d\n", rc);
-+
-+free_oig:
-+ oig_release(oig);
-+out:
-+ RETURN(rc);
-+}
-+
-+/* update our write count to account for i_size increases that may have
-+ * happened since we've queued the page for io. */
-+
-+/* be careful not to return success without setting the page Uptodate or
-+ * the next pass through prepare_write will read in stale data from disk. */
-+int ll_commit_write(struct file *file, struct page *page, unsigned from,
-+ unsigned to)
-+{
-+ struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
-+ struct inode *inode = page->mapping->host;
-+ struct ll_inode_info *lli = ll_i2info(inode);
-+ struct lov_stripe_md *lsm = lli->lli_smd;
-+ struct obd_export *exp;
-+ struct ll_async_page *llap;
-+ loff_t size;
-+ struct lustre_handle *lockh = NULL;
-+ int rc = 0;
-+ ENTRY;
-+
-+ SIGNAL_MASK_ASSERT(); /* XXX BUG 1511 */
-+ LASSERT(inode == file->f_dentry->d_inode);
-+ LASSERT(PageLocked(page));
-+
-+ CDEBUG(D_INODE, "inode %p is writing page %p from %d to %d at %lu\n",
-+ inode, page, from, to, page->index);
-+
-+ if (fd->fd_flags & LL_FILE_GROUP_LOCKED)
-+ lockh = &fd->fd_cwlockh;
-+
-+ llap = llap_from_page_with_lockh(page, LLAP_ORIGIN_COMMIT_WRITE, lockh);
-+ if (IS_ERR(llap))
-+ RETURN(PTR_ERR(llap));
-+
-+ exp = ll_i2obdexp(inode);
-+ if (exp == NULL)
-+ RETURN(-EINVAL);
-+
-+ llap->llap_ignore_quota = cfs_capable(CFS_CAP_SYS_RESOURCE);
-+
-+ /* queue a write for some time in the future the first time we
-+ * dirty the page */
-+ if (!PageDirty(page)) {
-+ ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_DIRTY_MISSES, 1);
-+
-+ rc = queue_or_sync_write(exp, inode, llap, to, 0);
-+ if (rc)
-+ GOTO(out, rc);
-+ } else {
-+ ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_DIRTY_HITS, 1);
-+ }
-+
-+ /* put the page in the page cache, from now on ll_removepage is
-+ * responsible for cleaning up the llap.
-+ * only set the page dirty when it's queued to be written out */
-+ if (llap->llap_write_queued)
-+ set_page_dirty(page);
-+
-+out:
-+ size = (((obd_off)page->index) << CFS_PAGE_SHIFT) + to;
-+ ll_inode_size_lock(inode, 0);
-+ if (rc == 0) {
-+ lov_stripe_lock(lsm);
-+ obd_adjust_kms(exp, lsm, size, 0);
-+ lov_stripe_unlock(lsm);
-+ if (size > i_size_read(inode))
-+ i_size_write(inode, size);
-+ SetPageUptodate(page);
-+ } else if (size > i_size_read(inode)) {
-+ /* this page is beyond i_size, so it can't be
-+ * truncated in ll_p_r_e during lock revocation. we must
-+ * tear down our book-keeping here. */
-+ ll_removepage(page);
-+ }
-+ ll_inode_size_unlock(inode, 0);
-+ RETURN(rc);
-+}
-+
-+static unsigned long ll_ra_count_get(struct ll_sb_info *sbi, unsigned long len)
-+{
-+ struct ll_ra_info *ra = &sbi->ll_ra_info;
-+ unsigned long ret;
-+ ENTRY;
-+
-+ spin_lock(&sbi->ll_lock);
-+ ret = min(ra->ra_max_pages - ra->ra_cur_pages, len);
-+ ra->ra_cur_pages += ret;
-+ spin_unlock(&sbi->ll_lock);
-+
-+ RETURN(ret);
-+}
-+
-+static void ll_ra_count_put(struct ll_sb_info *sbi, unsigned long len)
-+{
-+ struct ll_ra_info *ra = &sbi->ll_ra_info;
-+ spin_lock(&sbi->ll_lock);
-+ LASSERTF(ra->ra_cur_pages >= len, "r_c_p %lu len %lu\n",
-+ ra->ra_cur_pages, len);
-+ ra->ra_cur_pages -= len;
-+ spin_unlock(&sbi->ll_lock);
-+}
-+
-+/* called for each page in a completed rpc.*/
-+int ll_ap_completion(void *data, int cmd, struct obdo *oa, int rc)
-+{
-+ struct ll_async_page *llap;
-+ struct page *page;
-+ int ret = 0;
-+ ENTRY;
-+
-+ llap = LLAP_FROM_COOKIE(data);
-+ page = llap->llap_page;
-+ LASSERT(PageLocked(page));
-+ LASSERT(CheckWriteback(page,cmd));
-+
-+ LL_CDEBUG_PAGE(D_PAGE, page, "completing cmd %d with %d\n", cmd, rc);
-+
-+ if (cmd & OBD_BRW_READ && llap->llap_defer_uptodate)
-+ ll_ra_count_put(ll_i2sbi(page->mapping->host), 1);
-+
-+ if (rc == 0) {
-+ if (cmd & OBD_BRW_READ) {
-+ if (!llap->llap_defer_uptodate)
-+ SetPageUptodate(page);
-+ } else {
-+ llap->llap_write_queued = 0;
-+ }
-+ ClearPageError(page);
-+ } else {
-+ if (cmd & OBD_BRW_READ) {
-+ llap->llap_defer_uptodate = 0;
-+ }
-+ SetPageError(page);
-+ if (rc == -ENOSPC)
-+ set_bit(AS_ENOSPC, &page->mapping->flags);
-+ else
-+ set_bit(AS_EIO, &page->mapping->flags);
-+ }
-+
-+ /* be careful about clearing writeback.
-+ * if writeback is cleared after the page lock is released, parallel IO
-+ * can start before ap_make_ready has finished, so we could end up with
-+ * a page that has PG_Writeback set from ->writepage() and a completed
-+ * READ that clears this flag */
-+ if ((cmd & OBD_BRW_WRITE) && PageWriteback(page))
-+ end_page_writeback(page);
-+
-+ unlock_page(page);
-+
-+ if (cmd & OBD_BRW_WRITE) {
-+ llap_write_complete(page->mapping->host, llap);
-+ ll_try_done_writing(page->mapping->host);
-+ }
-+
-+ page_cache_release(page);
-+
-+ RETURN(ret);
-+}
-+
-+static void __ll_put_llap(struct page *page)
-+{
-+ struct inode *inode = page->mapping->host;
-+ struct obd_export *exp;
-+ struct ll_async_page *llap;
-+ struct ll_sb_info *sbi = ll_i2sbi(inode);
-+ int rc;
-+ ENTRY;
-+
-+ exp = ll_i2obdexp(inode);
-+ if (exp == NULL) {
-+ CERROR("page %p ind %lu gave null export\n", page, page->index);
-+ EXIT;
-+ return;
-+ }
-+
-+ llap = llap_from_page(page, LLAP_ORIGIN_REMOVEPAGE);
-+ if (IS_ERR(llap)) {
-+ CERROR("page %p ind %lu couldn't find llap: %ld\n", page,
-+ page->index, PTR_ERR(llap));
-+ EXIT;
-+ return;
-+ }
-+
-+ //llap_write_complete(inode, llap);
-+ rc = obd_teardown_async_page(exp, ll_i2info(inode)->lli_smd, NULL,
-+ llap->llap_cookie);
-+ if (rc != 0)
-+ CERROR("page %p ind %lu failed: %d\n", page, page->index, rc);
-+
-+ /* this unconditional free is only safe because the page lock
-+ * is providing exclusivity to memory pressure/truncate/writeback..*/
-+ __clear_page_ll_data(page);
-+
-+ spin_lock(&sbi->ll_lock);
-+ if (!list_empty(&llap->llap_pglist_item))
-+ list_del_init(&llap->llap_pglist_item);
-+ sbi->ll_pglist_gen++;
-+ sbi->ll_async_page_count--;
-+ spin_unlock(&sbi->ll_lock);
-+ OBD_SLAB_FREE(llap, ll_async_page_slab, ll_async_page_slab_size);
-+
-+ EXIT;
-+}
-+
-+/* the kernel calls us here when a page is unhashed from the page cache.
-+ * the page will be locked and the kernel is holding a spinlock, so
-+ * we need to be careful. we're just tearing down our book-keeping
-+ * here. */
-+void ll_removepage(struct page *page)
-+{
-+ struct ll_async_page *llap = llap_cast_private(page);
-+ ENTRY;
-+
-+ LASSERT(!in_interrupt());
-+
-+ /* sync pages or failed read pages can leave pages in the page
-+ * cache that don't have our data associated with them anymore */
-+ if (page_private(page) == 0) {
-+ EXIT;
-+ return;
-+ }
-+
-+ LASSERT(!llap->llap_lockless_io_page);
-+ LASSERT(!llap->llap_nocache);
-+
-+ LL_CDEBUG_PAGE(D_PAGE, page, "being evicted\n");
-+ __ll_put_llap(page);
-+
-+ EXIT;
-+}
-+
-+static int ll_issue_page_read(struct obd_export *exp,
-+ struct ll_async_page *llap,
-+ struct obd_io_group *oig, int defer)
-+{
-+ struct page *page = llap->llap_page;
-+ int rc;
-+
-+ page_cache_get(page);
-+ llap->llap_defer_uptodate = defer;
-+ llap->llap_ra_used = 0;
-+ rc = obd_queue_group_io(exp, ll_i2info(page->mapping->host)->lli_smd,
-+ NULL, oig, llap->llap_cookie, OBD_BRW_READ, 0,
-+ CFS_PAGE_SIZE, 0, ASYNC_COUNT_STABLE | ASYNC_READY |
-+ ASYNC_URGENT);
-+ if (rc) {
-+ LL_CDEBUG_PAGE(D_ERROR, page, "read queue failed: rc %d\n", rc);
-+ page_cache_release(page);
-+ }
-+ RETURN(rc);
-+}
-+
-+static void ll_ra_stats_inc_unlocked(struct ll_ra_info *ra, enum ra_stat which)
-+{
-+ LASSERTF(which >= 0 && which < _NR_RA_STAT, "which: %u\n", which);
-+ ra->ra_stats[which]++;
-+}
-+
-+static void ll_ra_stats_inc(struct address_space *mapping, enum ra_stat which)
-+{
-+ struct ll_sb_info *sbi = ll_i2sbi(mapping->host);
-+ struct ll_ra_info *ra = &ll_i2sbi(mapping->host)->ll_ra_info;
-+
-+ spin_lock(&sbi->ll_lock);
-+ ll_ra_stats_inc_unlocked(ra, which);
-+ spin_unlock(&sbi->ll_lock);
-+}
-+
-+void ll_ra_accounting(struct ll_async_page *llap, struct address_space *mapping)
-+{
-+ if (!llap->llap_defer_uptodate || llap->llap_ra_used)
-+ return;
-+
-+ ll_ra_stats_inc(mapping, RA_STAT_DISCARDED);
-+}
-+
-+#define RAS_CDEBUG(ras) \
-+ CDEBUG(D_READA, \
-+ "lrp %lu cr %lu cp %lu ws %lu wl %lu nra %lu r %lu ri %lu" \
-+ "csr %lu sf %lu sp %lu sl %lu \n", \
-+ ras->ras_last_readpage, ras->ras_consecutive_requests, \
-+ ras->ras_consecutive_pages, ras->ras_window_start, \
-+ ras->ras_window_len, ras->ras_next_readahead, \
-+ ras->ras_requests, ras->ras_request_index, \
-+ ras->ras_consecutive_stride_requests, ras->ras_stride_offset, \
-+ ras->ras_stride_pages, ras->ras_stride_length)
-+
-+static int index_in_window(unsigned long index, unsigned long point,
-+ unsigned long before, unsigned long after)
-+{
-+ unsigned long start = point - before, end = point + after;
-+
-+ if (start > point)
-+ start = 0;
-+ if (end < point)
-+ end = ~0;
-+
-+ return start <= index && index <= end;
-+}
-+
-+static struct ll_readahead_state *ll_ras_get(struct file *f)
-+{
-+ struct ll_file_data *fd;
-+
-+ fd = LUSTRE_FPRIVATE(f);
-+ return &fd->fd_ras;
-+}
-+
-+void ll_ra_read_in(struct file *f, struct ll_ra_read *rar)
-+{
-+ struct ll_readahead_state *ras;
-+
-+ ras = ll_ras_get(f);
-+
-+ spin_lock(&ras->ras_lock);
-+ ras->ras_requests++;
-+ ras->ras_request_index = 0;
-+ ras->ras_consecutive_requests++;
-+ rar->lrr_reader = current;
-+
-+ list_add(&rar->lrr_linkage, &ras->ras_read_beads);
-+ spin_unlock(&ras->ras_lock);
-+}
-+
-+void ll_ra_read_ex(struct file *f, struct ll_ra_read *rar)
-+{
-+ struct ll_readahead_state *ras;
-+
-+ ras = ll_ras_get(f);
-+
-+ spin_lock(&ras->ras_lock);
-+ list_del_init(&rar->lrr_linkage);
-+ spin_unlock(&ras->ras_lock);
-+}
-+
-+static struct ll_ra_read *ll_ra_read_get_locked(struct ll_readahead_state *ras)
-+{
-+ struct ll_ra_read *scan;
-+
-+ list_for_each_entry(scan, &ras->ras_read_beads, lrr_linkage) {
-+ if (scan->lrr_reader == current)
-+ return scan;
-+ }
-+ return NULL;
-+}
-+
-+struct ll_ra_read *ll_ra_read_get(struct file *f)
-+{
-+ struct ll_readahead_state *ras;
-+ struct ll_ra_read *bead;
-+
-+ ras = ll_ras_get(f);
-+
-+ spin_lock(&ras->ras_lock);
-+ bead = ll_ra_read_get_locked(ras);
-+ spin_unlock(&ras->ras_lock);
-+ return bead;
-+}
-+
-+static int ll_read_ahead_page(struct obd_export *exp, struct obd_io_group *oig,
-+ int index, struct address_space *mapping)
-+{
-+ struct ll_async_page *llap;
-+ struct page *page;
-+ unsigned int gfp_mask = 0;
-+ int rc = 0;
-+
-+ gfp_mask = GFP_HIGHUSER & ~__GFP_WAIT;
-+#ifdef __GFP_NOWARN
-+ gfp_mask |= __GFP_NOWARN;
-+#endif
-+ page = grab_cache_page_nowait_gfp(mapping, index, gfp_mask);
-+ if (page == NULL) {
-+ ll_ra_stats_inc(mapping, RA_STAT_FAILED_GRAB_PAGE);
-+ CDEBUG(D_READA, "g_c_p_n failed\n");
-+ return 0;
-+ }
-+
-+ /* Check if page was truncated or reclaimed */
-+ if (page->mapping != mapping) {
-+ ll_ra_stats_inc(mapping, RA_STAT_WRONG_GRAB_PAGE);
-+ CDEBUG(D_READA, "g_c_p_n returned invalid page\n");
-+ GOTO(unlock_page, rc = 0);
-+ }
-+
-+ /* we do this first so that we can see the page in the /proc
-+ * accounting */
-+ llap = llap_from_page(page, LLAP_ORIGIN_READAHEAD);
-+ if (IS_ERR(llap) || llap->llap_defer_uptodate) {
-+ if (PTR_ERR(llap) == -ENOLCK) {
-+ ll_ra_stats_inc(mapping, RA_STAT_FAILED_MATCH);
-+ CDEBUG(D_READA | D_PAGE,
-+ "Adding page to cache failed index "
-+ "%d\n", index);
-+ CDEBUG(D_READA, "nolock page\n");
-+ GOTO(unlock_page, rc = -ENOLCK);
-+ }
-+ CDEBUG(D_READA, "read-ahead page\n");
-+ GOTO(unlock_page, rc = 0);
-+ }
-+
-+ /* skip completed pages */
-+ if (Page_Uptodate(page))
-+ GOTO(unlock_page, rc = 0);
-+
-+ /* bail out when we hit the end of the lock. */
-+ rc = ll_issue_page_read(exp, llap, oig, 1);
-+ if (rc == 0) {
-+ LL_CDEBUG_PAGE(D_READA | D_PAGE, page, "started read-ahead\n");
-+ rc = 1;
-+ } else {
-+unlock_page:
-+ unlock_page(page);
-+ LL_CDEBUG_PAGE(D_READA | D_PAGE, page, "skipping read-ahead\n");
-+ }
-+ page_cache_release(page);
-+ return rc;
-+}
-+
-+/* ra_io_arg is filled in at the beginning of ll_readahead while holding
-+ * ras_lock; ll_read_ahead_pages then reads RA pages according to this
-+ * argument. All items in this structure are expressed as page indices.
-+ */
-+struct ra_io_arg {
-+ unsigned long ria_start; /* start offset of read-ahead*/
-+ unsigned long ria_end; /* end offset of read-ahead*/
-+ /* If a stride read pattern is detected, ria_stoff is the offset at
-+ * which the stride read starts. Note: for normal read-ahead this
-+ * value is meaningless and will not be accessed. */
-+ pgoff_t ria_stoff;
-+ /* ria_length and ria_pages are the stride length and the number of
-+ * data pages per stride in stride I/O mode. They are also used to
-+ * decide whether a page belongs to a stride I/O read-ahead. */
-+ unsigned long ria_length;
-+ unsigned long ria_pages;
-+};
-+
-+#define RIA_DEBUG(ria) \
-+ CDEBUG(D_READA, "rs %lu re %lu ro %lu rl %lu rp %lu\n", \
-+ ria->ria_start, ria->ria_end, ria->ria_stoff, ria->ria_length,\
-+ ria->ria_pages)
-+
-+#define RAS_INCREASE_STEP (1024 * 1024 >> CFS_PAGE_SHIFT)
-+
-+static inline int stride_io_mode(struct ll_readahead_state *ras)
-+{
-+ return ras->ras_consecutive_stride_requests > 1;
-+}
-+
-+/* The function calculates how many pages will be read in
-+ * [off, off + length] under stride I/O mode, with
-+ * stride_offset = st_off, stride_length = st_len,
-+ * stride_pages = st_pgs
-+ */
-+static unsigned long
-+stride_pg_count(pgoff_t st_off, unsigned long st_len, unsigned long st_pgs,
-+ unsigned long off, unsigned length)
-+{
-+ unsigned long cont_len = st_off > off ? st_off - off : 0;
-+ __u64 stride_len = length + off > st_off ?
-+ length + off + 1 - st_off : 0;
-+ unsigned long left, pg_count;
-+
-+ if (st_len == 0 || length == 0)
-+ return length;
-+
-+ left = do_div(stride_len, st_len);
-+ left = min(left, st_pgs);
-+
-+ pg_count = left + stride_len * st_pgs + cont_len;
-+
-+ LASSERT(pg_count >= left);
-+
-+ CDEBUG(D_READA, "st_off %lu, st_len %lu st_pgs %lu off %lu length %u"
-+ "pgcount %lu\n", st_off, st_len, st_pgs, off, length, pg_count);
-+
-+ return pg_count;
-+}
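
As the comment above says, stride_pg_count() computes in closed form how many pages of [off, off + length] land in the data part of the stride pattern (st_off, st_len, st_pgs). The brute-force re-derivation below counts the same kind of quantity page by page (over a half-open range, for simplicity); the helper name and the sample pattern are illustrative, not taken from the patch:

/* stride_count.c - brute-force check of the stride page-count computation.
 * Build: cc -std=c11 stride_count.c -o stride_count */
#include <stdio.h>

/* Count the pages in [off, off + len) that a stride reader would touch:
 * pages before st_off count as contiguous data, and within the pattern
 * only the first st_pgs pages of every st_len-page period are data. */
static unsigned long stride_pages_in_range(unsigned long st_off,
                                           unsigned long st_len,
                                           unsigned long st_pgs,
                                           unsigned long off,
                                           unsigned long len)
{
        unsigned long count = 0, idx;

        if (st_len == 0 || st_pgs == 0)
                return len;                     /* no stride pattern: contiguous */

        for (idx = off; idx < off + len; idx++) {
                if (idx < st_off)
                        count++;                /* before the pattern starts */
                else if ((idx - st_off) % st_len < st_pgs)
                        count++;                /* inside a data chunk */
        }
        return count;
}

int main(void)
{
        /* pattern: 4 data pages every 16 pages, starting at page 32 */
        printf("%lu\n", stride_pages_in_range(32, 16, 4, 32, 64));  /* prints 16 */
        return 0;
}
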
-+
-+static int ria_page_count(struct ra_io_arg *ria)
-+{
-+ __u64 length = ria->ria_end >= ria->ria_start ?
-+ ria->ria_end - ria->ria_start + 1 : 0;
-+
-+ return stride_pg_count(ria->ria_stoff, ria->ria_length,
-+ ria->ria_pages, ria->ria_start,
-+ length);
-+}
-+
-+/* Check whether the index is in the defined read-ahead window */
-+static int ras_inside_ra_window(unsigned long idx, struct ra_io_arg *ria)
-+{
-+ /* If ria_length == ria_pages, this is non-stride I/O mode and
-+ * idx is always inside the read-ahead window in that case.
-+ * For stride I/O mode, just check whether idx falls within the
-+ * first ria_pages pages of its stride. */
-+ return ria->ria_length == 0 || ria->ria_length == ria->ria_pages ||
-+ (idx - ria->ria_stoff) % ria->ria_length < ria->ria_pages;
-+}
-+
-+static int ll_read_ahead_pages(struct obd_export *exp,
-+ struct obd_io_group *oig,
-+ struct ra_io_arg *ria,
-+ unsigned long *reserved_pages,
-+ struct address_space *mapping,
-+ unsigned long *ra_end)
-+{
-+ int rc, count = 0, stride_ria;
-+ unsigned long page_idx;
-+
-+ LASSERT(ria != NULL);
-+ RIA_DEBUG(ria);
-+
-+ stride_ria = ria->ria_length > ria->ria_pages && ria->ria_pages > 0;
-+ for (page_idx = ria->ria_start; page_idx <= ria->ria_end &&
-+ *reserved_pages > 0; page_idx++) {
-+ if (ras_inside_ra_window(page_idx, ria)) {
-+ /* If the page is inside the read-ahead window*/
-+ rc = ll_read_ahead_page(exp, oig, page_idx, mapping);
-+ if (rc == 1) {
-+ (*reserved_pages)--;
-+ count ++;
-+ } else if (rc == -ENOLCK)
-+ break;
-+ } else if (stride_ria) {
-+ /* If it is not in the read-ahead window and stride
-+ * read-ahead is active, check whether the stride gap
-+ * should be skipped */
-+ pgoff_t offset;
-+ /* FIXME: This assertion is only valid for forward
-+ * read-ahead; it will be fixed when backward
-+ * read-ahead is implemented */
-+ LASSERTF(page_idx > ria->ria_stoff, "since %lu is in the"
-+ " gap of the ra window, it should be bigger than the"
-+ " stride offset %lu\n", page_idx, ria->ria_stoff);
-+
-+ offset = page_idx - ria->ria_stoff;
-+ offset = offset % (ria->ria_length);
-+ if (offset > ria->ria_pages) {
-+ page_idx += ria->ria_length - offset;
-+ CDEBUG(D_READA, "i %lu skip %lu \n", page_idx,
-+ ria->ria_length - offset);
-+ continue;
-+ }
-+ }
-+ }
-+ *ra_end = page_idx;
-+ return count;
-+}
-+
-+static int ll_readahead(struct ll_readahead_state *ras,
-+ struct obd_export *exp, struct address_space *mapping,
-+ struct obd_io_group *oig, int flags)
-+{
-+ unsigned long start = 0, end = 0, reserved;
-+ unsigned long ra_end, len;
-+ struct inode *inode;
-+ struct lov_stripe_md *lsm;
-+ struct ll_ra_read *bead;
-+ struct ost_lvb lvb;
-+ struct ra_io_arg ria = { 0 };
-+ int ret = 0;
-+ __u64 kms;
-+ ENTRY;
-+
-+ inode = mapping->host;
-+ lsm = ll_i2info(inode)->lli_smd;
-+
-+ lov_stripe_lock(lsm);
-+ inode_init_lvb(inode, &lvb);
-+ obd_merge_lvb(ll_i2obdexp(inode), lsm, &lvb, 1);
-+ kms = lvb.lvb_size;
-+ lov_stripe_unlock(lsm);
-+ if (kms == 0) {
-+ ll_ra_stats_inc(mapping, RA_STAT_ZERO_LEN);
-+ RETURN(0);
-+ }
-+
-+ spin_lock(&ras->ras_lock);
-+ bead = ll_ra_read_get_locked(ras);
-+ /* Enlarge the RA window to encompass the full read */
-+ if (bead != NULL && ras->ras_window_start + ras->ras_window_len <
-+ bead->lrr_start + bead->lrr_count) {
-+ ras->ras_window_len = bead->lrr_start + bead->lrr_count -
-+ ras->ras_window_start;
-+ }
-+ /* Reserve a part of the read-ahead window that we'll be issuing */
-+ if (ras->ras_window_len) {
-+ start = ras->ras_next_readahead;
-+ end = ras->ras_window_start + ras->ras_window_len - 1;
-+ }
-+ if (end != 0) {
-+ /* Truncate RA window to end of file */
-+ end = min(end, (unsigned long)((kms - 1) >> CFS_PAGE_SHIFT));
-+ ras->ras_next_readahead = max(end, end + 1);
-+ RAS_CDEBUG(ras);
-+ }
-+ ria.ria_start = start;
-+ ria.ria_end = end;
-+ /* If stride I/O mode is detected, get stride window*/
-+ if (stride_io_mode(ras)) {
-+ ria.ria_stoff = ras->ras_stride_offset;
-+ ria.ria_length = ras->ras_stride_length;
-+ ria.ria_pages = ras->ras_stride_pages;
-+ }
-+ spin_unlock(&ras->ras_lock);
-+
-+ if (end == 0) {
-+ ll_ra_stats_inc(mapping, RA_STAT_ZERO_WINDOW);
-+ RETURN(0);
-+ }
-+
-+ len = ria_page_count(&ria);
-+ if (len == 0)
-+ RETURN(0);
-+
-+ reserved = ll_ra_count_get(ll_i2sbi(inode), len);
-+ if (reserved < len)
-+ ll_ra_stats_inc(mapping, RA_STAT_MAX_IN_FLIGHT);
-+
-+ CDEBUG(D_READA, "reserved page %lu \n", reserved);
-+
-+ ret = ll_read_ahead_pages(exp, oig, &ria, &reserved, mapping, &ra_end);
-+
-+ LASSERTF(reserved >= 0, "reserved %lu\n", reserved);
-+ if (reserved != 0)
-+ ll_ra_count_put(ll_i2sbi(inode), reserved);
-+
-+ if (ra_end == end + 1 && ra_end == (kms >> CFS_PAGE_SHIFT))
-+ ll_ra_stats_inc(mapping, RA_STAT_EOF);
-+
-+ /* if we didn't get to the end of the region we reserved from
-+ * the ras we need to go back and update the ras so that the
-+ * next read-ahead tries from where we left off. we only do so
-+ * if the region we failed to issue read-ahead on is still ahead
-+ * of the app and behind the next index to start read-ahead from */
-+ CDEBUG(D_READA, "ra_end %lu end %lu stride end %lu \n",
-+ ra_end, end, ria.ria_end);
-+
-+ if (ra_end != (end + 1)) {
-+ spin_lock(&ras->ras_lock);
-+ if (ra_end < ras->ras_next_readahead &&
-+ index_in_window(ra_end, ras->ras_window_start, 0,
-+ ras->ras_window_len)) {
-+ ras->ras_next_readahead = ra_end;
-+ RAS_CDEBUG(ras);
-+ }
-+ spin_unlock(&ras->ras_lock);
-+ }
-+
-+ RETURN(ret);
-+}
-+
-+static void ras_set_start(struct ll_readahead_state *ras, unsigned long index)
-+{
-+ ras->ras_window_start = index & (~(RAS_INCREASE_STEP - 1));
-+}
-+
-+/* called with the ras_lock held or from places where it doesn't matter */
-+static void ras_reset(struct ll_readahead_state *ras, unsigned long index)
-+{
-+ ras->ras_last_readpage = index;
-+ ras->ras_consecutive_requests = 0;
-+ ras->ras_consecutive_pages = 0;
-+ ras->ras_window_len = 0;
-+ ras_set_start(ras, index);
-+ ras->ras_next_readahead = max(ras->ras_window_start, index);
-+
-+ RAS_CDEBUG(ras);
-+}
-+
-+/* called with the ras_lock held or from places where it doesn't matter */
-+static void ras_stride_reset(struct ll_readahead_state *ras)
-+{
-+ ras->ras_consecutive_stride_requests = 0;
-+ ras->ras_stride_length = 0;
-+ ras->ras_stride_pages = 0;
-+ RAS_CDEBUG(ras);
-+}
-+
-+void ll_readahead_init(struct inode *inode, struct ll_readahead_state *ras)
-+{
-+ spin_lock_init(&ras->ras_lock);
-+ ras_reset(ras, 0);
-+ ras->ras_requests = 0;
-+ INIT_LIST_HEAD(&ras->ras_read_beads);
-+}
-+
-+/*
-+ * Check whether the read request is in the stride window.
-+ * If it is in the stride window, return 1, otherwise return 0.
-+ */
-+static int index_in_stride_window(unsigned long index,
-+ struct ll_readahead_state *ras,
-+ struct inode *inode)
-+{
-+ unsigned long stride_gap = index - ras->ras_last_readpage - 1;
-+
-+ if (ras->ras_stride_length == 0 || ras->ras_stride_pages == 0)
-+ return 0;
-+
-+ /* If it is contiguous read */
-+ if (stride_gap == 0)
-+ return ras->ras_consecutive_pages + 1 <= ras->ras_stride_pages;
-+
-+ /*Otherwise check the stride by itself */
-+ return (ras->ras_stride_length - ras->ras_stride_pages) == stride_gap &&
-+ ras->ras_consecutive_pages == ras->ras_stride_pages;
-+}
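
index_in_stride_window() decides whether a new read continues the detected stride pattern: either it is contiguous and still fits inside the current data chunk, or it jumps exactly one gap (stride_length - stride_pages) after a full chunk has been read. A small stand-alone sketch of that decision; the names and sample values are illustrative:

/* stride_check.c - sketch of the stride-pattern check in index_in_stride_window().
 * Build: cc -std=c11 stride_check.c -o stride_check */
#include <stdio.h>

/* A reader that touches st_pgs consecutive pages, then skips ahead so that
 * successive chunks start st_len pages apart, matches the stride pattern. */
static int continues_stride(unsigned long index,   /* page being read now      */
                            unsigned long last,    /* previous page read       */
                            unsigned long consec,  /* consecutive pages so far */
                            unsigned long st_len,  /* pages per full period    */
                            unsigned long st_pgs)  /* data pages per period    */
{
        unsigned long gap = index - last - 1;

        if (st_len == 0 || st_pgs == 0)
                return 0;
        if (gap == 0)                           /* contiguous read */
                return consec + 1 <= st_pgs;    /* still inside the data chunk */
        /* non-contiguous: must jump exactly one gap after a complete chunk */
        return gap == st_len - st_pgs && consec == st_pgs;
}

int main(void)
{
        /* pattern: 4 data pages every 16 pages */
        printf("%d\n", continues_stride(101, 100, 2, 16, 4)); /* 1: contiguous */
        printf("%d\n", continues_stride(116, 103, 4, 16, 4)); /* 1: exact jump */
        printf("%d\n", continues_stride(110, 103, 4, 16, 4)); /* 0: wrong gap  */
        return 0;
}
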
-+
-+static void ras_update_stride_detector(struct ll_readahead_state *ras,
-+ unsigned long index)
-+{
-+ unsigned long stride_gap = index - ras->ras_last_readpage - 1;
-+
-+ if (!stride_io_mode(ras) && (stride_gap != 0 ||
-+ ras->ras_consecutive_stride_requests == 0)) {
-+ ras->ras_stride_pages = ras->ras_consecutive_pages;
-+ ras->ras_stride_length = stride_gap +ras->ras_consecutive_pages;
-+ }
-+ RAS_CDEBUG(ras);
-+}
-+
-+static unsigned long
-+stride_page_count(struct ll_readahead_state *ras, unsigned long len)
-+{
-+ return stride_pg_count(ras->ras_stride_offset, ras->ras_stride_length,
-+ ras->ras_stride_pages, ras->ras_stride_offset,
-+ len);
-+}
-+
-+/* The stride read-ahead window will be increased by inc_len according to
-+ * the stride I/O pattern */
-+static void ras_stride_increase_window(struct ll_readahead_state *ras,
-+ struct ll_ra_info *ra,
-+ unsigned long inc_len)
-+{
-+ unsigned long left, step, window_len;
-+ unsigned long stride_len;
-+
-+ LASSERT(ras->ras_stride_length > 0);
-+
-+ stride_len = ras->ras_window_start + ras->ras_window_len -
-+ ras->ras_stride_offset;
-+
-+ LASSERTF(stride_len >= 0, "window_start %lu, window_len %lu"
-+ " stride_offset %lu\n", ras->ras_window_start,
-+ ras->ras_window_len, ras->ras_stride_offset);
-+
-+ left = stride_len % ras->ras_stride_length;
-+
-+ window_len = ras->ras_window_len - left;
-+
-+ if (left < ras->ras_stride_pages)
-+ left += inc_len;
-+ else
-+ left = ras->ras_stride_pages + inc_len;
-+
-+ LASSERT(ras->ras_stride_pages != 0);
-+
-+ step = left / ras->ras_stride_pages;
-+ left %= ras->ras_stride_pages;
-+
-+ window_len += step * ras->ras_stride_length + left;
-+
-+ if (stride_page_count(ras, window_len) <= ra->ra_max_pages)
-+ ras->ras_window_len = window_len;
-+
-+ RAS_CDEBUG(ras);
-+}
-+
-+/* Set stride I/O read-ahead window start offset */
-+static void ras_set_stride_offset(struct ll_readahead_state *ras)
-+{
-+ unsigned long window_len = ras->ras_next_readahead -
-+ ras->ras_window_start;
-+ unsigned long left;
-+
-+ LASSERT(ras->ras_stride_length != 0);
-+
-+ left = window_len % ras->ras_stride_length;
-+
-+ ras->ras_stride_offset = ras->ras_next_readahead - left;
-+
-+ RAS_CDEBUG(ras);
-+}
-+
-+static void ras_update(struct ll_sb_info *sbi, struct inode *inode,
-+ struct ll_readahead_state *ras, unsigned long index,
-+ unsigned hit)
-+{
-+ struct ll_ra_info *ra = &sbi->ll_ra_info;
-+ int zero = 0, stride_detect = 0, ra_miss = 0;
-+ ENTRY;
-+
-+ spin_lock(&sbi->ll_lock);
-+ spin_lock(&ras->ras_lock);
-+
-+ ll_ra_stats_inc_unlocked(ra, hit ? RA_STAT_HIT : RA_STAT_MISS);
-+
-+ /* reset the read-ahead window in two cases. First when the app seeks
-+ * or reads to some other part of the file. Secondly if we get a
-+ * read-ahead miss that we think we've previously issued. This can
-+ * be a symptom of there being so many read-ahead pages that the VM is
-+ * reclaiming it before we get to it. */
-+ if (!index_in_window(index, ras->ras_last_readpage, 8, 8)) {
-+ zero = 1;
-+ ll_ra_stats_inc_unlocked(ra, RA_STAT_DISTANT_READPAGE);
-+ } else if (!hit && ras->ras_window_len &&
-+ index < ras->ras_next_readahead &&
-+ index_in_window(index, ras->ras_window_start, 0,
-+ ras->ras_window_len)) {
-+ ra_miss = 1;
-+ ll_ra_stats_inc_unlocked(ra, RA_STAT_MISS_IN_WINDOW);
-+ }
-+
-+ /* On the second access to a file smaller than the tunable
-+ * ra_max_read_ahead_whole_pages trigger RA on all pages in the
-+ * file up to ra_max_pages. This is simply a best effort and
-+ * only occurs once per open file. Normal RA behavior is reverted
-+ * to for subsequent IO. The mmap case does not increment
-+ * ras_requests and thus can never trigger this behavior. */
-+ if (ras->ras_requests == 2 && !ras->ras_request_index) {
-+ __u64 kms_pages;
-+
-+ kms_pages = (i_size_read(inode) + CFS_PAGE_SIZE - 1) >>
-+ CFS_PAGE_SHIFT;
-+
-+ CDEBUG(D_READA, "kmsp "LPU64" mwp %lu mp %lu\n", kms_pages,
-+ ra->ra_max_read_ahead_whole_pages, ra->ra_max_pages);
-+
-+ if (kms_pages &&
-+ kms_pages <= ra->ra_max_read_ahead_whole_pages) {
-+ ras->ras_window_start = 0;
-+ ras->ras_last_readpage = 0;
-+ ras->ras_next_readahead = 0;
-+ ras->ras_window_len = min(ra->ra_max_pages,
-+ ra->ra_max_read_ahead_whole_pages);
-+ GOTO(out_unlock, 0);
-+ }
-+ }
-+ if (zero) {
-+ /* check whether it is in stride I/O mode*/
-+ if (!index_in_stride_window(index, ras, inode)) {
-+ ras_reset(ras, index);
-+ ras->ras_consecutive_pages++;
-+ ras_stride_reset(ras);
-+ GOTO(out_unlock, 0);
-+ } else {
-+ ras->ras_consecutive_requests = 0;
-+ if (++ras->ras_consecutive_stride_requests > 1)
-+ stride_detect = 1;
-+ RAS_CDEBUG(ras);
-+ }
-+ } else {
-+ if (ra_miss) {
-+ if (index_in_stride_window(index, ras, inode) &&
-+ stride_io_mode(ras)) {
-+ /* If stride read-ahead hits a cache miss, the stride
-+  * detector is not reset, to avoid the overhead of
-+  * re-detecting the read-ahead mode. */
-+ if (index != ras->ras_last_readpage + 1)
-+ ras->ras_consecutive_pages = 0;
-+ RAS_CDEBUG(ras);
-+ } else {
-+ /*Reset both stride window and normal RA window*/
-+ ras_reset(ras, index);
-+ ras->ras_consecutive_pages++;
-+ ras_stride_reset(ras);
-+ GOTO(out_unlock, 0);
-+ }
-+ } else if (stride_io_mode(ras)) {
-+ /* If this is a contiguous read while currently in stride
-+  * I/O mode, check whether the stride step is still valid;
-+  * if not, reset the stride read-ahead window. */
-+ if (!index_in_stride_window(index, ras, inode)) {
-+ /*Shrink stride read-ahead window to be zero*/
-+ ras_stride_reset(ras);
-+ ras->ras_window_len = 0;
-+ ras->ras_next_readahead = index;
-+ }
-+ }
-+ }
-+ ras->ras_consecutive_pages++;
-+ ras_update_stride_detector(ras, index);
-+ ras->ras_last_readpage = index;
-+ ras_set_start(ras, index);
-+ ras->ras_next_readahead = max(ras->ras_window_start,
-+ ras->ras_next_readahead);
-+ RAS_CDEBUG(ras);
-+
-+ /* Trigger RA in the mmap case where ras_consecutive_requests
-+ * is not incremented and thus can't be used to trigger RA */
-+ if (!ras->ras_window_len && ras->ras_consecutive_pages == 4) {
-+ ras->ras_window_len = RAS_INCREASE_STEP;
-+ GOTO(out_unlock, 0);
-+ }
-+
-+ /* Initially reset the stride window offset to next_readahead*/
-+ if (ras->ras_consecutive_stride_requests == 2 && stride_detect)
-+ ras_set_stride_offset(ras);
-+
-+ /* The initial ras_window_len is set to the request size. To avoid
-+ * uselessly reading and discarding pages for random IO the window is
-+ * only increased once per consecutive request received. */
-+ if ((ras->ras_consecutive_requests > 1 &&
-+ !ras->ras_request_index) || stride_detect) {
-+ if (stride_io_mode(ras))
-+ ras_stride_increase_window(ras, ra, RAS_INCREASE_STEP);
-+ else
-+ ras->ras_window_len = min(ras->ras_window_len +
-+ RAS_INCREASE_STEP,
-+ ra->ra_max_pages);
-+ }
-+ EXIT;
-+out_unlock:
-+ RAS_CDEBUG(ras);
-+ ras->ras_request_index++;
-+ spin_unlock(&ras->ras_lock);
-+ spin_unlock(&sbi->ll_lock);
-+ return;
-+}
-+
-+int ll_writepage(struct page *page)
-+{
-+ struct inode *inode = page->mapping->host;
-+ struct ll_inode_info *lli = ll_i2info(inode);
-+ struct obd_export *exp;
-+ struct ll_async_page *llap;
-+ int rc = 0;
-+ ENTRY;
-+
-+ LASSERT(PageLocked(page));
-+
-+ exp = ll_i2obdexp(inode);
-+ if (exp == NULL)
-+ GOTO(out, rc = -EINVAL);
-+
-+ llap = llap_from_page(page, LLAP_ORIGIN_WRITEPAGE);
-+ if (IS_ERR(llap))
-+ GOTO(out, rc = PTR_ERR(llap));
-+
-+ LASSERT(!llap->llap_nocache);
-+ LASSERT(!PageWriteback(page));
-+ set_page_writeback(page);
-+
-+ page_cache_get(page);
-+ if (llap->llap_write_queued) {
-+ LL_CDEBUG_PAGE(D_PAGE, page, "marking urgent\n");
-+ rc = obd_set_async_flags(exp, lli->lli_smd, NULL,
-+ llap->llap_cookie,
-+ ASYNC_READY | ASYNC_URGENT);
-+ } else {
-+ rc = queue_or_sync_write(exp, inode, llap, CFS_PAGE_SIZE,
-+ ASYNC_READY | ASYNC_URGENT);
-+ }
-+ if (rc) {
-+ /* re-dirty page on error so it retries write */
-+ if (PageWriteback(page))
-+ end_page_writeback(page);
-+
-+ /* only resend the page if its IO has not started */
-+ if (!PageError(page))
-+ ll_redirty_page(page);
-+
-+ page_cache_release(page);
-+ }
-+out:
-+ if (rc) {
-+ if (!lli->lli_async_rc)
-+ lli->lli_async_rc = rc;
-+ /* resend page only for not started IO*/
-+ unlock_page(page);
-+ }
-+ RETURN(rc);
-+}
-+
-+/*
-+ * for now we do our readpage the same on both 2.4 and 2.5. The kernel's
-+ * read-ahead assumes it is valid to issue readpage all the way up to
-+ * i_size, but our dlm locks make that not the case. We disable the
-+ * kernel's read-ahead and do our own by walking ahead in the page cache
-+ * checking for dlm lock coverage. the main difference between 2.4 and
-+ * 2.6 is how read-ahead gets batched and issued, but we're using our own,
-+ * so they look the same.
-+ */
-+int ll_readpage(struct file *filp, struct page *page)
-+{
-+ struct ll_file_data *fd = LUSTRE_FPRIVATE(filp);
-+ struct inode *inode = page->mapping->host;
-+ struct obd_export *exp;
-+ struct ll_async_page *llap;
-+ struct obd_io_group *oig = NULL;
-+ struct lustre_handle *lockh = NULL;
-+ int rc;
-+ ENTRY;
-+
-+ LASSERT(PageLocked(page));
-+ LASSERT(!PageUptodate(page));
-+ CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),offset=%Lu=%#Lx\n",
-+ inode->i_ino, inode->i_generation, inode,
-+ (((loff_t)page->index) << CFS_PAGE_SHIFT),
-+ (((loff_t)page->index) << CFS_PAGE_SHIFT));
-+ LASSERT(atomic_read(&filp->f_dentry->d_inode->i_count) > 0);
-+
-+ if (!ll_i2info(inode)->lli_smd) {
-+ /* File with no objects - one big hole */
-+ /* We use this only because remove_from_page_cache() is not
-+  * exported; we bring the page back up to date ourselves. */
-+ ll_truncate_complete_page(page);
-+ clear_page(kmap(page));
-+ kunmap(page);
-+ SetPageUptodate(page);
-+ unlock_page(page);
-+ RETURN(0);
-+ }
-+
-+ rc = oig_init(&oig);
-+ if (rc < 0)
-+ GOTO(out, rc);
-+
-+ exp = ll_i2obdexp(inode);
-+ if (exp == NULL)
-+ GOTO(out, rc = -EINVAL);
-+
-+ if (fd->fd_flags & LL_FILE_GROUP_LOCKED)
-+ lockh = &fd->fd_cwlockh;
-+
-+ llap = llap_from_page_with_lockh(page, LLAP_ORIGIN_READPAGE, lockh);
-+ if (IS_ERR(llap)) {
-+ if (PTR_ERR(llap) == -ENOLCK) {
-+ CWARN("ino %lu page %lu (%llu) not covered by "
-+ "a lock (mmap?). check debug logs.\n",
-+ inode->i_ino, page->index,
-+ (long long)page->index << PAGE_CACHE_SHIFT);
-+ }
-+ GOTO(out, rc = PTR_ERR(llap));
-+ }
-+
-+ if (ll_i2sbi(inode)->ll_ra_info.ra_max_pages)
-+ ras_update(ll_i2sbi(inode), inode, &fd->fd_ras, page->index,
-+ llap->llap_defer_uptodate);
-+
-+
-+ if (llap->llap_defer_uptodate) {
-+ /* This is the callpath if we got the page from a readahead */
-+ llap->llap_ra_used = 1;
-+ rc = ll_readahead(&fd->fd_ras, exp, page->mapping, oig,
-+ fd->fd_flags);
-+ if (rc > 0)
-+ obd_trigger_group_io(exp, ll_i2info(inode)->lli_smd,
-+ NULL, oig);
-+ LL_CDEBUG_PAGE(D_PAGE, page, "marking uptodate from defer\n");
-+ SetPageUptodate(page);
-+ unlock_page(page);
-+ GOTO(out_oig, rc = 0);
-+ }
-+
-+ rc = ll_issue_page_read(exp, llap, oig, 0);
-+ if (rc)
-+ GOTO(out, rc);
-+
-+ LL_CDEBUG_PAGE(D_PAGE, page, "queued readpage\n");
-+ /* We have just requested the actual page we want, see if we can tack
-+ * on some readahead to that page's RPC before it is sent. */
-+ if (ll_i2sbi(inode)->ll_ra_info.ra_max_pages)
-+ ll_readahead(&fd->fd_ras, exp, page->mapping, oig,
-+ fd->fd_flags);
-+
-+ rc = obd_trigger_group_io(exp, ll_i2info(inode)->lli_smd, NULL, oig);
-+
-+out:
-+ if (rc)
-+ unlock_page(page);
-+out_oig:
-+ if (oig != NULL)
-+ oig_release(oig);
-+ RETURN(rc);
-+}
-+
-+static void ll_file_put_pages(struct page **pages, int numpages)
-+{
-+ int i;
-+ struct page **pp;
-+ ENTRY;
-+
-+ for (i = 0, pp = pages; i < numpages; i++, pp++) {
-+ if (*pp) {
-+ LL_CDEBUG_PAGE(D_PAGE, (*pp), "free\n");
-+ __ll_put_llap(*pp);
-+ if (page_private(*pp))
-+ CERROR("the llap wasn't freed\n");
-+ (*pp)->mapping = NULL;
-+ if (page_count(*pp) != 1)
-+ CERROR("page %p, flags %#lx, count %i, private %p\n",
-+ (*pp), (unsigned long)(*pp)->flags, page_count(*pp),
-+ (void*)page_private(*pp));
-+ __free_pages(*pp, 0);
-+ }
-+ }
-+ OBD_FREE(pages, numpages * sizeof(struct page*));
-+ EXIT;
-+}
-+
-+static struct page **ll_file_prepare_pages(int numpages, struct inode *inode,
-+ unsigned long first)
-+{
-+ struct page **pages;
-+ int i;
-+ int rc = 0;
-+ ENTRY;
-+
-+ OBD_ALLOC(pages, sizeof(struct page *) * numpages);
-+ if (pages == NULL)
-+ RETURN(ERR_PTR(-ENOMEM));
-+ for (i = 0; i < numpages; i++) {
-+ struct page *page;
-+ struct ll_async_page *llap;
-+
-+ page = alloc_pages(GFP_HIGHUSER, 0);
-+ if (page == NULL)
-+ GOTO(err, rc = -ENOMEM);
-+ pages[i] = page;
-+ /* llap_from_page needs page index and mapping to be set */
-+ page->index = first++;
-+ page->mapping = inode->i_mapping;
-+ llap = llap_from_page(page, LLAP_ORIGIN_LOCKLESS_IO);
-+ if (IS_ERR(llap))
-+ GOTO(err, rc = PTR_ERR(llap));
-+ llap->llap_lockless_io_page = 1;
-+ }
-+ RETURN(pages);
-+err:
-+ ll_file_put_pages(pages, numpages);
-+ RETURN(ERR_PTR(rc));
-+ }
-+
-+static ssize_t ll_file_copy_pages(struct page **pages, int numpages,
-+ const struct iovec *iov, unsigned long nsegs,
-+ ssize_t iov_offset, loff_t pos, size_t count,
-+ int rw)
-+{
-+ ssize_t amount = 0;
-+ int i;
-+ int updatechecksum = ll_i2sbi(pages[0]->mapping->host)->ll_flags &
-+ LL_SBI_LLITE_CHECKSUM;
-+ ENTRY;
-+
-+ for (i = 0; i < numpages; i++) {
-+ unsigned offset, bytes, left = 0;
-+ char *vaddr;
-+
-+ vaddr = kmap(pages[i]);
-+ offset = pos & (CFS_PAGE_SIZE - 1);
-+ bytes = min_t(unsigned, CFS_PAGE_SIZE - offset, count);
-+ LL_CDEBUG_PAGE(D_PAGE, pages[i], "op = %s, addr = %p, "
-+ "bytes = %u\n",
-+ (rw == WRITE) ? "CFU" : "CTU",
-+ vaddr + offset, bytes);
-+ while (bytes > 0 && !left && nsegs) {
-+ unsigned copy = min_t(ssize_t, bytes,
-+ iov->iov_len - iov_offset);
-+ if (rw == WRITE) {
-+ left = copy_from_user(vaddr + offset,
-+ iov->iov_base +iov_offset,
-+ copy);
-+ if (updatechecksum) {
-+ struct ll_async_page *llap;
-+
-+ llap = llap_cast_private(pages[i]);
-+ llap->llap_checksum =
-+ init_checksum(OSC_DEFAULT_CKSUM);
-+ llap->llap_checksum =
-+ compute_checksum(llap->llap_checksum,
-+ vaddr,CFS_PAGE_SIZE,
-+ OSC_DEFAULT_CKSUM);
-+ }
-+ } else {
-+ left = copy_to_user(iov->iov_base + iov_offset,
-+ vaddr + offset, copy);
-+ }
-+
-+ amount += copy;
-+ count -= copy;
-+ pos += copy;
-+ iov_offset += copy;
-+ bytes -= copy;
-+ if (iov_offset == iov->iov_len) {
-+ iov_offset = 0;
-+ iov++;
-+ nsegs--;
-+ }
-+ }
-+ kunmap(pages[i]);
-+ if (left) {
-+ amount -= left;
-+ break;
-+ }
-+ }
-+ if (amount == 0)
-+ RETURN(-EFAULT);
-+ RETURN(amount);
-+}
-+
-+static int ll_file_oig_pages(struct inode * inode, struct page **pages,
-+ int numpages, loff_t pos, size_t count, int rw)
-+{
-+ struct obd_io_group *oig;
-+ struct ll_inode_info *lli = ll_i2info(inode);
-+ struct obd_export *exp;
-+ loff_t org_pos = pos;
-+ obd_flag brw_flags;
-+ int rc;
-+ int i;
-+ ENTRY;
-+
-+ exp = ll_i2obdexp(inode);
-+ if (exp == NULL)
-+ RETURN(-EINVAL);
-+ rc = oig_init(&oig);
-+ if (rc)
-+ RETURN(rc);
-+ brw_flags = OBD_BRW_SRVLOCK;
-+ if (cfs_capable(CFS_CAP_SYS_RESOURCE))
-+ brw_flags |= OBD_BRW_NOQUOTA;
-+
-+ for (i = 0; i < numpages; i++) {
-+ struct ll_async_page *llap;
-+ unsigned from, bytes;
-+
-+ from = pos & (CFS_PAGE_SIZE - 1);
-+ bytes = min_t(unsigned, CFS_PAGE_SIZE - from,
-+ count - pos + org_pos);
-+ llap = llap_cast_private(pages[i]);
-+ LASSERT(llap);
-+
-+ lock_page(pages[i]);
-+
-+ LL_CDEBUG_PAGE(D_PAGE, pages[i], "offset "LPU64","
-+ " from %u, bytes = %u\n",
-+ pos, from, bytes);
-+ LASSERTF(pos >> CFS_PAGE_SHIFT == pages[i]->index,
-+ "wrong page index %lu (%lu)\n",
-+ pages[i]->index,
-+ (unsigned long)(pos >> CFS_PAGE_SHIFT));
-+ rc = obd_queue_group_io(exp, lli->lli_smd, NULL, oig,
-+ llap->llap_cookie,
-+ (rw == WRITE) ?
-+ OBD_BRW_WRITE:OBD_BRW_READ,
-+ from, bytes, brw_flags,
-+ ASYNC_READY | ASYNC_URGENT |
-+ ASYNC_COUNT_STABLE | ASYNC_GROUP_SYNC);
-+ if (rc) {
-+ i++;
-+ GOTO(out, rc);
-+ }
-+ pos += bytes;
-+ }
-+ rc = obd_trigger_group_io(exp, lli->lli_smd, NULL, oig);
-+ if (rc)
-+ GOTO(out, rc);
-+ rc = oig_wait(oig);
-+out:
-+ while(--i >= 0)
-+ unlock_page(pages[i]);
-+ oig_release(oig);
-+ RETURN(rc);
-+}
-+
-+/* Advance through the passed iov, adjusting the iov pointer as necessary,
-+ * and return the starting offset within the entry we end up pointing at.
-+ * Also reduce nr_segs as needed. */
-+static ssize_t ll_iov_advance(const struct iovec **iov, unsigned long *nr_segs,
-+ ssize_t offset)
-+{
-+ while (*nr_segs > 0) {
-+ if ((*iov)->iov_len > offset)
-+ return ((*iov)->iov_len - offset);
-+ offset -= (*iov)->iov_len;
-+ (*iov)++;
-+ (*nr_segs)--;
-+ }
-+ return 0;
-+}
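
A minimal userspace sketch of the advance-through-an-iovec idea is below. It follows the contract described in the comment above (return the starting offset within the entry we land on, and shrink nr_segs) rather than copying the patch code verbatim; names and test values are made up.

#include <stdio.h>
#include <sys/uio.h>

/* Consume `offset` bytes from the front of an iovec array, drop exhausted
 * segments, and return the offset into the segment the cursor lands on. */
static size_t iov_advance(const struct iovec **iov, unsigned long *nr_segs,
                          size_t offset)
{
        while (*nr_segs > 0) {
                if ((*iov)->iov_len > offset)
                        return offset;          /* start here in this segment */
                offset -= (*iov)->iov_len;      /* segment fully consumed */
                (*iov)++;
                (*nr_segs)--;
        }
        return 0;
}

int main(void)
{
        char a[100], b[50], c[200];
        struct iovec vec[3] = {
                { .iov_base = a, .iov_len = sizeof(a) },
                { .iov_base = b, .iov_len = sizeof(b) },
                { .iov_base = c, .iov_len = sizeof(c) },
        };
        const struct iovec *cur = vec;
        unsigned long nsegs = 3;
        size_t off = iov_advance(&cur, &nsegs, 120);   /* skip 120 bytes */

        printf("segment %ld, offset %zu, %lu segments left\n",
               (long)(cur - vec), off, nsegs);
        return 0;
}
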
-+
-+ssize_t ll_file_lockless_io(struct file *file, const struct iovec *iov,
-+ unsigned long nr_segs,
-+ loff_t *ppos, int rw, ssize_t count)
-+{
-+ loff_t pos;
-+ struct inode *inode = file->f_dentry->d_inode;
-+ ssize_t rc = 0;
-+ int max_pages;
-+ size_t amount = 0;
-+ unsigned long first, last;
-+ const struct iovec *iv = &iov[0];
-+ unsigned long nsegs = nr_segs;
-+ unsigned long offset = 0;
-+ ENTRY;
-+
-+ if (rw == READ) {
-+ loff_t isize;
-+
-+ ll_inode_size_lock(inode, 0);
-+ isize = i_size_read(inode);
-+ ll_inode_size_unlock(inode, 0);
-+ if (*ppos >= isize)
-+ GOTO(out, rc = 0);
-+ if (*ppos + count >= isize)
-+ count -= *ppos + count - isize;
-+ if (count == 0)
-+ GOTO(out, rc);
-+ } else {
-+ rc = generic_write_checks(file, ppos, &count, 0);
-+ if (rc)
-+ GOTO(out, rc);
-+ rc = ll_remove_suid(file->f_dentry, file->f_vfsmnt);
-+ if (rc)
-+ GOTO(out, rc);
-+ }
-+
-+ pos = *ppos;
-+ first = pos >> CFS_PAGE_SHIFT;
-+ last = (pos + count - 1) >> CFS_PAGE_SHIFT;
-+ max_pages = PTLRPC_MAX_BRW_PAGES *
-+ ll_i2info(inode)->lli_smd->lsm_stripe_count;
-+ CDEBUG(D_INFO, "%u, stripe_count = %u\n",
-+ PTLRPC_MAX_BRW_PAGES /* max_pages_per_rpc */,
-+ ll_i2info(inode)->lli_smd->lsm_stripe_count);
-+
-+ while (first <= last && rc >= 0) {
-+ int pages_for_io;
-+ struct page **pages;
-+ size_t bytes = count - amount;
-+
-+ pages_for_io = min_t(int, last - first + 1, max_pages);
-+ pages = ll_file_prepare_pages(pages_for_io, inode, first);
-+ if (IS_ERR(pages)) {
-+ rc = PTR_ERR(pages);
-+ break;
-+ }
-+ if (rw == WRITE) {
-+ rc = ll_file_copy_pages(pages, pages_for_io, iv, nsegs,
-+ offset, pos + amount, bytes,
-+ rw);
-+ if (rc < 0)
-+ GOTO(put_pages, rc);
-+ offset = ll_iov_advance(&iv, &nsegs, offset + rc);
-+ bytes = rc;
-+ }
-+ rc = ll_file_oig_pages(inode, pages, pages_for_io,
-+ pos + amount, bytes, rw);
-+ if (rc)
-+ GOTO(put_pages, rc);
-+ if (rw == READ) {
-+ rc = ll_file_copy_pages(pages, pages_for_io, iv, nsegs,
-+ offset, pos + amount, bytes, rw);
-+ if (rc < 0)
-+ GOTO(put_pages, rc);
-+ offset = ll_iov_advance(&iv, &nsegs, offset + rc);
-+ bytes = rc;
-+ }
-+ amount += bytes;
-+put_pages:
-+ ll_file_put_pages(pages, pages_for_io);
-+ first += pages_for_io;
-+ /* a short read/write check */
-+ if (pos + amount < ((loff_t)first << CFS_PAGE_SHIFT))
-+ break;
-+ /* Check whether we ran out of userspace buffers (how could
-+ that happen?) */
-+ if (nsegs == 0)
-+ break;
-+ }
-+ /* NOTE: don't update i_size and KMS in absence of LDLM locks even
-+ * write makes the file large */
-+ file_accessed(file);
-+ if (rw == READ && amount < count && rc == 0) {
-+ unsigned long not_cleared;
-+
-+ while (nsegs > 0) {
-+ ssize_t to_clear = min_t(ssize_t, count - amount,
-+ iv->iov_len - offset);
-+ not_cleared = clear_user(iv->iov_base + offset,
-+ to_clear);
-+ amount += to_clear - not_cleared;
-+ if (not_cleared) {
-+ rc = -EFAULT;
-+ break;
-+ }
-+ offset = 0;
-+ iv++;
-+ nsegs--;
-+ }
-+ }
-+ if (amount > 0) {
-+ lprocfs_counter_add(ll_i2sbi(inode)->ll_stats,
-+ (rw == WRITE) ?
-+ LPROC_LL_LOCKLESS_WRITE :
-+ LPROC_LL_LOCKLESS_READ,
-+ (long)amount);
-+ *ppos += amount;
-+ RETURN(amount);
-+ }
-+out:
-+ RETURN(rc);
-+}
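
One detail worth keeping in mind when reading the loop above is how a byte range maps onto the inclusive page range [first, last]. The trivial userspace sketch below shows that arithmetic; PAGE_SHIFT_SKETCH merely stands in for CFS_PAGE_SHIFT (12 for 4 KiB pages) and the numbers are made up.

#include <stdio.h>

#define PAGE_SHIFT_SKETCH 12

int main(void)
{
        long long pos = 5000, count = 10000;
        unsigned long first = pos >> PAGE_SHIFT_SKETCH;
        unsigned long last = (pos + count - 1) >> PAGE_SHIFT_SKETCH;

        /* A byte range [pos, pos + count) touches pages first..last. */
        printf("bytes [%lld, %lld) -> pages %lu..%lu (%lu pages)\n",
               pos, pos + count, first, last, last - first + 1);
        return 0;
}
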
-diff -urNad lustre~/lustre/llite/symlink.c lustre/lustre/llite/symlink.c
---- lustre~/lustre/llite/symlink.c 2009-03-12 10:32:27.000000000 +0100
-+++ lustre/lustre/llite/symlink.c 2009-03-13 09:45:03.000000000 +0100
-@@ -177,8 +177,12 @@
- up(&lli->lli_size_sem);
- }
- if (rc) {
-+#ifdef HAVE_PATH_RELEASE
- path_release(nd); /* Kernel assumes that ->follow_link()
- releases nameidata on error */
-+#else
-+ path_put(&nd->path);
-+#endif
- GOTO(out, rc);
- }
-
-diff -urNad lustre~/lustre/lvfs/lvfs_linux.c lustre/lustre/lvfs/lvfs_linux.c
---- lustre~/lustre/lvfs/lvfs_linux.c 2009-03-12 10:32:27.000000000 +0100
-+++ lustre/lustre/lvfs/lvfs_linux.c 2009-03-13 09:45:03.000000000 +0100
-@@ -148,10 +148,10 @@
- */
-
- save->fs = get_fs();
-- LASSERT(atomic_read(&current->fs->pwd->d_count));
-+ LASSERT(atomic_read(&cfs_fs_pwd(current->fs)->d_count));
- LASSERT(atomic_read(&new_ctx->pwd->d_count));
-- save->pwd = dget(current->fs->pwd);
-- save->pwdmnt = mntget(current->fs->pwdmnt);
-+ save->pwd = dget(cfs_fs_pwd(current->fs));
-+ save->pwdmnt = mntget(cfs_fs_mnt(current->fs));
- save->luc.luc_umask = current->fs->umask;
-
- LASSERT(save->pwd);
-@@ -205,10 +205,10 @@
- atomic_read(&current->fs->pwdmnt->mnt_count));
- */
-
-- LASSERTF(current->fs->pwd == new_ctx->pwd, "%p != %p\n",
-- current->fs->pwd, new_ctx->pwd);
-- LASSERTF(current->fs->pwdmnt == new_ctx->pwdmnt, "%p != %p\n",
-- current->fs->pwdmnt, new_ctx->pwdmnt);
-+ LASSERTF(cfs_fs_pwd(current->fs) == new_ctx->pwd, "%p != %p\n",
-+ cfs_fs_pwd(current->fs), new_ctx->pwd);
-+ LASSERTF(cfs_fs_mnt(current->fs) == new_ctx->pwdmnt, "%p != %p\n",
-+ cfs_fs_mnt(current->fs), new_ctx->pwdmnt);
-
- set_fs(saved->fs);
- ll_set_fs_pwd(current->fs, saved->pwdmnt, saved->pwd);
-diff -urNad lustre~/lustre/mgc/mgc_request.c lustre/lustre/mgc/mgc_request.c
---- lustre~/lustre/mgc/mgc_request.c 2009-03-12 10:32:27.000000000 +0100
-+++ lustre/lustre/mgc/mgc_request.c 2009-03-13 09:45:03.000000000 +0100
-@@ -415,7 +415,7 @@
- obd->obd_lvfs_ctxt.fs = get_ds();
-
- push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
-- dentry = lookup_one_len(MOUNT_CONFIGS_DIR, current->fs->pwd,
-+ dentry = lookup_one_len(MOUNT_CONFIGS_DIR, cfs_fs_pwd(current->fs),
- strlen(MOUNT_CONFIGS_DIR));
- pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
- if (IS_ERR(dentry)) {
-diff -urNad lustre~/lustre/obdclass/linux/linux-module.c lustre/lustre/obdclass/linux/linux-module.c
---- lustre~/lustre/obdclass/linux/linux-module.c 2009-03-12 10:32:27.000000000 +0100
-+++ lustre/lustre/obdclass/linux/linux-module.c 2009-03-13 09:45:03.000000000 +0100
-@@ -419,13 +419,14 @@
- ENTRY;
-
- obd_sysctl_init();
-- proc_lustre_root = proc_mkdir("lustre", proc_root_fs);
-+ proc_lustre_root = lprocfs_register("fs/lustre", NULL,
-+ lprocfs_base, NULL);
- if (!proc_lustre_root) {
- printk(KERN_ERR
- "LustreError: error registering /proc/fs/lustre\n");
- RETURN(-ENOMEM);
- }
-- proc_version = lprocfs_add_vars(proc_lustre_root, lprocfs_base, NULL);
-+
- entry = create_proc_entry("devices", 0444, proc_lustre_root);
- if (entry == NULL) {
- CERROR("error registering /proc/fs/lustre/devices\n");
-diff -urNad lustre~/lustre/obdclass/linux/linux-sysctl.c lustre/lustre/obdclass/linux/linux-sysctl.c
---- lustre~/lustre/obdclass/linux/linux-sysctl.c 2009-03-12 10:32:27.000000000 +0100
-+++ lustre/lustre/obdclass/linux/linux-sysctl.c 2009-03-13 09:45:03.000000000 +0100
-@@ -56,7 +56,9 @@
-
- cfs_sysctl_table_header_t *obd_table_header = NULL;
-
--#define OBD_SYSCTL 300
-+#ifndef HAVE_SYSCTL_UNNUMBERED
-+
-+#define CTL_LUSTRE 300
-
- enum {
- OBD_FAIL_LOC = 1, /* control test failures instrumentation */
-@@ -74,6 +76,23 @@
- OBD_ALLOC_FAIL_RATE, /* memory allocation random failure rate */
- OBD_MAX_DIRTY_PAGES, /* maximum dirty pages */
- };
-+#else
-+#define CTL_LUSTRE CTL_UNNUMBERED
-+#define OBD_FAIL_LOC CTL_UNNUMBERED
-+#define OBD_FAIL_VAL CTL_UNNUMBERED
-+#define OBD_TIMEOUT CTL_UNNUMBERED
-+#define OBD_DUMP_ON_TIMEOUT CTL_UNNUMBERED
-+#define OBD_MEMUSED CTL_UNNUMBERED
-+#define OBD_PAGESUSED CTL_UNNUMBERED
-+#define OBD_MAXMEMUSED CTL_UNNUMBERED
-+#define OBD_MAXPAGESUSED CTL_UNNUMBERED
-+#define OBD_SYNCFILTER CTL_UNNUMBERED
-+#define OBD_LDLM_TIMEOUT CTL_UNNUMBERED
-+#define OBD_DUMP_ON_EVICTION CTL_UNNUMBERED
-+#define OBD_DEBUG_PEER_ON_TIMEOUT CTL_UNNUMBERED
-+#define OBD_ALLOC_FAIL_RATE CTL_UNNUMBERED
-+#define OBD_MAX_DIRTY_PAGES CTL_UNNUMBERED
-+#endif
-
- int LL_PROC_PROTO(proc_fail_loc)
- {
-@@ -120,7 +139,8 @@
- obd_max_dirty_pages = 4 << (20 - CFS_PAGE_SHIFT);
- }
- } else {
-- char buf[21];
-+ char buf[22];
-+ struct ctl_table dummy;
- int len;
-
- len = lprocfs_read_frac_helper(buf, sizeof(buf),
-@@ -129,7 +149,13 @@
- if (len > *lenp)
- len = *lenp;
- buf[len] = '\0';
-- if (copy_to_user(buffer, buf, len))
-+
-+ dummy = *table;
-+ dummy.data = buf;
-+ dummy.maxlen = sizeof(buf);
-+
-+ rc = ll_proc_dostring(&dummy,write,filp,buffer,lenp, ppos);
-+ if (rc)
- return -EFAULT;
- *lenp = len;
- }
-@@ -152,7 +178,8 @@
- (unsigned int*)table->data,
- OBD_ALLOC_FAIL_MULT);
- } else {
-- char buf[21];
-+ char buf[22];
-+ struct ctl_table dummy;
- int len;
-
- len = lprocfs_read_frac_helper(buf, sizeof(buf),
-@@ -161,7 +188,12 @@
- if (len > *lenp)
- len = *lenp;
- buf[len] = '\0';
-- if (copy_to_user(buffer, buf, len))
-+ dummy = *table;
-+ dummy.data = buf;
-+ dummy.maxlen = sizeof(buf);
-+
-+ rc = ll_proc_dostring(&dummy,write,filp,buffer,lenp, ppos);
-+ if(rc)
- return -EFAULT;
- *lenp = len;
- }
-@@ -172,6 +204,7 @@
-
- int LL_PROC_PROTO(proc_memory_alloc)
- {
-+ struct ctl_table dummy;
- char buf[22];
- int len;
- DECLARE_LL_PROC_PPOS_DECL;
-@@ -187,15 +220,17 @@
- if (len > *lenp)
- len = *lenp;
- buf[len] = '\0';
-- if (copy_to_user(buffer, buf, len))
-- return -EFAULT;
-- *lenp = len;
-- *ppos += *lenp;
-- return 0;
-+
-+ dummy = *table;
-+ dummy.data = buf;
-+ dummy.maxlen = sizeof(buf);
-+
-+ return ll_proc_dostring(&dummy,write,filp,buffer,lenp, ppos);
- }
-
- int LL_PROC_PROTO(proc_pages_alloc)
- {
-+ struct ctl_table dummy;
- char buf[22];
- int len;
- DECLARE_LL_PROC_PPOS_DECL;
-@@ -211,15 +246,17 @@
- if (len > *lenp)
- len = *lenp;
- buf[len] = '\0';
-- if (copy_to_user(buffer, buf, len))
-- return -EFAULT;
-- *lenp = len;
-- *ppos += *lenp;
-- return 0;
-+
-+ dummy = *table;
-+ dummy.data = buf;
-+ dummy.maxlen = sizeof(buf);
-+
-+ return ll_proc_dostring(&dummy,write,filp,buffer,lenp, ppos);
- }
-
- int LL_PROC_PROTO(proc_mem_max)
- {
-+ struct ctl_table dummy;
- char buf[22];
- int len;
- DECLARE_LL_PROC_PPOS_DECL;
-@@ -235,17 +272,19 @@
- if (len > *lenp)
- len = *lenp;
- buf[len] = '\0';
-- if (copy_to_user(buffer, buf, len))
-- return -EFAULT;
-- *lenp = len;
-- *ppos += *lenp;
-- return 0;
-+
-+ dummy = *table;
-+ dummy.data = buf;
-+ dummy.maxlen = sizeof(buf);
-+
-+ return ll_proc_dostring(&dummy,write,filp,buffer,lenp, ppos);
- }
-
- int LL_PROC_PROTO(proc_pages_max)
- {
- char buf[22];
- int len;
-+ struct ctl_table dummy;
- DECLARE_LL_PROC_PPOS_DECL;
-
- if (!*lenp || (*ppos && !write)) {
-@@ -254,16 +293,17 @@
- }
- if (write)
- return -EINVAL;
-+ dummy = *table;
-+ dummy.data = buf;
-+ dummy.maxlen = sizeof(buf);
-+ len = snprintf(buf, sizeof(buf), LPU64,
-+ obd_pages_max());
-
-- len = snprintf(buf, sizeof(buf), LPU64"\n", obd_pages_max());
-- if (len > *lenp)
-- len = *lenp;
-- buf[len] = '\0';
-- if (copy_to_user(buffer, buf, len))
-- return -EFAULT;
-- *lenp = len;
-- *ppos += *lenp;
-- return 0;
-+ if (len > *lenp)
-+ len = *lenp;
-+ buf[len] = '\0';
-+
-+ return ll_proc_dostring(&dummy,write,filp,buffer,lenp, ppos);
- }
-
- static cfs_sysctl_table_t obd_table[] = {
-@@ -281,7 +321,8 @@
- .data = &obd_fail_val,
- .maxlen = sizeof(int),
- .mode = 0644,
-- .proc_handler = &proc_dointvec
-+ .proc_handler = &proc_dointvec,
-+ .strategy = &sysctl_intvec,
- },
- {
- .ctl_name = OBD_TIMEOUT,
-@@ -297,7 +338,7 @@
- .data = &obd_debug_peer_on_timeout,
- .maxlen = sizeof(int),
- .mode = 0644,
-- .proc_handler = &proc_dointvec
-+ .proc_handler = &proc_dointvec,
- },
- {
- .ctl_name = OBD_DUMP_ON_TIMEOUT,
-@@ -305,7 +346,7 @@
- .data = &obd_dump_on_timeout,
- .maxlen = sizeof(int),
- .mode = 0644,
-- .proc_handler = &proc_dointvec
-+ .proc_handler = &proc_dointvec,
- },
- {
- .ctl_name = OBD_DUMP_ON_EVICTION,
-@@ -313,7 +354,7 @@
- .data = &obd_dump_on_eviction,
- .maxlen = sizeof(int),
- .mode = 0644,
-- .proc_handler = &proc_dointvec
-+ .proc_handler = &proc_dointvec,
- },
- {
- .ctl_name = OBD_MEMUSED,
-@@ -321,7 +362,7 @@
- .data = NULL,
- .maxlen = 0,
- .mode = 0444,
-- .proc_handler = &proc_memory_alloc
-+ .proc_handler = &proc_memory_alloc,
- },
- {
- .ctl_name = OBD_PAGESUSED,
-@@ -329,7 +370,7 @@
- .data = NULL,
- .maxlen = 0,
- .mode = 0444,
-- .proc_handler = &proc_pages_alloc
-+ .proc_handler = &proc_pages_alloc,
- },
- {
- .ctl_name = OBD_MAXMEMUSED,
-@@ -337,7 +378,7 @@
- .data = NULL,
- .maxlen = 0,
- .mode = 0444,
-- .proc_handler = &proc_mem_max
-+ .proc_handler = &proc_mem_max,
- },
- {
- .ctl_name = OBD_MAXPAGESUSED,
-@@ -345,7 +386,7 @@
- .data = NULL,
- .maxlen = 0,
- .mode = 0444,
-- .proc_handler = &proc_pages_max
-+ .proc_handler = &proc_pages_max,
- },
- {
- .ctl_name = OBD_LDLM_TIMEOUT,
-@@ -378,7 +419,7 @@
-
- static cfs_sysctl_table_t parent_table[] = {
- {
-- .ctl_name = OBD_SYSCTL,
-+ .ctl_name = CTL_LUSTRE,
- .procname = "lustre",
- .data = NULL,
- .maxlen = 0,
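
One recurring change in the linux-sysctl.c hunk above is the compile-time fallback: when the kernel defines CTL_UNNUMBERED, every ctl_name is set to it, otherwise the old fixed binary numbers are kept. A tiny stand-alone sketch of that pattern follows; the numeric values are placeholders so it compiles in userspace, and only the #ifdef structure matters.

#include <stdio.h>

#define HAVE_SYSCTL_UNNUMBERED 1        /* pretend configure detected it */

#ifdef HAVE_SYSCTL_UNNUMBERED
# define CTL_UNNUMBERED 0               /* placeholder for the kernel value */
# define CTL_LUSTRE     CTL_UNNUMBERED
# define OBD_TIMEOUT    CTL_UNNUMBERED
#else
# define CTL_LUSTRE     300
# define OBD_TIMEOUT    3
#endif

int main(void)
{
        printf("lustre ctl_name = %d, timeout ctl_name = %d\n",
               CTL_LUSTRE, OBD_TIMEOUT);
        return 0;
}
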
-diff -urNad lustre~/lustre/obdclass/lprocfs_status.c lustre/lustre/obdclass/lprocfs_status.c
---- lustre~/lustre/obdclass/lprocfs_status.c 2009-03-12 10:32:27.000000000 +0100
-+++ lustre/lustre/obdclass/lprocfs_status.c 2009-03-13 09:45:03.000000000 +0100
-@@ -151,7 +151,7 @@
-
- LPROCFS_ENTRY();
- OBD_FAIL_TIMEOUT(OBD_FAIL_LPROC_REMOVE, 10);
-- if (!dp->deleted && dp->read_proc)
-+ if (!LPROCFS_CHECK_DELETED(dp) && dp->read_proc)
- rc = dp->read_proc(page, &start, *ppos, PAGE_SIZE,
- &eof, dp->data);
- LPROCFS_EXIT();
-@@ -191,7 +191,7 @@
- int rc = -EIO;
-
- LPROCFS_ENTRY();
-- if (!dp->deleted && dp->write_proc)
-+ if (!LPROCFS_CHECK_DELETED(dp) && dp->write_proc)
- rc = dp->write_proc(f, buf, size, dp->data);
- LPROCFS_EXIT();
- return rc;
-diff -urNad lustre~/lustre/obdclass/lprocfs_status.c.orig lustre/lustre/obdclass/lprocfs_status.c.orig
---- lustre~/lustre/obdclass/lprocfs_status.c.orig 1970-01-01 00:00:00.000000000 +0000
-+++ lustre/lustre/obdclass/lprocfs_status.c.orig 2009-03-13 09:45:03.000000000 +0100
-@@ -0,0 +1,2062 @@
-+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-+ * vim:expandtab:shiftwidth=8:tabstop=8:
-+ *
-+ * GPL HEADER START
-+ *
-+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
-+ *
-+ * This program is free software; you can redistribute it and/or modify
-+ * it under the terms of the GNU General Public License version 2 only,
-+ * as published by the Free Software Foundation.
-+ *
-+ * This program is distributed in the hope that it will be useful, but
-+ * WITHOUT ANY WARRANTY; without even the implied warranty of
-+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-+ * General Public License version 2 for more details (a copy is included
-+ * in the LICENSE file that accompanied this code).
-+ *
-+ * You should have received a copy of the GNU General Public License
-+ * version 2 along with this program; If not, see
-+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
-+ *
-+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
-+ * CA 95054 USA or visit www.sun.com if you need additional information or
-+ * have any questions.
-+ *
-+ * GPL HEADER END
-+ */
-+/*
-+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved
-+ * Use is subject to license terms.
-+ */
-+/*
-+ * This file is part of Lustre, http://www.lustre.org/
-+ * Lustre is a trademark of Sun Microsystems, Inc.
-+ *
-+ * lustre/obdclass/lprocfs_status.c
-+ *
-+ * Author: Hariharan Thantry <thantry at users.sourceforge.net>
-+ */
-+
-+#ifndef EXPORT_SYMTAB
-+# define EXPORT_SYMTAB
-+#endif
-+#define DEBUG_SUBSYSTEM S_CLASS
-+
-+#ifndef __KERNEL__
-+# include <liblustre.h>
-+#endif
-+
-+#include <obd_class.h>
-+#include <lprocfs_status.h>
-+#include <lustre_fsfilt.h>
-+
-+#if defined(LPROCFS)
-+
-+#define MAX_STRING_SIZE 128
-+
-+/* for bug 10866, global variable */
-+DECLARE_RWSEM(_lprocfs_lock);
-+EXPORT_SYMBOL(_lprocfs_lock);
-+
-+int lprocfs_seq_release(struct inode *inode, struct file *file)
-+{
-+ LPROCFS_EXIT();
-+ return seq_release(inode, file);
-+}
-+EXPORT_SYMBOL(lprocfs_seq_release);
-+
-+struct proc_dir_entry *lprocfs_srch(struct proc_dir_entry *head,
-+ const char *name)
-+{
-+ struct proc_dir_entry *temp;
-+
-+ if (head == NULL)
-+ return NULL;
-+
-+ LPROCFS_ENTRY();
-+ temp = head->subdir;
-+ while (temp != NULL) {
-+ if (strcmp(temp->name, name) == 0) {
-+ LPROCFS_EXIT();
-+ return temp;
-+ }
-+
-+ temp = temp->next;
-+ }
-+ LPROCFS_EXIT();
-+ return NULL;
-+}
-+
-+/* lprocfs API calls */
-+
-+/* Function that emulates snprintf but also has the side effect of advancing
-+ the page pointer for the next write into the buffer, incrementing the total
-+ length written to the buffer, and decrementing the size left in the
-+ buffer. */
-+static int lprocfs_obd_snprintf(char **page, int end, int *len,
-+ const char *format, ...)
-+{
-+ va_list list;
-+ int n;
-+
-+ if (*len >= end)
-+ return 0;
-+
-+ va_start(list, format);
-+ n = vsnprintf(*page, end - *len, format, list);
-+ va_end(list);
-+
-+ *page += n; *len += n;
-+ return n;
-+}
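
The helper above is a recurring lprocfs pattern: print into a buffer, then advance the write pointer and the running length so the next call appends after the previous output. A minimal userspace analogue is sketched below; unlike the kernel helper it clamps the advance to what actually fit, so the pointer can never run past the buffer. The names are made up.

#include <stdarg.h>
#include <stdio.h>

/* Write at *page, then move the pointer and running length forward so that
 * the next call appends after the previous output. */
static int buf_snprintf(char **page, int end, int *len, const char *format, ...)
{
        va_list list;
        int n;

        if (*len >= end)
                return 0;

        va_start(list, format);
        n = vsnprintf(*page, end - *len, format, list);
        va_end(list);

        if (n > end - *len - 1)         /* output was truncated */
                n = end - *len - 1;
        *page += n;
        *len += n;
        return n;
}

int main(void)
{
        char buf[64];
        char *p = buf;
        int len = 0;

        buf_snprintf(&p, sizeof(buf), &len, "status: %s,", "ok");
        buf_snprintf(&p, sizeof(buf), &len, " %d bytes so far", len);
        printf("%s\n", buf);
        return 0;
}
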
-+
-+int lprocfs_add_simple(struct proc_dir_entry *root, char *name,
-+ read_proc_t *read_proc, write_proc_t *write_proc,
-+ void *data)
-+{
-+ struct proc_dir_entry *proc;
-+ mode_t mode = 0;
-+
-+ if (root == NULL || name == NULL)
-+ return -EINVAL;
-+ if (read_proc)
-+ mode = 0444;
-+ if (write_proc)
-+ mode |= 0200;
-+ proc = create_proc_entry(name, mode, root);
-+ if (!proc) {
-+ CERROR("LprocFS: No memory to create /proc entry %s", name);
-+ return -ENOMEM;
-+ }
-+ proc->read_proc = read_proc;
-+ proc->write_proc = write_proc;
-+ proc->data = data;
-+ return 0;
-+}
-+
-+static ssize_t lprocfs_fops_read(struct file *f, char __user *buf, size_t size,
-+ loff_t *ppos)
-+{
-+ struct proc_dir_entry *dp = PDE(f->f_dentry->d_inode);
-+ char *page, *start = NULL;
-+ int rc = 0, eof = 1, count;
-+
-+ if (*ppos >= PAGE_SIZE)
-+ return 0;
-+
-+ page = (char *)__get_free_page(GFP_KERNEL);
-+ if (page == NULL)
-+ return -ENOMEM;
-+
-+ LPROCFS_ENTRY();
-+ OBD_FAIL_TIMEOUT(OBD_FAIL_LPROC_REMOVE, 10);
-+ if (!dp->deleted && dp->read_proc)
-+ rc = dp->read_proc(page, &start, *ppos, PAGE_SIZE,
-+ &eof, dp->data);
-+ LPROCFS_EXIT();
-+ if (rc <= 0)
-+ goto out;
-+
-+ /* for lustre proc read, the read count must be less than PAGE_SIZE */
-+ LASSERT(eof == 1);
-+
-+ if (start == NULL) {
-+ rc -= *ppos;
-+ if (rc < 0)
-+ rc = 0;
-+ if (rc == 0)
-+ goto out;
-+ start = page + *ppos;
-+ } else if (start < page) {
-+ start = page;
-+ }
-+
-+ count = (rc < size) ? rc : size;
-+ if (copy_to_user(buf, start, count)) {
-+ rc = -EFAULT;
-+ goto out;
-+ }
-+ *ppos += count;
-+
-+out:
-+ free_page((unsigned long)page);
-+ return rc;
-+}
-+
-+static ssize_t lprocfs_fops_write(struct file *f, const char __user *buf,
-+ size_t size, loff_t *ppos)
-+{
-+ struct proc_dir_entry *dp = PDE(f->f_dentry->d_inode);
-+ int rc = -EIO;
-+
-+ LPROCFS_ENTRY();
-+ if (!dp->deleted && dp->write_proc)
-+ rc = dp->write_proc(f, buf, size, dp->data);
-+ LPROCFS_EXIT();
-+ return rc;
-+}
-+
-+static struct file_operations lprocfs_generic_fops = {
-+ .owner = THIS_MODULE,
-+ .read = lprocfs_fops_read,
-+ .write = lprocfs_fops_write,
-+};
-+
-+int lprocfs_evict_client_open(struct inode *inode, struct file *f)
-+{
-+ struct proc_dir_entry *dp = PDE(f->f_dentry->d_inode);
-+ struct obd_device *obd = dp->data;
-+
-+ atomic_inc(&obd->obd_evict_inprogress);
-+
-+ return 0;
-+}
-+
-+int lprocfs_evict_client_release(struct inode *inode, struct file *f)
-+{
-+ struct proc_dir_entry *dp = PDE(f->f_dentry->d_inode);
-+ struct obd_device *obd = dp->data;
-+
-+ atomic_dec(&obd->obd_evict_inprogress);
-+ wake_up(&obd->obd_evict_inprogress_waitq);
-+
-+ return 0;
-+}
-+
-+struct file_operations lprocfs_evict_client_fops = {
-+ .owner = THIS_MODULE,
-+ .read = lprocfs_fops_read,
-+ .write = lprocfs_fops_write,
-+ .open = lprocfs_evict_client_open,
-+ .release = lprocfs_evict_client_release,
-+};
-+EXPORT_SYMBOL(lprocfs_evict_client_fops);
-+
-+/**
-+ * Add /proc entries.
-+ *
-+ * \param root [in] The parent proc entry on which new entry will be added.
-+ * \param list [in] Array of proc entries to be added.
-+ * \param data [in] The argument to be passed when entries read/write routines
-+ * are called through /proc file.
-+ *
-+ * \retval 0 on success
-+ * < 0 on error
-+ */
-+int lprocfs_add_vars(struct proc_dir_entry *root, struct lprocfs_vars *list,
-+ void *data)
-+{
-+ if (root == NULL || list == NULL)
-+ return -EINVAL;
-+
-+ while (list->name != NULL) {
-+ struct proc_dir_entry *cur_root, *proc;
-+ char *pathcopy, *cur, *next, pathbuf[64];
-+ int pathsize = strlen(list->name) + 1;
-+
-+ proc = NULL;
-+ cur_root = root;
-+
-+ /* need copy of path for strsep */
-+ if (strlen(list->name) > sizeof(pathbuf) - 1) {
-+ OBD_ALLOC(pathcopy, pathsize);
-+ if (pathcopy == NULL)
-+ return -ENOMEM;
-+ } else {
-+ pathcopy = pathbuf;
-+ }
-+
-+ next = pathcopy;
-+ strcpy(pathcopy, list->name);
-+
-+ while (cur_root != NULL && (cur = strsep(&next, "/"))) {
-+ if (*cur =='\0') /* skip double/trailing "/" */
-+ continue;
-+
-+ proc = lprocfs_srch(cur_root, cur);
-+ CDEBUG(D_OTHER, "cur_root=%s, cur=%s, next=%s, (%s)\n",
-+ cur_root->name, cur, next,
-+ (proc ? "exists" : "new"));
-+ if (next != NULL) {
-+ cur_root = (proc ? proc :
-+ proc_mkdir(cur, cur_root));
-+ } else if (proc == NULL) {
-+ mode_t mode = 0;
-+ if (list->proc_mode != 0000) {
-+ mode = list->proc_mode;
-+ } else {
-+ if (list->read_fptr)
-+ mode = 0444;
-+ if (list->write_fptr)
-+ mode |= 0200;
-+ }
-+ proc = create_proc_entry(cur, mode, cur_root);
-+ }
-+ }
-+
-+ if (pathcopy != pathbuf)
-+ OBD_FREE(pathcopy, pathsize);
-+
-+ if (cur_root == NULL || proc == NULL) {
-+ CERROR("LprocFS: No memory to create /proc entry %s",
-+ list->name);
-+ return -ENOMEM;
-+ }
-+
-+ if (list->fops)
-+ proc->proc_fops = list->fops;
-+ else
-+ proc->proc_fops = &lprocfs_generic_fops;
-+ proc->read_proc = list->read_fptr;
-+ proc->write_proc = list->write_fptr;
-+ proc->data = (list->data ? list->data : data);
-+ list++;
-+ }
-+ return 0;
-+}
-+
-+void lprocfs_remove(struct proc_dir_entry **rooth)
-+{
-+ struct proc_dir_entry *root = *rooth;
-+ struct proc_dir_entry *temp = root;
-+ struct proc_dir_entry *rm_entry;
-+ struct proc_dir_entry *parent;
-+
-+ if (!root)
-+ return;
-+ *rooth = NULL;
-+
-+ parent = root->parent;
-+ LASSERT(parent != NULL);
-+ LPROCFS_WRITE_ENTRY(); /* search vs remove race */
-+
-+ while (1) {
-+ while (temp->subdir != NULL)
-+ temp = temp->subdir;
-+
-+ rm_entry = temp;
-+ temp = temp->parent;
-+
-+ /* Memory corruption once caused this to fail, and
-+ without this LASSERT we would loop here forever. */
-+ LASSERTF(strlen(rm_entry->name) == rm_entry->namelen,
-+ "0x%p %s/%s len %d\n", rm_entry, temp->name,
-+ rm_entry->name, (int)strlen(rm_entry->name));
-+
-+ /* Now, the rm_entry->deleted flags is protected
-+ * by _lprocfs_lock. */
-+ rm_entry->data = NULL;
-+ remove_proc_entry(rm_entry->name, temp);
-+ if (temp == parent)
-+ break;
-+ }
-+ LPROCFS_WRITE_EXIT();
-+}
-+
-+struct proc_dir_entry *lprocfs_register(const char *name,
-+ struct proc_dir_entry *parent,
-+ struct lprocfs_vars *list, void *data)
-+{
-+ struct proc_dir_entry *newchild;
-+
-+ newchild = lprocfs_srch(parent, name);
-+ if (newchild != NULL) {
-+ CERROR(" Lproc: Attempting to register %s more than once \n",
-+ name);
-+ return ERR_PTR(-EALREADY);
-+ }
-+
-+ newchild = proc_mkdir(name, parent);
-+ if (newchild != NULL && list != NULL) {
-+ int rc = lprocfs_add_vars(newchild, list, data);
-+ if (rc) {
-+ lprocfs_remove(&newchild);
-+ return ERR_PTR(rc);
-+ }
-+ }
-+ return newchild;
-+}
-+
-+/* Generic callbacks */
-+int lprocfs_rd_uint(char *page, char **start, off_t off,
-+ int count, int *eof, void *data)
-+{
-+ unsigned int *temp = (unsigned int *)data;
-+ return snprintf(page, count, "%u\n", *temp);
-+}
-+
-+int lprocfs_wr_uint(struct file *file, const char *buffer,
-+ unsigned long count, void *data)
-+{
-+ unsigned *p = data;
-+ char dummy[MAX_STRING_SIZE + 1] = { '\0' }, *end;
-+ unsigned long tmp;
-+
-+ if (count >= sizeof(dummy) || count == 0)
-+ return -EINVAL;
-+
-+ if (copy_from_user(dummy, buffer, count))
-+ return -EFAULT;
-+
-+ tmp = simple_strtoul(dummy, &end, 0);
-+ if (dummy == end)
-+ return -EINVAL;
-+
-+ *p = (unsigned int)tmp;
-+ return count;
-+}
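
lprocfs_wr_uint() above shows the usual write-handler shape: bound the input, copy it into a local buffer, parse it with simple_strtoul(), and reject input where the parser consumed nothing. A simplified userspace analogue using strtoul() is sketched below (it returns 0/-EINVAL instead of a byte count); the function name and limits are made up.

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static int parse_uint(const char *buffer, size_t count, unsigned int *out)
{
        char tmp[32] = { 0 };
        char *end;
        unsigned long val;

        if (count == 0 || count >= sizeof(tmp))
                return -EINVAL;
        memcpy(tmp, buffer, count);     /* stands in for copy_from_user() */

        errno = 0;
        val = strtoul(tmp, &end, 0);
        if (end == tmp || errno != 0)   /* no digits consumed, or overflow */
                return -EINVAL;

        *out = (unsigned int)val;
        return 0;
}

int main(void)
{
        unsigned int v;

        if (parse_uint("4096\n", 5, &v) == 0)
                printf("parsed %u\n", v);
        return 0;
}
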
-+
-+int lprocfs_rd_u64(char *page, char **start, off_t off,
-+ int count, int *eof, void *data)
-+{
-+ LASSERT(data != NULL);
-+ *eof = 1;
-+ return snprintf(page, count, LPU64"\n", *(__u64 *)data);
-+}
-+
-+int lprocfs_rd_atomic(char *page, char **start, off_t off,
-+ int count, int *eof, void *data)
-+{
-+ atomic_t *atom = (atomic_t *)data;
-+ LASSERT(atom != NULL);
-+ *eof = 1;
-+ return snprintf(page, count, "%d\n", atomic_read(atom));
-+}
-+
-+int lprocfs_wr_atomic(struct file *file, const char *buffer,
-+ unsigned long count, void *data)
-+{
-+ atomic_t *atm = data;
-+ int val = 0;
-+ int rc;
-+
-+ rc = lprocfs_write_helper(buffer, count, &val);
-+ if (rc < 0)
-+ return rc;
-+
-+ if (val <= 0)
-+ return -ERANGE;
-+
-+ atomic_set(atm, val);
-+ return count;
-+}
-+
-+int lprocfs_rd_uuid(char *page, char **start, off_t off, int count,
-+ int *eof, void *data)
-+{
-+ struct obd_device *obd = (struct obd_device*)data;
-+
-+ LASSERT(obd != NULL);
-+ *eof = 1;
-+ return snprintf(page, count, "%s\n", obd->obd_uuid.uuid);
-+}
-+
-+int lprocfs_rd_name(char *page, char **start, off_t off, int count,
-+ int *eof, void* data)
-+{
-+ struct obd_device *dev = (struct obd_device *)data;
-+
-+ LASSERT(dev != NULL);
-+ LASSERT(dev->obd_name != NULL);
-+ *eof = 1;
-+ return snprintf(page, count, "%s\n", dev->obd_name);
-+}
-+
-+int lprocfs_rd_fstype(char *page, char **start, off_t off, int count, int *eof,
-+ void *data)
-+{
-+ struct obd_device *obd = (struct obd_device *)data;
-+
-+ LASSERT(obd != NULL);
-+ LASSERT(obd->obd_fsops != NULL);
-+ LASSERT(obd->obd_fsops->fs_type != NULL);
-+ return snprintf(page, count, "%s\n", obd->obd_fsops->fs_type);
-+}
-+
-+int lprocfs_rd_blksize(char *page, char **start, off_t off, int count,
-+ int *eof, void *data)
-+{
-+ struct obd_statfs osfs;
-+ int rc = obd_statfs(data, &osfs, cfs_time_current_64() - HZ,
-+ OBD_STATFS_NODELAY);
-+ if (!rc) {
-+ *eof = 1;
-+ rc = snprintf(page, count, "%u\n", osfs.os_bsize);
-+ }
-+ return rc;
-+}
-+
-+int lprocfs_rd_kbytestotal(char *page, char **start, off_t off, int count,
-+ int *eof, void *data)
-+{
-+ struct obd_statfs osfs;
-+ int rc = obd_statfs(data, &osfs, cfs_time_current_64() - HZ,
-+ OBD_STATFS_NODELAY);
-+ if (!rc) {
-+ __u32 blk_size = osfs.os_bsize >> 10;
-+ __u64 result = osfs.os_blocks;
-+
-+ while (blk_size >>= 1)
-+ result <<= 1;
-+
-+ *eof = 1;
-+ rc = snprintf(page, count, LPU64"\n", result);
-+ }
-+ return rc;
-+}
-+
-+int lprocfs_rd_kbytesfree(char *page, char **start, off_t off, int count,
-+ int *eof, void *data)
-+{
-+ struct obd_statfs osfs;
-+ int rc = obd_statfs(data, &osfs, cfs_time_current_64() - HZ,
-+ OBD_STATFS_NODELAY);
-+ if (!rc) {
-+ __u32 blk_size = osfs.os_bsize >> 10;
-+ __u64 result = osfs.os_bfree;
-+
-+ while (blk_size >>= 1)
-+ result <<= 1;
-+
-+ *eof = 1;
-+ rc = snprintf(page, count, LPU64"\n", result);
-+ }
-+ return rc;
-+}
-+
-+int lprocfs_rd_kbytesavail(char *page, char **start, off_t off, int count,
-+ int *eof, void *data)
-+{
-+ struct obd_statfs osfs;
-+ int rc = obd_statfs(data, &osfs, cfs_time_current_64() - HZ,
-+ OBD_STATFS_NODELAY);
-+ if (!rc) {
-+ __u32 blk_size = osfs.os_bsize >> 10;
-+ __u64 result = osfs.os_bavail;
-+
-+ while (blk_size >>= 1)
-+ result <<= 1;
-+
-+ *eof = 1;
-+ rc = snprintf(page, count, LPU64"\n", result);
-+ }
-+ return rc;
-+}
-+
-+int lprocfs_rd_filestotal(char *page, char **start, off_t off, int count,
-+ int *eof, void *data)
-+{
-+ struct obd_statfs osfs;
-+ int rc = obd_statfs(data, &osfs, cfs_time_current_64() - HZ,
-+ OBD_STATFS_NODELAY);
-+ if (!rc) {
-+ *eof = 1;
-+ rc = snprintf(page, count, LPU64"\n", osfs.os_files);
-+ }
-+
-+ return rc;
-+}
-+
-+int lprocfs_rd_filesfree(char *page, char **start, off_t off, int count,
-+ int *eof, void *data)
-+{
-+ struct obd_statfs osfs;
-+ int rc = obd_statfs(data, &osfs, cfs_time_current_64() - HZ,
-+ OBD_STATFS_NODELAY);
-+ if (!rc) {
-+ *eof = 1;
-+ rc = snprintf(page, count, LPU64"\n", osfs.os_ffree);
-+ }
-+ return rc;
-+}
-+
-+int lprocfs_rd_server_uuid(char *page, char **start, off_t off, int count,
-+ int *eof, void *data)
-+{
-+ struct obd_device *obd = (struct obd_device *)data;
-+ struct obd_import *imp;
-+ char *imp_state_name = NULL;
-+ int rc = 0;
-+
-+ LASSERT(obd != NULL);
-+ LPROCFS_CLIMP_CHECK(obd);
-+ imp = obd->u.cli.cl_import;
-+ imp_state_name = ptlrpc_import_state_name(imp->imp_state);
-+ *eof = 1;
-+ rc = snprintf(page, count, "%s\t%s%s\n",
-+ obd2cli_tgt(obd), imp_state_name,
-+ imp->imp_deactive ? "\tDEACTIVATED" : "");
-+
-+ LPROCFS_CLIMP_EXIT(obd);
-+ return rc;
-+}
-+
-+int lprocfs_rd_conn_uuid(char *page, char **start, off_t off, int count,
-+ int *eof, void *data)
-+{
-+ struct obd_device *obd = (struct obd_device*)data;
-+ struct ptlrpc_connection *conn;
-+ int rc = 0;
-+
-+ LASSERT(obd != NULL);
-+ LPROCFS_CLIMP_CHECK(obd);
-+ conn = obd->u.cli.cl_import->imp_connection;
-+ LASSERT(conn != NULL);
-+ *eof = 1;
-+ rc = snprintf(page, count, "%s\n", conn->c_remote_uuid.uuid);
-+
-+ LPROCFS_CLIMP_EXIT(obd);
-+ return rc;
-+}
-+
-+#define flag2str(flag) \
-+ if (imp->imp_##flag && max - len > 0) \
-+ len += snprintf(str + len, max - len, " " #flag);
-+
-+/**
-+ * Append a space separated list of current set flags to str.
-+ */
-+static int obd_import_flags2str(struct obd_import *imp, char *str,
-+ int max)
-+{
-+ int len = 0;
-+
-+ if (imp->imp_obd->obd_no_recov)
-+ len += snprintf(str, max - len, " no_recov");
-+
-+ flag2str(invalid);
-+ flag2str(deactive);
-+ flag2str(replayable);
-+ flag2str(pingable);
-+ flag2str(recon_bk);
-+ flag2str(last_recon);
-+ return len;
-+}
-+#undef flag2str
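
The flag2str() macro above stringifies the field name with the # operator and appends it to the output whenever that flag is set. A small userspace sketch of the same trick follows, with made-up import fields and sizes.

#include <stdio.h>

struct import { int invalid; int deactive; int pingable; };

/* Append " <flag>" to str whenever the named field of imp is non-zero,
 * tracking how much of the buffer has been used. */
#define flag2str(str, len, max, imp, flag)                              \
        do {                                                            \
                if ((imp)->flag && (max) - (len) > 0)                   \
                        (len) += snprintf((str) + (len), (max) - (len), \
                                          " " #flag);                   \
        } while (0)

int main(void)
{
        struct import imp = { .invalid = 0, .deactive = 1, .pingable = 1 };
        char buf[64];
        int len = 0;

        buf[0] = '\0';
        flag2str(buf, len, (int)sizeof(buf), &imp, invalid);
        flag2str(buf, len, (int)sizeof(buf), &imp, deactive);
        flag2str(buf, len, (int)sizeof(buf), &imp, pingable);
        printf("flags:%s\n", buf);
        return 0;
}
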
-+
-+int lprocfs_rd_import(char *page, char **start, off_t off, int count,
-+ int *eof, void *data)
-+{
-+ struct obd_device *obd = (struct obd_device *)data;
-+ struct obd_import *imp;
-+ char *imp_state_name = NULL;
-+ int rc = 0;
-+
-+ LASSERT(obd != NULL);
-+ LPROCFS_CLIMP_CHECK(obd);
-+ imp = obd->u.cli.cl_import;
-+ imp_state_name = ptlrpc_import_state_name(imp->imp_state);
-+ *eof = 1;
-+
-+ rc = snprintf(page, count,
-+ "import: %s\n"
-+ " target: %s@%s\n"
-+ " state: %s\n"
-+ " inflight: %u\n"
-+ " unregistering: %u\n"
-+ " conn_cnt: %u\n"
-+ " generation: %u\n"
-+ " inval_cnt: %u\n"
-+ " last_replay_transno: "LPU64"\n"
-+ " peer_committed_transno: "LPU64"\n"
-+ " last_trasno_checked: "LPU64"\n"
-+ " flags:",
-+ obd->obd_name,
-+ obd2cli_tgt(obd), imp->imp_connection->c_remote_uuid.uuid,
-+ imp_state_name,
-+ atomic_read(&imp->imp_inflight),
-+ atomic_read(&imp->imp_unregistering),
-+ imp->imp_conn_cnt,
-+ imp->imp_generation,
-+ atomic_read(&imp->imp_inval_count),
-+ imp->imp_last_replay_transno,
-+ imp->imp_peer_committed_transno,
-+ imp->imp_last_transno_checked);
-+ rc += obd_import_flags2str(imp, page + rc, count - rc);
-+ rc += snprintf(page+rc, count - rc, "\n");
-+ LPROCFS_CLIMP_EXIT(obd);
-+ return rc;
-+}
-+
-+int lprocfs_at_hist_helper(char *page, int count, int rc,
-+ struct adaptive_timeout *at)
-+{
-+ int i;
-+ for (i = 0; i < AT_BINS; i++)
-+ rc += snprintf(page + rc, count - rc, "%3u ", at->at_hist[i]);
-+ rc += snprintf(page + rc, count - rc, "\n");
-+ return rc;
-+}
-+
-+/* See also ptlrpc_lprocfs_rd_timeouts */
-+int lprocfs_rd_timeouts(char *page, char **start, off_t off, int count,
-+ int *eof, void *data)
-+{
-+ struct obd_device *obd = (struct obd_device *)data;
-+ struct obd_import *imp;
-+ unsigned int cur, worst;
-+ time_t now, worstt;
-+ struct dhms ts;
-+ int i, rc = 0;
-+
-+ LASSERT(obd != NULL);
-+ LPROCFS_CLIMP_CHECK(obd);
-+ imp = obd->u.cli.cl_import;
-+ *eof = 1;
-+
-+ now = cfs_time_current_sec();
-+
-+ /* Some network health info for kicks */
-+ s2dhms(&ts, now - imp->imp_last_reply_time);
-+ rc += snprintf(page + rc, count - rc,
-+ "%-10s : %ld, "DHMS_FMT" ago\n",
-+ "last reply", imp->imp_last_reply_time, DHMS_VARS(&ts));
-+
-+ cur = at_get(&imp->imp_at.iat_net_latency);
-+ worst = imp->imp_at.iat_net_latency.at_worst_ever;
-+ worstt = imp->imp_at.iat_net_latency.at_worst_time;
-+ s2dhms(&ts, now - worstt);
-+ rc += snprintf(page + rc, count - rc,
-+ "%-10s : cur %3u worst %3u (at %ld, "DHMS_FMT" ago) ",
-+ "network", cur, worst, worstt, DHMS_VARS(&ts));
-+ rc = lprocfs_at_hist_helper(page, count, rc,
-+ &imp->imp_at.iat_net_latency);
-+
-+ for(i = 0; i < IMP_AT_MAX_PORTALS; i++) {
-+ if (imp->imp_at.iat_portal[i] == 0)
-+ break;
-+ cur = at_get(&imp->imp_at.iat_service_estimate[i]);
-+ worst = imp->imp_at.iat_service_estimate[i].at_worst_ever;
-+ worstt = imp->imp_at.iat_service_estimate[i].at_worst_time;
-+ s2dhms(&ts, now - worstt);
-+ rc += snprintf(page + rc, count - rc,
-+ "portal %-2d : cur %3u worst %3u (at %ld, "
-+ DHMS_FMT" ago) ", imp->imp_at.iat_portal[i],
-+ cur, worst, worstt, DHMS_VARS(&ts));
-+ rc = lprocfs_at_hist_helper(page, count, rc,
-+ &imp->imp_at.iat_service_estimate[i]);
-+ }
-+
-+ LPROCFS_CLIMP_EXIT(obd);
-+ return rc;
-+}
-+
-+static const char *obd_connect_names[] = {
-+ "read_only",
-+ "lov_index",
-+ "unused",
-+ "write_grant",
-+ "server_lock",
-+ "version",
-+ "request_portal",
-+ "acl",
-+ "xattr",
-+ "create_on_write",
-+ "truncate_lock",
-+ "initial_transno",
-+ "inode_bit_locks",
-+ "join_file",
-+ "getattr_by_fid",
-+ "no_oh_for_devices",
-+ "local_1.8_client",
-+ "remote_1.8_client",
-+ "max_byte_per_rpc",
-+ "64bit_qdata",
-+ "fid_capability",
-+ "oss_capability",
-+ "early_lock_cancel",
-+ "size_on_mds",
-+ "adaptive_timeout",
-+ "lru_resize",
-+ "mds_mds_connection",
-+ "real_conn",
-+ "change_qunit_size",
-+ "alt_checksum_algorithm",
-+ "fid_is_enabled",
-+ "version_recovery",
-+ "pools",
-+ NULL
-+};
-+
-+int lprocfs_rd_connect_flags(char *page, char **start, off_t off,
-+ int count, int *eof, void *data)
-+{
-+ struct obd_device *obd = data;
-+ __u64 mask = 1, flags;
-+ int i, ret = 0;
-+
-+ LPROCFS_CLIMP_CHECK(obd);
-+ flags = obd->u.cli.cl_import->imp_connect_data.ocd_connect_flags;
-+ ret = snprintf(page, count, "flags="LPX64"\n", flags);
-+ for (i = 0; obd_connect_names[i] != NULL; i++, mask <<= 1) {
-+ if (flags & mask)
-+ ret += snprintf(page + ret, count - ret, "%s\n",
-+ obd_connect_names[i]);
-+ }
-+ if (flags & ~(mask - 1))
-+ ret += snprintf(page + ret, count - ret,
-+ "unknown flags "LPX64"\n", flags & ~(mask - 1));
-+
-+ LPROCFS_CLIMP_EXIT(obd);
-+ return ret;
-+}
-+EXPORT_SYMBOL(lprocfs_rd_connect_flags);
-+
-+int lprocfs_rd_num_exports(char *page, char **start, off_t off, int count,
-+ int *eof, void *data)
-+{
-+ struct obd_device *obd = (struct obd_device*)data;
-+
-+ LASSERT(obd != NULL);
-+ *eof = 1;
-+ return snprintf(page, count, "%u\n", obd->obd_num_exports);
-+}
-+
-+int lprocfs_rd_numrefs(char *page, char **start, off_t off, int count,
-+ int *eof, void *data)
-+{
-+ struct obd_type *class = (struct obd_type*) data;
-+
-+ LASSERT(class != NULL);
-+ *eof = 1;
-+ return snprintf(page, count, "%d\n", class->typ_refcnt);
-+}
-+
-+int lprocfs_obd_setup(struct obd_device *obd, struct lprocfs_vars *list)
-+{
-+ int rc = 0;
-+
-+ LASSERT(obd != NULL);
-+ LASSERT(obd->obd_magic == OBD_DEVICE_MAGIC);
-+ LASSERT(obd->obd_type->typ_procroot != NULL);
-+
-+ obd->obd_proc_entry = lprocfs_register(obd->obd_name,
-+ obd->obd_type->typ_procroot,
-+ list, obd);
-+ if (IS_ERR(obd->obd_proc_entry)) {
-+ rc = PTR_ERR(obd->obd_proc_entry);
-+ CERROR("error %d setting up lprocfs for %s\n",rc,obd->obd_name);
-+ obd->obd_proc_entry = NULL;
-+ }
-+ return rc;
-+}
-+
-+int lprocfs_obd_cleanup(struct obd_device *obd)
-+{
-+ if (!obd)
-+ return -EINVAL;
-+ if (obd->obd_proc_exports_entry) {
-+ /* Should be no exports left */
-+ LASSERT(obd->obd_proc_exports_entry->subdir == NULL);
-+ lprocfs_remove(&obd->obd_proc_exports_entry);
-+ }
-+ lprocfs_remove(&obd->obd_proc_entry);
-+ return 0;
-+}
-+
-+static void lprocfs_free_client_stats(struct nid_stat *client_stat)
-+{
-+ CDEBUG(D_CONFIG, "stat %p - data %p/%p/%p\n", client_stat,
-+ client_stat->nid_proc, client_stat->nid_stats,
-+ client_stat->nid_brw_stats);
-+
-+ LASSERTF(client_stat->nid_exp_ref_count == 0, "count %d\n",
-+ client_stat->nid_exp_ref_count);
-+
-+ hlist_del_init(&client_stat->nid_hash);
-+
-+ if (client_stat->nid_proc)
-+ lprocfs_remove(&client_stat->nid_proc);
-+
-+ if (client_stat->nid_stats)
-+ lprocfs_free_stats(&client_stat->nid_stats);
-+
-+ if (client_stat->nid_brw_stats)
-+ OBD_FREE_PTR(client_stat->nid_brw_stats);
-+
-+ if (client_stat->nid_ldlm_stats)
-+ lprocfs_free_stats(&client_stat->nid_ldlm_stats);
-+
-+ OBD_FREE_PTR(client_stat);
-+ return;
-+
-+}
-+
-+void lprocfs_free_per_client_stats(struct obd_device *obd)
-+{
-+ struct nid_stat *stat;
-+ ENTRY;
-+
-+ /* we need an extra list because hash_exit is called too early */
-+ /* no locking is needed because all clients have already died */
-+ while(!list_empty(&obd->obd_nid_stats)) {
-+ stat = list_entry(obd->obd_nid_stats.next,
-+ struct nid_stat, nid_list);
-+ list_del_init(&stat->nid_list);
-+ lprocfs_free_client_stats(stat);
-+ }
-+
-+ EXIT;
-+}
-+
-+struct lprocfs_stats *lprocfs_alloc_stats(unsigned int num,
-+ enum lprocfs_stats_flags flags)
-+{
-+ struct lprocfs_stats *stats;
-+ unsigned int percpusize;
-+ unsigned int i, j;
-+ unsigned int num_cpu;
-+
-+ if (num == 0)
-+ return NULL;
-+
-+ if (flags & LPROCFS_STATS_FLAG_NOPERCPU)
-+ num_cpu = 1;
-+ else
-+ num_cpu = num_possible_cpus();
-+
-+ OBD_ALLOC(stats, offsetof(typeof(*stats), ls_percpu[num_cpu]));
-+ if (stats == NULL)
-+ return NULL;
-+
-+ if (flags & LPROCFS_STATS_FLAG_NOPERCPU) {
-+ stats->ls_flags = flags;
-+ spin_lock_init(&stats->ls_lock);
-+ /* Use this lock only if there are no percpu areas */
-+ } else {
-+ stats->ls_flags = 0;
-+ }
-+
-+ percpusize = offsetof(struct lprocfs_percpu, lp_cntr[num]);
-+ if (num_cpu > 1)
-+ percpusize = L1_CACHE_ALIGN(percpusize);
-+
-+ for (i = 0; i < num_cpu; i++) {
-+ OBD_ALLOC(stats->ls_percpu[i], percpusize);
-+ if (stats->ls_percpu[i] == NULL) {
-+ for (j = 0; j < i; j++) {
-+ OBD_FREE(stats->ls_percpu[j], percpusize);
-+ stats->ls_percpu[j] = NULL;
-+ }
-+ break;
-+ }
-+ }
-+ if (stats->ls_percpu[0] == NULL) {
-+ OBD_FREE(stats, offsetof(typeof(*stats),
-+ ls_percpu[num_cpu]));
-+ return NULL;
-+ }
-+
-+ stats->ls_num = num;
-+ return stats;
-+}
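
lprocfs_alloc_stats() sizes the top-level structure with offsetof() over what is effectively a per-CPU pointer array, then allocates one counter array per CPU. A userspace sketch of that allocation shape, using a C99 flexible array member and made-up sizes, is below.

#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>

struct counter { long count; long sum; };

struct stats {
        unsigned int    ls_num;         /* counters per CPU */
        struct counter *ls_percpu[];    /* one counter array per CPU */
};

int main(void)
{
        unsigned int num_cpu = 4, num = 8, i;
        struct stats *stats;

        /* Header plus num_cpu pointer slots, sized via offsetof(). */
        stats = calloc(1, offsetof(struct stats, ls_percpu[num_cpu]));
        if (stats == NULL)
                return 1;
        stats->ls_num = num;
        for (i = 0; i < num_cpu; i++)
                stats->ls_percpu[i] = calloc(num, sizeof(struct counter));

        printf("allocated %u per-cpu arrays of %u counters\n", num_cpu, num);

        for (i = 0; i < num_cpu; i++)
                free(stats->ls_percpu[i]);
        free(stats);
        return 0;
}
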
-+
-+void lprocfs_free_stats(struct lprocfs_stats **statsh)
-+{
-+ struct lprocfs_stats *stats = *statsh;
-+ unsigned int num_cpu;
-+ unsigned int percpusize;
-+ unsigned int i;
-+
-+ if (!stats || (stats->ls_num == 0))
-+ return;
-+ *statsh = NULL;
-+ if (stats->ls_flags & LPROCFS_STATS_FLAG_NOPERCPU)
-+ num_cpu = 1;
-+ else
-+ num_cpu = num_possible_cpus();
-+
-+ percpusize = offsetof(struct lprocfs_percpu, lp_cntr[stats->ls_num]);
-+ if (num_cpu > 1)
-+ percpusize = L1_CACHE_ALIGN(percpusize);
-+ for (i = 0; i < num_cpu; i++)
-+ OBD_FREE(stats->ls_percpu[i], percpusize);
-+ OBD_FREE(stats, offsetof(typeof(*stats), ls_percpu[num_cpu]));
-+}
-+
-+void lprocfs_clear_stats(struct lprocfs_stats *stats)
-+{
-+ struct lprocfs_counter *percpu_cntr;
-+ int i, j;
-+ unsigned int num_cpu;
-+
-+ num_cpu = lprocfs_stats_lock(stats, LPROCFS_GET_NUM_CPU);
-+
-+ for (i = 0; i < num_cpu; i++) {
-+ for (j = 0; j < stats->ls_num; j++) {
-+ percpu_cntr = &(stats->ls_percpu[i])->lp_cntr[j];
-+ atomic_inc(&percpu_cntr->lc_cntl.la_entry);
-+ percpu_cntr->lc_count = 0;
-+ percpu_cntr->lc_sum = 0;
-+ percpu_cntr->lc_min = LC_MIN_INIT;
-+ percpu_cntr->lc_max = 0;
-+ percpu_cntr->lc_sumsquare = 0;
-+ atomic_inc(&percpu_cntr->lc_cntl.la_exit);
-+ }
-+ }
-+
-+ lprocfs_stats_unlock(stats);
-+}
-+
-+static ssize_t lprocfs_stats_seq_write(struct file *file, const char *buf,
-+ size_t len, loff_t *off)
-+{
-+ struct seq_file *seq = file->private_data;
-+ struct lprocfs_stats *stats = seq->private;
-+
-+ lprocfs_clear_stats(stats);
-+
-+ return len;
-+}
-+
-+static void *lprocfs_stats_seq_start(struct seq_file *p, loff_t *pos)
-+{
-+ struct lprocfs_stats *stats = p->private;
-+ /* return 1st cpu location */
-+ return (*pos >= stats->ls_num) ? NULL :
-+ &(stats->ls_percpu[0]->lp_cntr[*pos]);
-+}
-+
-+static void lprocfs_stats_seq_stop(struct seq_file *p, void *v)
-+{
-+}
-+
-+static void *lprocfs_stats_seq_next(struct seq_file *p, void *v, loff_t *pos)
-+{
-+ struct lprocfs_stats *stats = p->private;
-+ ++*pos;
-+ return (*pos >= stats->ls_num) ? NULL :
-+ &(stats->ls_percpu[0]->lp_cntr[*pos]);
-+}
-+
-+/* seq file export of one lprocfs counter */
-+static int lprocfs_stats_seq_show(struct seq_file *p, void *v)
-+{
-+ struct lprocfs_stats *stats = p->private;
-+ struct lprocfs_counter *cntr = v;
-+ struct lprocfs_counter t, ret = { .lc_min = LC_MIN_INIT };
-+ int i, idx, rc = 0;
-+ unsigned int num_cpu;
-+
-+ if (cntr == &(stats->ls_percpu[0])->lp_cntr[0]) {
-+ struct timeval now;
-+ do_gettimeofday(&now);
-+ rc = seq_printf(p, "%-25s %lu.%lu secs.usecs\n",
-+ "snapshot_time", now.tv_sec, now.tv_usec);
-+ if (rc < 0)
-+ return rc;
-+ }
-+ idx = cntr - &(stats->ls_percpu[0])->lp_cntr[0];
-+
-+ if (stats->ls_flags & LPROCFS_STATS_FLAG_NOPERCPU)
-+ num_cpu = 1;
-+ else
-+ num_cpu = num_possible_cpus();
-+
-+ for (i = 0; i < num_cpu; i++) {
-+ struct lprocfs_counter *percpu_cntr =
-+ &(stats->ls_percpu[i])->lp_cntr[idx];
-+ int centry;
-+
-+ do {
-+ centry = atomic_read(&percpu_cntr->lc_cntl.la_entry);
-+ t.lc_count = percpu_cntr->lc_count;
-+ t.lc_sum = percpu_cntr->lc_sum;
-+ t.lc_min = percpu_cntr->lc_min;
-+ t.lc_max = percpu_cntr->lc_max;
-+ t.lc_sumsquare = percpu_cntr->lc_sumsquare;
-+ } while (centry != atomic_read(&percpu_cntr->lc_cntl.la_entry) &&
-+ centry != atomic_read(&percpu_cntr->lc_cntl.la_exit));
-+ ret.lc_count += t.lc_count;
-+ ret.lc_sum += t.lc_sum;
-+ if (t.lc_min < ret.lc_min)
-+ ret.lc_min = t.lc_min;
-+ if (t.lc_max > ret.lc_max)
-+ ret.lc_max = t.lc_max;
-+ ret.lc_sumsquare += t.lc_sumsquare;
-+ }
-+
-+ if (ret.lc_count == 0)
-+ goto out;
-+
-+ rc = seq_printf(p, "%-25s "LPD64" samples [%s]", cntr->lc_name,
-+ ret.lc_count, cntr->lc_units);
-+ if (rc < 0)
-+ goto out;
-+
-+ if ((cntr->lc_config & LPROCFS_CNTR_AVGMINMAX) && (ret.lc_count > 0)) {
-+ rc = seq_printf(p, " "LPD64" "LPD64" "LPD64,
-+ ret.lc_min, ret.lc_max, ret.lc_sum);
-+ if (rc < 0)
-+ goto out;
-+ if (cntr->lc_config & LPROCFS_CNTR_STDDEV)
-+ rc = seq_printf(p, " "LPD64, ret.lc_sumsquare);
-+ if (rc < 0)
-+ goto out;
-+ }
-+ rc = seq_printf(p, "\n");
-+ out:
-+ return (rc < 0) ? rc : 0;
-+}
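
The show routine above folds the per-CPU copies of one counter into a single result; the entry/exit sequence counters exist only to retry a copy that changed mid-read. A simplified userspace sketch of the folding step, without that retry logic and with made-up field names and sample values, follows.

#include <stdio.h>

struct counter { long count; long sum; long min; long max; };

/* Fold per-CPU copies of a counter into one aggregate result. */
static struct counter aggregate(const struct counter *percpu, int num_cpu)
{
        struct counter ret = { 0, 0, 0, 0 };
        int i;

        for (i = 0; i < num_cpu; i++) {
                ret.count += percpu[i].count;
                ret.sum   += percpu[i].sum;
                if (i == 0 || percpu[i].min < ret.min)
                        ret.min = percpu[i].min;
                if (percpu[i].max > ret.max)
                        ret.max = percpu[i].max;
        }
        return ret;
}

int main(void)
{
        struct counter percpu[2] = { { 3, 30, 5, 15 }, { 2, 22, 4, 18 } };
        struct counter total = aggregate(percpu, 2);

        printf("%ld samples, sum %ld, min %ld, max %ld\n",
               total.count, total.sum, total.min, total.max);
        return 0;
}
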
-+
-+struct seq_operations lprocfs_stats_seq_sops = {
-+ start: lprocfs_stats_seq_start,
-+ stop: lprocfs_stats_seq_stop,
-+ next: lprocfs_stats_seq_next,
-+ show: lprocfs_stats_seq_show,
-+};
-+
-+static int lprocfs_stats_seq_open(struct inode *inode, struct file *file)
-+{
-+ struct proc_dir_entry *dp = PDE(inode);
-+ struct seq_file *seq;
-+ int rc;
-+
-+ LPROCFS_ENTRY_AND_CHECK(dp);
-+ rc = seq_open(file, &lprocfs_stats_seq_sops);
-+ if (rc) {
-+ LPROCFS_EXIT();
-+ return rc;
-+ }
-+
-+ seq = file->private_data;
-+ seq->private = dp->data;
-+ return 0;
-+}
-+
-+struct file_operations lprocfs_stats_seq_fops = {
-+ .owner = THIS_MODULE,
-+ .open = lprocfs_stats_seq_open,
-+ .read = seq_read,
-+ .write = lprocfs_stats_seq_write,
-+ .llseek = seq_lseek,
-+ .release = lprocfs_seq_release,
-+};
-+
-+int lprocfs_register_stats(struct proc_dir_entry *root, const char *name,
-+ struct lprocfs_stats *stats)
-+{
-+ struct proc_dir_entry *entry;
-+ LASSERT(root != NULL);
-+
-+ entry = create_proc_entry(name, 0644, root);
-+ if (entry == NULL)
-+ return -ENOMEM;
-+ entry->proc_fops = &lprocfs_stats_seq_fops;
-+ entry->data = (void *)stats;
-+ return 0;
-+}
-+
-+void lprocfs_counter_init(struct lprocfs_stats *stats, int index,
-+ unsigned conf, const char *name, const char *units)
-+{
-+ struct lprocfs_counter *c;
-+ int i;
-+ unsigned int num_cpu;
-+
-+ LASSERT(stats != NULL);
-+
-+ num_cpu = lprocfs_stats_lock(stats, LPROCFS_GET_NUM_CPU);
-+
-+ for (i = 0; i < num_cpu; i++) {
-+ c = &(stats->ls_percpu[i]->lp_cntr[index]);
-+ c->lc_config = conf;
-+ c->lc_count = 0;
-+ c->lc_sum = 0;
-+ c->lc_min = LC_MIN_INIT;
-+ c->lc_max = 0;
-+ c->lc_name = name;
-+ c->lc_units = units;
-+ }
-+
-+ lprocfs_stats_unlock(stats);
-+}
-+EXPORT_SYMBOL(lprocfs_counter_init);
-+
-+#define LPROCFS_OBD_OP_INIT(base, stats, op) \
-+do { \
-+ unsigned int coffset = base + OBD_COUNTER_OFFSET(op); \
-+ LASSERT(coffset < stats->ls_num); \
-+ lprocfs_counter_init(stats, coffset, 0, #op, "reqs"); \
-+} while (0)
-+
-+void lprocfs_init_ops_stats(int num_private_stats, struct lprocfs_stats *stats)
-+{
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, iocontrol);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, get_info);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, set_info_async);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, attach);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, detach);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, setup);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, precleanup);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, cleanup);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, process_config);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, postrecov);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, add_conn);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, del_conn);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, connect);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, reconnect);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, disconnect);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, statfs);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, statfs_async);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, packmd);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, unpackmd);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, checkmd);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, preallocate);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, precreate);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, create);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, destroy);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, setattr);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, setattr_async);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, getattr);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, getattr_async);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, brw);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, brw_async);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, prep_async_page);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, reget_short_lock);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, release_short_lock);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, queue_async_io);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, queue_group_io);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, trigger_group_io);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, set_async_flags);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, teardown_async_page);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, merge_lvb);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, adjust_kms);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, punch);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, sync);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, migrate);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, copy);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, iterate);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, preprw);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, commitrw);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, enqueue);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, match);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, change_cbdata);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, cancel);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, cancel_unused);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, join_lru);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, init_export);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, destroy_export);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, extent_calc);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, llog_init);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, llog_finish);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, pin);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, unpin);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, import_event);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, notify);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, health_check);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, quotacheck);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, quotactl);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, quota_adjust_qunit);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, ping);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, register_page_removal_cb);
-+ LPROCFS_OBD_OP_INIT(num_private_stats,stats,unregister_page_removal_cb);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, register_lock_cancel_cb);
-+ LPROCFS_OBD_OP_INIT(num_private_stats, stats,unregister_lock_cancel_cb);
-+}
-+
-+void lprocfs_init_ldlm_stats(struct lprocfs_stats *ldlm_stats)
-+{
-+ lprocfs_counter_init(ldlm_stats,
-+ LDLM_ENQUEUE - LDLM_FIRST_OPC,
-+ 0, "ldlm_enqueue", "reqs");
-+ lprocfs_counter_init(ldlm_stats,
-+ LDLM_CONVERT - LDLM_FIRST_OPC,
-+ 0, "ldlm_convert", "reqs");
-+ lprocfs_counter_init(ldlm_stats,
-+ LDLM_CANCEL - LDLM_FIRST_OPC,
-+ 0, "ldlm_cancel", "reqs");
-+ lprocfs_counter_init(ldlm_stats,
-+ LDLM_BL_CALLBACK - LDLM_FIRST_OPC,
-+ 0, "ldlm_bl_callback", "reqs");
-+ lprocfs_counter_init(ldlm_stats,
-+ LDLM_CP_CALLBACK - LDLM_FIRST_OPC,
-+ 0, "ldlm_cp_callback", "reqs");
-+ lprocfs_counter_init(ldlm_stats,
-+ LDLM_GL_CALLBACK - LDLM_FIRST_OPC,
-+ 0, "ldlm_gl_callback", "reqs");
-+}
-+
-+int lprocfs_alloc_obd_stats(struct obd_device *obd, unsigned num_private_stats)
-+{
-+ struct lprocfs_stats *stats;
-+ unsigned int num_stats;
-+ int rc, i;
-+
-+ LASSERT(obd->obd_stats == NULL);
-+ LASSERT(obd->obd_proc_entry != NULL);
-+ LASSERT(obd->obd_cntr_base == 0);
-+
-+ num_stats = ((int)sizeof(*obd->obd_type->typ_ops) / sizeof(void *)) +
-+ num_private_stats - 1 /* o_owner */;
-+ stats = lprocfs_alloc_stats(num_stats, 0);
-+ if (stats == NULL)
-+ return -ENOMEM;
-+
-+ lprocfs_init_ops_stats(num_private_stats, stats);
-+
-+ for (i = num_private_stats; i < num_stats; i++) {
-+ /* If this LBUGs, it is likely that an obd
-+ * operation was added to struct obd_ops in
-+ * <obd.h>, and that the corresponding line item
-+ * LPROCFS_OBD_OP_INIT(.., .., opname)
-+ * is missing from the list above. */
-+ LASSERTF(stats->ls_percpu[0]->lp_cntr[i].lc_name != NULL,
-+ "Missing obd_stat initializer obd_op "
-+ "operation at offset %d.\n", i - num_private_stats);
-+ }
-+ rc = lprocfs_register_stats(obd->obd_proc_entry, "stats", stats);
-+ if (rc < 0) {
-+ lprocfs_free_stats(&stats);
-+ } else {
-+ obd->obd_stats = stats;
-+ obd->obd_cntr_base = num_private_stats;
-+ }
-+ return rc;
-+}
-+
-+void lprocfs_free_obd_stats(struct obd_device *obd)
-+{
-+ if (obd->obd_stats)
-+ lprocfs_free_stats(&obd->obd_stats);
-+}
-+
-+int lprocfs_exp_rd_nid(char *page, char **start, off_t off, int count,
-+ int *eof, void *data)
-+{
-+ struct obd_export *exp = (struct obd_export*)data;
-+ LASSERT(exp != NULL);
-+ *eof = 1;
-+ return snprintf(page, count, "%s\n", obd_export_nid2str(exp));
-+}
-+
-+struct exp_uuid_cb_data {
-+ char *page;
-+ int count;
-+ int *eof;
-+ int *len;
-+};
-+
-+static void
-+lprocfs_exp_rd_cb_data_init(struct exp_uuid_cb_data *cb_data, char *page,
-+ int count, int *eof, int *len)
-+{
-+ cb_data->page = page;
-+ cb_data->count = count;
-+ cb_data->eof = eof;
-+ cb_data->len = len;
-+}
-+
-+void lprocfs_exp_print_uuid(void *obj, void *cb_data)
-+{
-+ struct obd_export *exp = (struct obd_export *)obj;
-+ struct exp_uuid_cb_data *data = (struct exp_uuid_cb_data *)cb_data;
-+
-+ if (exp->exp_nid_stats)
-+ *data->len += snprintf((data->page + *data->len),
-+ data->count, "%s\n",
-+ obd_uuid2str(&exp->exp_client_uuid));
-+}
-+
-+int lprocfs_exp_rd_uuid(char *page, char **start, off_t off, int count,
-+ int *eof, void *data)
-+{
-+ struct nid_stat *stats = (struct nid_stat *)data;
-+ struct exp_uuid_cb_data cb_data;
-+ struct obd_device *obd = stats->nid_obd;
-+ int len = 0;
-+
-+ *eof = 1;
-+ page[0] = '\0';
-+ lprocfs_exp_rd_cb_data_init(&cb_data, page, count, eof, &len);
-+ lustre_hash_for_each_key(obd->obd_nid_hash, &stats->nid,
-+ lprocfs_exp_print_uuid, &cb_data);
-+ return (*cb_data.len);
-+}
-+
-+void lprocfs_exp_print_hash(void *obj, void *cb_data)
-+{
-+ struct obd_export *exp = (struct obd_export *)obj;
-+ struct exp_uuid_cb_data *data = (struct exp_uuid_cb_data *)cb_data;
-+ lustre_hash_t *lh;
-+
-+ lh = exp->exp_lock_hash;
-+ if (lh) {
-+ if (!*data->len)
-+ *data->len += lustre_hash_debug_header(data->page,
-+ data->count);
-+
-+ *data->len += lustre_hash_debug_str(lh, data->page +
-+ *data->len,
-+ data->count);
-+ }
-+}
-+
-+int lprocfs_exp_rd_hash(char *page, char **start, off_t off, int count,
-+ int *eof, void *data)
-+{
-+ struct nid_stat *stats = (struct nid_stat *)data;
-+ struct exp_uuid_cb_data cb_data;
-+ struct obd_device *obd = stats->nid_obd;
-+ int len = 0;
-+
-+ *eof = 1;
-+ page[0] = '\0';
-+ lprocfs_exp_rd_cb_data_init(&cb_data, page, count, eof, &len);
-+ lustre_hash_for_each_key(obd->obd_nid_hash, &stats->nid,
-+ lprocfs_exp_print_hash, &cb_data);
-+ return (*cb_data.len);
-+}
-+
-+int lprocfs_nid_stats_clear_read(char *page, char **start, off_t off,
-+ int count, int *eof, void *data)
-+{
-+ *eof = 1;
-+ return snprintf(page, count, "%s\n",
-+ "Write into this file to clear all nid stats and "
-+ "stale nid entries");
-+}
-+EXPORT_SYMBOL(lprocfs_nid_stats_clear_read);
-+
-+void lprocfs_nid_stats_clear_write_cb(void *obj, void *data)
-+{
-+ struct nid_stat *stat = obj;
-+ int i;
-+
-+ /* object has only hash + iterate_all references.
-+ * add/delete blocked by hash bucket lock */
-+ CDEBUG(D_INFO,"refcnt %d\n", stat->nid_exp_ref_count);
-+ if (stat->nid_exp_ref_count == 2) {
-+ hlist_del_init(&stat->nid_hash);
-+ stat->nid_exp_ref_count--;
-+ spin_lock(&stat->nid_obd->obd_nid_lock);
-+ list_del_init(&stat->nid_list);
-+ spin_unlock(&stat->nid_obd->obd_nid_lock);
-+ list_add(&stat->nid_list, data);
-+ EXIT;
-+ return;
-+ }
-+ /* we have a reference to the object - only clear the data */
-+ if (stat->nid_stats)
-+ lprocfs_clear_stats(stat->nid_stats);
-+
-+ if (stat->nid_brw_stats) {
-+ for (i = 0; i < BRW_LAST; i++)
-+ lprocfs_oh_clear(&stat->nid_brw_stats->hist[i]);
-+ }
-+ EXIT;
-+ return;
-+}
-+
-+int lprocfs_nid_stats_clear_write(struct file *file, const char *buffer,
-+ unsigned long count, void *data)
-+{
-+ struct obd_device *obd = (struct obd_device *)data;
-+ struct nid_stat *client_stat;
-+ CFS_LIST_HEAD(free_list);
-+
-+ lustre_hash_for_each(obd->obd_nid_stats_hash,
-+ lprocfs_nid_stats_clear_write_cb, &free_list);
-+
-+ while (!list_empty(&free_list)) {
-+ client_stat = list_entry(free_list.next, struct nid_stat,
-+ nid_list);
-+ list_del_init(&client_stat->nid_list);
-+ lprocfs_free_client_stats(client_stat);
-+ }
-+
-+ return count;
-+}
-+EXPORT_SYMBOL(lprocfs_nid_stats_clear_write);
-+
-+int lprocfs_exp_setup(struct obd_export *exp, lnet_nid_t *nid, int *newnid)
-+{
-+ struct nid_stat *new_stat, *old_stat;
-+ struct nid_stat_uuid *new_ns_uuid;
-+ struct obd_device *obd;
-+ int rc = 0;
-+ ENTRY;
-+
-+ *newnid = 0;
-+
-+ if (!exp || !exp->exp_obd || !exp->exp_obd->obd_proc_exports_entry ||
-+ !exp->exp_obd->obd_nid_stats_hash)
-+ RETURN(-EINVAL);
-+
-+ /* Do not test against zero because, as Eric says:
-+ * you may only test a nid against another nid, or LNET_NID_ANY.
-+ * Anything else is nonsense. */
-+ if (!nid || *nid == LNET_NID_ANY)
-+ RETURN(0);
-+
-+ obd = exp->exp_obd;
-+
-+ CDEBUG(D_CONFIG, "using hash %p\n", obd->obd_nid_stats_hash);
-+
-+ OBD_ALLOC_PTR(new_stat);
-+ if (new_stat == NULL)
-+ RETURN(-ENOMEM);
-+
-+ OBD_ALLOC_PTR(new_ns_uuid);
-+ if (new_ns_uuid == NULL) {
-+ OBD_FREE_PTR(new_stat);
-+ RETURN(-ENOMEM);
-+ }
-+ CFS_INIT_LIST_HEAD(&new_ns_uuid->ns_uuid_list);
-+ strncpy(new_ns_uuid->ns_uuid.uuid, exp->exp_client_uuid.uuid,
-+ sizeof(struct obd_uuid));
-+
-+ CFS_INIT_LIST_HEAD(&new_stat->nid_uuid_list);
-+ new_stat->nid = *nid;
-+ new_stat->nid_obd = exp->exp_obd;
-+ /* needs to live in the hash after the export is destroyed */
-+ new_stat->nid_exp_ref_count = 1;
-+
-+ old_stat = lustre_hash_findadd_unique(obd->obd_nid_stats_hash,
-+ nid, &new_stat->nid_hash);
-+ CDEBUG(D_INFO, "Found stats %p for nid %s - ref %d\n",
-+ old_stat, libcfs_nid2str(*nid), new_stat->nid_exp_ref_count);
-+
-+ /* Return -EALREADY here so that we know that the /proc
-+ * entry has already been created */
-+ if (old_stat != new_stat) {
-+ struct nid_stat_uuid *tmp_uuid;
-+ int found = 0;
-+
-+ exp->exp_nid_stats = old_stat;
-+
-+ /* We need to decrement the refcount if the uuid was
-+ * already in our list */
-+ spin_lock(&obd->obd_nid_lock);
-+ list_for_each_entry(tmp_uuid, &old_stat->nid_uuid_list,
-+ ns_uuid_list) {
-+ if (tmp_uuid && obd_uuid_equals(&tmp_uuid->ns_uuid,
-+ &exp->exp_client_uuid)){
-+ found = 1;
-+ --old_stat->nid_exp_ref_count;
-+ break;
-+ }
-+ }
-+
-+ if (!found)
-+ list_add(&new_ns_uuid->ns_uuid_list,
-+ &old_stat->nid_uuid_list);
-+ else
-+ OBD_FREE_PTR(new_ns_uuid);
-+ spin_unlock(&obd->obd_nid_lock);
-+
-+ GOTO(destroy_new, rc = -EALREADY);
-+ }
-+ /* not found - create */
-+ new_stat->nid_proc = proc_mkdir(libcfs_nid2str(*nid),
-+ obd->obd_proc_exports_entry);
-+ if (!new_stat->nid_proc) {
-+ CERROR("Error making export directory for"
-+ " nid %s\n", libcfs_nid2str(*nid));
-+ GOTO(destroy_new_ns, rc = -ENOMEM);
-+ }
-+
-+ /* Add in uuid to our nid_stats list */
-+ spin_lock(&obd->obd_nid_lock);
-+ list_add(&new_ns_uuid->ns_uuid_list, &new_stat->nid_uuid_list);
-+ spin_unlock(&obd->obd_nid_lock);
-+
-+ rc = lprocfs_add_simple(new_stat->nid_proc, "uuid",
-+ lprocfs_exp_rd_uuid, NULL, new_stat);
-+ if (rc) {
-+ CWARN("Error adding the uuid file\n");
-+ GOTO(destroy_new_ns, rc);
-+ }
-+
-+ rc = lprocfs_add_simple(new_stat->nid_proc, "hash",
-+ lprocfs_exp_rd_hash, NULL, new_stat);
-+ if (rc) {
-+ CWARN("Error adding the hash file\n");
-+ lprocfs_remove(&new_stat->nid_proc);
-+ GOTO(destroy_new_ns, rc);
-+ }
-+
-+ exp->exp_nid_stats = new_stat;
-+ *newnid = 1;
-+ /* protect against concurrent adds to the list; no locking needed on destroy */
-+ spin_lock(&obd->obd_nid_lock);
-+ list_add(&new_stat->nid_list, &obd->obd_nid_stats);
-+ spin_unlock(&obd->obd_nid_lock);
-+
-+ RETURN(rc);
-+
-+destroy_new_ns:
-+ lustre_hash_del(obd->obd_nid_stats_hash, nid, &new_stat->nid_hash);
-+ OBD_FREE_PTR(new_ns_uuid);
-+
-+destroy_new:
-+ OBD_FREE_PTR(new_stat);
-+ RETURN(rc);
-+}
-+
-+int lprocfs_exp_cleanup(struct obd_export *exp)
-+{
-+ struct nid_stat *stat = exp->exp_nid_stats;
-+ struct nid_stat_uuid *cursor, *tmp;
-+ int found = 0;
-+
-+ if(!stat || !exp->exp_obd)
-+ RETURN(0);
-+
-+ spin_lock(&exp->exp_obd->obd_nid_lock);
-+ list_for_each_entry_safe(cursor, tmp,
-+ &stat->nid_uuid_list,
-+ ns_uuid_list) {
-+ if (cursor && obd_uuid_equals(&cursor->ns_uuid,
-+ &exp->exp_client_uuid)) {
-+ found = 1;
-+ list_del(&cursor->ns_uuid_list);
-+ OBD_FREE_PTR(cursor);
-+ --stat->nid_exp_ref_count;
-+ CDEBUG(D_INFO, "Put stat %p - %d\n", stat,
-+ stat->nid_exp_ref_count);
-+ break;
-+ }
-+ }
-+ spin_unlock(&exp->exp_obd->obd_nid_lock);
-+ if (!found)
-+ CERROR("obd_export's client uuid %s are not found in its "
-+ "nid_stats list\n", exp->exp_client_uuid.uuid);
-+
-+ exp->exp_nid_stats = NULL;
-+ lprocfs_free_stats(&exp->exp_ops_stats);
-+
-+ return 0;
-+}
-+
-+int lprocfs_write_helper(const char *buffer, unsigned long count,
-+ int *val)
-+{
-+ return lprocfs_write_frac_helper(buffer, count, val, 1);
-+}
-+
-+int lprocfs_write_frac_helper(const char *buffer, unsigned long count,
-+ int *val, int mult)
-+{
-+ char kernbuf[20], *end, *pbuf;
-+
-+ if (count > (sizeof(kernbuf) - 1))
-+ return -EINVAL;
-+
-+ if (copy_from_user(kernbuf, buffer, count))
-+ return -EFAULT;
-+
-+ kernbuf[count] = '\0';
-+ pbuf = kernbuf;
-+ if (*pbuf == '-') {
-+ mult = -mult;
-+ pbuf++;
-+ }
-+
-+ *val = (int)simple_strtoul(pbuf, &end, 10) * mult;
-+ if (pbuf == end)
-+ return -EINVAL;
-+
-+ if (end != NULL && *end == '.') {
-+ int temp_val, pow = 1;
-+ int i;
-+
-+ pbuf = end + 1;
-+ if (strlen(pbuf) > 5)
-+ pbuf[5] = '\0'; /* only allow 5 fractional digits */
-+
-+ temp_val = (int)simple_strtoul(pbuf, &end, 10) * mult;
-+
-+ if (pbuf < end) {
-+ for (i = 0; i < (end - pbuf); i++)
-+ pow *= 10;
-+
-+ *val += temp_val / pow;
-+ }
-+ }
-+ return 0;
-+}
-+
-+int lprocfs_read_frac_helper(char *buffer, unsigned long count, long val,
-+ int mult)
-+{
-+ long decimal_val, frac_val;
-+ int prtn;
-+
-+ if (count < 10)
-+ return -EINVAL;
-+
-+ decimal_val = val / mult;
-+ prtn = snprintf(buffer, count, "%ld", decimal_val);
-+ frac_val = val % mult;
-+
-+ if (prtn < (count - 4) && frac_val > 0) {
-+ long temp_frac;
-+ int i, temp_mult = 1, frac_bits = 0;
-+
-+ temp_frac = frac_val * 10;
-+ buffer[prtn++] = '.';
-+ while (frac_bits < 2 && (temp_frac / mult) < 1 ) {
-+ /* only keep 2 fractional digits */
-+ buffer[prtn++] ='0';
-+ temp_frac *= 10;
-+ frac_bits++;
-+ }
-+ /*
-+ Cases to consider:
-+ 1. #echo x.00 > /proc/xxx output result : x
-+ 2. #echo x.0x > /proc/xxx output result : x.0x
-+ 3. #echo x.x0 > /proc/xxx output result : x.x
-+ 4. #echo x.xx > /proc/xxx output result : x.xx
-+ Only 2 fractional digits are kept.
-+ */
-+ for (i = 0; i < (5 - prtn); i++)
-+ temp_mult *= 10;
-+
-+ frac_bits = min((int)count - prtn, 3 - frac_bits);
-+ prtn += snprintf(buffer + prtn, frac_bits, "%ld",
-+ frac_val * temp_mult / mult);
-+
-+ prtn--;
-+ while(buffer[prtn] < '1' || buffer[prtn] > '9') {
-+ prtn--;
-+ if (buffer[prtn] == '.') {
-+ prtn--;
-+ break;
-+ }
-+ }
-+ prtn++;
-+ }
-+ buffer[prtn++] ='\n';
-+ return prtn;
-+}
-+
-+int lprocfs_write_u64_helper(const char *buffer, unsigned long count,__u64 *val)
-+{
-+ return lprocfs_write_frac_u64_helper(buffer, count, val, 1);
-+}
-+
-+int lprocfs_write_frac_u64_helper(const char *buffer, unsigned long count,
-+ __u64 *val, int mult)
-+{
-+ char kernbuf[22], *end, *pbuf;
-+ __u64 whole, frac = 0, units;
-+ unsigned frac_d = 1;
-+
-+ if (count > (sizeof(kernbuf) - 1))
-+ return -EINVAL;
-+
-+ if (copy_from_user(kernbuf, buffer, count))
-+ return -EFAULT;
-+
-+ kernbuf[count] = '\0';
-+ pbuf = kernbuf;
-+ if (*pbuf == '-') {
-+ mult = -mult;
-+ pbuf++;
-+ }
-+
-+ whole = simple_strtoull(pbuf, &end, 10);
-+ if (pbuf == end)
-+ return -EINVAL;
-+
-+ if (end != NULL && *end == '.') {
-+ int i;
-+ pbuf = end + 1;
-+
-+ /* need to limit frac_d to a __u32 */
-+ if (strlen(pbuf) > 10)
-+ pbuf[10] = '\0';
-+
-+ frac = simple_strtoull(pbuf, &end, 10);
-+ /* count decimal places */
-+ for (i = 0; i < (end - pbuf); i++)
-+ frac_d *= 10;
-+ }
-+
-+ units = 1;
-+ switch(*end) {
-+ case 'p': case 'P':
-+ units <<= 10;
-+ case 't': case 'T':
-+ units <<= 10;
-+ case 'g': case 'G':
-+ units <<= 10;
-+ case 'm': case 'M':
-+ units <<= 10;
-+ case 'k': case 'K':
-+ units <<= 10;
-+ }
-+ /* Specified units override the multiplier */
-+ if (units)
-+ mult = mult < 0 ? -units : units;
-+
-+ frac *= mult;
-+ do_div(frac, frac_d);
-+ *val = whole * mult + frac;
-+ return 0;
-+}
-+
-+int lprocfs_seq_create(cfs_proc_dir_entry_t *parent,
-+ char *name, mode_t mode,
-+ struct file_operations *seq_fops, void *data)
-+{
-+ struct proc_dir_entry *entry;
-+ ENTRY;
-+
-+ entry = create_proc_entry(name, mode, parent);
-+ if (entry == NULL)
-+ RETURN(-ENOMEM);
-+ entry->proc_fops = seq_fops;
-+ entry->data = data;
-+
-+ RETURN(0);
-+}
-+EXPORT_SYMBOL(lprocfs_seq_create);
-+
-+__inline__ int lprocfs_obd_seq_create(struct obd_device *dev, char *name,
-+ mode_t mode,
-+ struct file_operations *seq_fops,
-+ void *data)
-+{
-+ return (lprocfs_seq_create(dev->obd_proc_entry, name,
-+ mode, seq_fops, data));
-+}
-+EXPORT_SYMBOL(lprocfs_obd_seq_create);
-+
-+void lprocfs_oh_tally(struct obd_histogram *oh, unsigned int value)
-+{
-+ if (value >= OBD_HIST_MAX)
-+ value = OBD_HIST_MAX - 1;
-+
-+ spin_lock(&oh->oh_lock);
-+ oh->oh_buckets[value]++;
-+ spin_unlock(&oh->oh_lock);
-+}
-+EXPORT_SYMBOL(lprocfs_oh_tally);
-+
-+void lprocfs_oh_tally_log2(struct obd_histogram *oh, unsigned int value)
-+{
-+ unsigned int val;
-+
-+ for (val = 0; ((1 << val) < value) && (val <= OBD_HIST_MAX); val++)
-+ ;
-+
-+ lprocfs_oh_tally(oh, val);
-+}
-+EXPORT_SYMBOL(lprocfs_oh_tally_log2);
-+
-+unsigned long lprocfs_oh_sum(struct obd_histogram *oh)
-+{
-+ unsigned long ret = 0;
-+ int i;
-+
-+ for (i = 0; i < OBD_HIST_MAX; i++)
-+ ret += oh->oh_buckets[i];
-+ return ret;
-+}
-+EXPORT_SYMBOL(lprocfs_oh_sum);
-+
-+void lprocfs_oh_clear(struct obd_histogram *oh)
-+{
-+ spin_lock(&oh->oh_lock);
-+ memset(oh->oh_buckets, 0, sizeof(oh->oh_buckets));
-+ spin_unlock(&oh->oh_lock);
-+}
-+EXPORT_SYMBOL(lprocfs_oh_clear);
-+
-+int lprocfs_obd_rd_recovery_status(char *page, char **start, off_t off,
-+ int count, int *eof, void *data)
-+{
-+ struct obd_device *obd = data;
-+ int len = 0, size;
-+
-+ LASSERT(obd != NULL);
-+ LASSERT(count >= 0);
-+
-+ /* Set the start of the user data returned to
-+ page + off since the user may have
-+ requested to read much less than
-+ what we need to return */
-+ *start = page + off;
-+
-+ /* We know we are allocated a page here.
-+ Also we know that this function will
-+ not need to write more than a page
-+ so we can truncate at CFS_PAGE_SIZE. */
-+ size = min(count + (int)off + 1, (int)CFS_PAGE_SIZE);
-+
-+ /* Initialize the page */
-+ memset(page, 0, size);
-+
-+ if (lprocfs_obd_snprintf(&page, size, &len, "status: ") <= 0)
-+ goto out;
-+ if (obd->obd_max_recoverable_clients == 0) {
-+ if (lprocfs_obd_snprintf(&page, size, &len, "INACTIVE\n") <= 0)
-+ goto out;
-+
-+ goto fclose;
-+ }
-+
-+ /* sampled unlocked, but really... */
-+ if (obd->obd_recovering == 0) {
-+ if (lprocfs_obd_snprintf(&page, size, &len, "COMPLETE\n") <= 0)
-+ goto out;
-+ if (lprocfs_obd_snprintf(&page, size, &len,
-+ "recovery_start: %lu\n",
-+ obd->obd_recovery_start) <= 0)
-+ goto out;
-+ if (lprocfs_obd_snprintf(&page, size, &len,
-+ "recovery_duration: %lu\n",
-+ obd->obd_recovery_end -
-+ obd->obd_recovery_start) <= 0)
-+ goto out;
-+ /* Number of clients that have completed recovery */
-+ if (lprocfs_obd_snprintf(&page, size, &len,
-+ "completed_clients: %d/%d\n",
-+ obd->obd_max_recoverable_clients -
-+ obd->obd_recoverable_clients,
-+ obd->obd_max_recoverable_clients) <= 0)
-+ goto out;
-+ if (lprocfs_obd_snprintf(&page, size, &len,
-+ "replayed_requests: %d\n",
-+ obd->obd_replayed_requests) <= 0)
-+ goto out;
-+ if (lprocfs_obd_snprintf(&page, size, &len,
-+ "last_transno: "LPD64"\n",
-+ obd->obd_next_recovery_transno - 1)<=0)
-+ goto out;
-+ goto fclose;
-+ }
-+
-+ if (lprocfs_obd_snprintf(&page, size, &len, "RECOVERING\n") <= 0)
-+ goto out;
-+ if (lprocfs_obd_snprintf(&page, size, &len, "recovery_start: %lu\n",
-+ obd->obd_recovery_start) <= 0)
-+ goto out;
-+ if (lprocfs_obd_snprintf(&page, size, &len, "time_remaining: %lu\n",
-+ cfs_time_current_sec() >= obd->obd_recovery_end ? 0 :
-+ obd->obd_recovery_end - cfs_time_current_sec()) <= 0)
-+ goto out;
-+ if (lprocfs_obd_snprintf(&page, size, &len,"connected_clients: %d/%d\n",
-+ obd->obd_connected_clients,
-+ obd->obd_max_recoverable_clients) <= 0)
-+ goto out;
-+ /* Number of clients that have completed recovery */
-+ if (lprocfs_obd_snprintf(&page, size, &len,"completed_clients: %d/%d\n",
-+ obd->obd_max_recoverable_clients -
-+ obd->obd_recoverable_clients,
-+ obd->obd_max_recoverable_clients) <= 0)
-+ goto out;
-+ if (lprocfs_obd_snprintf(&page, size, &len,"replayed_requests: %d/??\n",
-+ obd->obd_replayed_requests) <= 0)
-+ goto out;
-+ if (lprocfs_obd_snprintf(&page, size, &len, "queued_requests: %d\n",
-+ obd->obd_requests_queued_for_recovery) <= 0)
-+ goto out;
-+ if (lprocfs_obd_snprintf(&page, size, &len, "next_transno: "LPD64"\n",
-+ obd->obd_next_recovery_transno) <= 0)
-+ goto out;
-+
-+fclose:
-+ *eof = 1;
-+out:
-+ return min(count, len - (int)off);
-+}
-+EXPORT_SYMBOL(lprocfs_obd_rd_recovery_status);
-+
-+int lprocfs_obd_rd_hash(char *page, char **start, off_t off,
-+ int count, int *eof, void *data)
-+{
-+ struct obd_device *obd = data;
-+ int c = 0;
-+
-+ if (obd == NULL)
-+ return 0;
-+
-+ c += lustre_hash_debug_header(page, count);
-+ c += lustre_hash_debug_str(obd->obd_uuid_hash, page + c, count - c);
-+ c += lustre_hash_debug_str(obd->obd_nid_hash, page + c, count - c);
-+ c += lustre_hash_debug_str(obd->obd_nid_stats_hash, page+c, count-c);
-+
-+ return c;
-+}
-+EXPORT_SYMBOL(lprocfs_obd_rd_hash);
-+
-+#ifdef CRAY_XT3
-+int lprocfs_obd_rd_recovery_maxtime(char *page, char **start, off_t off,
-+ int count, int *eof, void *data)
-+{
-+ struct obd_device *obd = (struct obd_device *)data;
-+ LASSERT(obd != NULL);
-+
-+ return snprintf(page, count, "%lu\n",
-+ obd->obd_recovery_max_time);
-+}
-+EXPORT_SYMBOL(lprocfs_obd_rd_recovery_maxtime);
-+
-+int lprocfs_obd_wr_recovery_maxtime(struct file *file, const char *buffer,
-+ unsigned long count, void *data)
-+{
-+ struct obd_device *obd = (struct obd_device *)data;
-+ int val, rc;
-+ LASSERT(obd != NULL);
-+
-+ rc = lprocfs_write_helper(buffer, count, &val);
-+ if (rc)
-+ return rc;
-+
-+ obd->obd_recovery_max_time = val;
-+ return count;
-+}
-+EXPORT_SYMBOL(lprocfs_obd_wr_recovery_maxtime);
-+#endif /* CRAY_XT3 */
-+
-+EXPORT_SYMBOL(lprocfs_register);
-+EXPORT_SYMBOL(lprocfs_srch);
-+EXPORT_SYMBOL(lprocfs_remove);
-+EXPORT_SYMBOL(lprocfs_add_vars);
-+EXPORT_SYMBOL(lprocfs_obd_setup);
-+EXPORT_SYMBOL(lprocfs_obd_cleanup);
-+EXPORT_SYMBOL(lprocfs_add_simple);
-+EXPORT_SYMBOL(lprocfs_free_per_client_stats);
-+EXPORT_SYMBOL(lprocfs_alloc_stats);
-+EXPORT_SYMBOL(lprocfs_free_stats);
-+EXPORT_SYMBOL(lprocfs_clear_stats);
-+EXPORT_SYMBOL(lprocfs_register_stats);
-+EXPORT_SYMBOL(lprocfs_init_ops_stats);
-+EXPORT_SYMBOL(lprocfs_init_ldlm_stats);
-+EXPORT_SYMBOL(lprocfs_alloc_obd_stats);
-+EXPORT_SYMBOL(lprocfs_free_obd_stats);
-+EXPORT_SYMBOL(lprocfs_exp_setup);
-+EXPORT_SYMBOL(lprocfs_exp_cleanup);
-+
-+EXPORT_SYMBOL(lprocfs_rd_u64);
-+EXPORT_SYMBOL(lprocfs_rd_atomic);
-+EXPORT_SYMBOL(lprocfs_wr_atomic);
-+EXPORT_SYMBOL(lprocfs_rd_uint);
-+EXPORT_SYMBOL(lprocfs_wr_uint);
-+EXPORT_SYMBOL(lprocfs_rd_uuid);
-+EXPORT_SYMBOL(lprocfs_rd_name);
-+EXPORT_SYMBOL(lprocfs_rd_fstype);
-+EXPORT_SYMBOL(lprocfs_rd_server_uuid);
-+EXPORT_SYMBOL(lprocfs_rd_conn_uuid);
-+EXPORT_SYMBOL(lprocfs_rd_num_exports);
-+EXPORT_SYMBOL(lprocfs_rd_numrefs);
-+EXPORT_SYMBOL(lprocfs_at_hist_helper);
-+EXPORT_SYMBOL(lprocfs_rd_import);
-+EXPORT_SYMBOL(lprocfs_rd_timeouts);
-+EXPORT_SYMBOL(lprocfs_rd_blksize);
-+EXPORT_SYMBOL(lprocfs_rd_kbytestotal);
-+EXPORT_SYMBOL(lprocfs_rd_kbytesfree);
-+EXPORT_SYMBOL(lprocfs_rd_kbytesavail);
-+EXPORT_SYMBOL(lprocfs_rd_filestotal);
-+EXPORT_SYMBOL(lprocfs_rd_filesfree);
-+
-+EXPORT_SYMBOL(lprocfs_write_helper);
-+EXPORT_SYMBOL(lprocfs_write_frac_helper);
-+EXPORT_SYMBOL(lprocfs_read_frac_helper);
-+EXPORT_SYMBOL(lprocfs_write_u64_helper);
-+EXPORT_SYMBOL(lprocfs_write_frac_u64_helper);
-+#endif /* LPROCFS*/
-diff -urNad lustre~/lustre/ptlrpc/service.c lustre/lustre/ptlrpc/service.c
---- lustre~/lustre/ptlrpc/service.c 2009-03-12 10:32:27.000000000 +0100
-+++ lustre/lustre/ptlrpc/service.c 2009-03-13 09:45:03.000000000 +0100
-@@ -1501,7 +1501,7 @@
- cfs_daemonize(name);
- exit_fs(cfs_current());
- current->fs = fs;
-- ll_set_fs_pwd(current->fs, init_task.fs->pwdmnt, init_task.fs->pwd);
-+ ll_set_fs_pwd(current->fs, cfs_fs_mnt(init_task.fs), cfs_fs_pwd(init_task.fs));
- }
-
- static void
--
Lustre Debian Packaging
More information about the Pkg-lustre-svn-commit mailing list