[Debian-ha-svn-commits] [SCM] cluster suite Debian packaging branch, experimental, updated. debian/3.0.9-1

Guido Günther agx at sigxcpu.org
Wed Mar 10 21:35:56 UTC 2010


The following commit has been merged in the experimental branch:
commit 1d4ad8fbc209e3e33884cb64bd91a125391c986c
Author: Guido Günther <agx at sigxcpu.org>
Date:   Wed Mar 10 21:23:04 2010 +0100

    Imported Upstream version 3.0.9

diff --git a/cman/cman_tool/join.c b/cman/cman_tool/join.c
index fa1920e..308d19b 100644
--- a/cman/cman_tool/join.c
+++ b/cman/cman_tool/join.c
@@ -348,21 +348,28 @@ int join(commandline_t *comline, char *main_envp[])
 		int envnum = 0;
 		const char *envvar = main_envp[envnum];
 		const char *equal;
+		char envname[PATH_MAX];
+
 
 		while (envvar) {
 			if (strncmp("COROSYNC_", envvar, 9) == 0) {
 				equal = strchr(envvar, '=');
 				if (equal) {
-					res = confdb_key_create(confdb_handle, object_handle, envvar, equal-envvar,
-								equal+1, strlen(equal+1));
+				        strncpy(envname, envvar, PATH_MAX);
+					if (equal-envvar < PATH_MAX) {
+					    envname[equal-envvar] = '\0';
+					
+					    res = confdb_key_create_typed(confdb_handle, object_handle, envname,
+									  equal+1, strlen(equal+1),CONFDB_VALUETYPE_STRING);
+					}
 				}
 			}
 			envvar = main_envp[++envnum];
 		}
 	}
-	res = confdb_key_create(confdb_handle, object_handle,
-				"COROSYNC_DEFAULT_CONFIG_IFACE", strlen("COROSYNC_DEFAULT_CONFIG_IFACE"),
-				config_modules, strlen(config_modules));
+	res = confdb_key_create_typed(confdb_handle, object_handle,
+				      "COROSYNC_DEFAULT_CONFIG_IFACE",
+				      config_modules, strlen(config_modules), CONFDB_VALUETYPE_STRING);
 	confdb_finalize (confdb_handle);
 
 join_exit:
diff --git a/cman/daemon/cman-preconfig.c b/cman/daemon/cman-preconfig.c
index cf68836..f27effa 100644
--- a/cman/daemon/cman-preconfig.c
+++ b/cman/daemon/cman-preconfig.c
@@ -256,27 +256,27 @@ static int add_ifaddr(struct objdb_iface_ver0 *objdb, char *mcast, char *ifaddr,
 		void *addrptr;
 
 		sprintf(tmp, "%d", num_interfaces);
-		objdb->object_key_create(interface_object_handle, "ringnumber", strlen("ringnumber"),
-					 tmp, strlen(tmp)+1);
+		objdb->object_key_create_typed(interface_object_handle, "ringnumber",
+					       tmp, strlen(tmp)+1, OBJDB_VALUETYPE_STRING);
 
 		if (if_addr.ss_family == AF_INET)
 			addrptr = &in->sin_addr;
 		else
 			addrptr = &in6->sin6_addr;
 		inet_ntop(if_addr.ss_family, addrptr, tmp, sizeof(tmp));
-		objdb->object_key_create(interface_object_handle, "bindnetaddr", strlen("bindnetaddr"),
-					 tmp, strlen(tmp)+1);
+		objdb->object_key_create_typed(interface_object_handle, "bindnetaddr",
+					       tmp, strlen(tmp)+1, OBJDB_VALUETYPE_STRING);
 
 		if (broadcast)
-			objdb->object_key_create(interface_object_handle, "broadcast", strlen("broadcast"),
-						 "yes", strlen("yes")+1);
+			objdb->object_key_create_typed(interface_object_handle, "broadcast",
+						       "yes", strlen("yes")+1, OBJDB_VALUETYPE_STRING);
 		else
-			objdb->object_key_create(interface_object_handle, "mcastaddr", strlen("mcastaddr"),
-						 mcast, strlen(mcast)+1);
+		        objdb->object_key_create_typed(interface_object_handle, "mcastaddr",
+						       mcast, strlen(mcast)+1, OBJDB_VALUETYPE_STRING);
 
 		sprintf(tmp, "%d", port);
-		objdb->object_key_create(interface_object_handle, "mcastport", strlen("mcastport"),
-					 tmp, strlen(tmp)+1);
+		objdb->object_key_create_typed(interface_object_handle, "mcastport",
+					       tmp, strlen(tmp)+1, OBJDB_VALUETYPE_STRING);
 
 		num_interfaces++;
 	}
@@ -601,8 +601,8 @@ static int get_nodename(struct objdb_iface_ver0 *objdb)
 		/* See if the user wants our default set of openais services (default=yes) */
 		objdb_get_int(objdb, object_handle, "disable_openais", &disable_openais, 0);
 
-		objdb->object_key_create(object_handle, "nodename", strlen("nodename"),
-					    nodename, strlen(nodename)+1);
+		objdb->object_key_create_typed(object_handle, "nodename",
+					       nodename, strlen(nodename)+1, OBJDB_VALUETYPE_STRING);
 	}
 	objdb->object_find_destroy(find_handle);
 
@@ -677,33 +677,28 @@ static void add_cman_overrides(struct objdb_iface_ver0 *objdb)
 	{
 		char *value;
 
-		objdb->object_key_create(object_handle, "version", strlen("version"),
-					 "2", 2);
+		objdb->object_key_create_typed(object_handle, "version",
+					       "2", 2, OBJDB_VALUETYPE_STRING);
 
 		sprintf(tmp, "%d", nodeid);
-		objdb->object_key_create(object_handle, "nodeid", strlen("nodeid"),
-					 tmp, strlen(tmp)+1);
+		objdb->object_key_create_typed(object_handle, "nodeid",
+					       tmp, strlen(tmp)+1, OBJDB_VALUETYPE_STRING);
 
-		objdb->object_key_create(object_handle, "vsftype", strlen("vsftype"),
-					 "none", strlen("none")+1);
+		objdb->object_key_create_typed(object_handle, "vsftype",
+					       "none", strlen("none")+1, OBJDB_VALUETYPE_STRING);
 
 		/* Set the token timeout is 10 seconds, but don't overrride anything that
 		   might be in cluster.conf */
 		if (objdb_get_string(objdb, object_handle, "token", &value)) {
 			snprintf(tmp, sizeof(tmp), "%d", DEFAULT_TOKEN_TIMEOUT);
-			objdb->object_key_create(object_handle, "token", strlen("token"),
-						 tmp, strlen(tmp)+1);
-		}
-		if (objdb_get_string(objdb, object_handle, "token_retransmits_before_loss_const", &value)) {
-			objdb->object_key_create(object_handle, "token_retransmits_before_loss_const",
-						 strlen("token_retransmits_before_loss_const"),
-						 "20", strlen("20")+1);
+			objdb->object_key_create_typed(object_handle, "token",
+						       tmp, strlen(tmp)+1, OBJDB_VALUETYPE_STRING);
 		}
 
 		/* Extend consensus & join timeouts per bz#214290 */
 		if (objdb_get_string(objdb, object_handle, "join", &value)) {
-			objdb->object_key_create(object_handle, "join", strlen("join"),
-						 "60", strlen("60")+1);
+			objdb->object_key_create_typed(object_handle, "join",
+						       "60", strlen("60")+1, OBJDB_VALUETYPE_STRING);
 		}
 		/* consensus should be 2*token, see bz#544482*/
 		if (objdb_get_string(objdb, object_handle, "consensus", &value)) {
@@ -712,26 +707,26 @@ static void add_cman_overrides(struct objdb_iface_ver0 *objdb)
 
 			objdb_get_int(objdb, object_handle, "token", &token, DEFAULT_TOKEN_TIMEOUT);
 			sprintf(calc_consensus, "%d", token*2);
-			objdb->object_key_create(object_handle, "consensus", strlen("consensus"),
-						 calc_consensus, strlen(calc_consensus)+1);
+			objdb->object_key_create_typed(object_handle, "consensus",
+						       calc_consensus, strlen(calc_consensus)+1, OBJDB_VALUETYPE_STRING);
 		}
 
 		/* Set RRP mode appropriately */
 		if (objdb_get_string(objdb, object_handle, "rrp_mode", &value)) {
 			if (num_interfaces > 1) {
-				objdb->object_key_create(object_handle, "rrp_mode", strlen("rrp_mode"),
-							 "active", strlen("active")+1);
+				objdb->object_key_create_typed(object_handle, "rrp_mode",
+							       "active", strlen("active")+1, OBJDB_VALUETYPE_STRING);
 			}
 			else {
-				objdb->object_key_create(object_handle, "rrp_mode", strlen("rrp_mode"),
-							 "none", strlen("none")+1);
+				objdb->object_key_create_typed(object_handle, "rrp_mode",
+							       "none", strlen("none")+1, OBJDB_VALUETYPE_STRING);
 			}
 		}
 
 		if (objdb_get_string(objdb, object_handle, "secauth", &value)) {
 			sprintf(tmp, "%d", 1);
-			objdb->object_key_create(object_handle, "secauth", strlen("secauth"),
-						 tmp, strlen(tmp)+1);
+			objdb->object_key_create_typed(object_handle, "secauth",
+						       tmp, strlen(tmp)+1, OBJDB_VALUETYPE_STRING);
 		}
 
 		/* optional security key filename */
@@ -739,8 +734,8 @@ static void add_cman_overrides(struct objdb_iface_ver0 *objdb)
 			objdb_get_string(objdb, object_handle, "keyfile", &key_filename);
 		}
 		else {
-			objdb->object_key_create(object_handle, "keyfile", strlen("keyfile"),
-						 key_filename, strlen(key_filename)+1);
+			objdb->object_key_create_typed(object_handle, "keyfile",
+						       key_filename, strlen(key_filename)+1, OBJDB_VALUETYPE_STRING);
 		}
 		if (!key_filename) {
 			/* Use the cluster name as key,
@@ -756,8 +751,8 @@ static void add_cman_overrides(struct objdb_iface_ver0 *objdb)
 
 			/* Key length must be a multiple of 4 */
 			keylen = (strlen(cluster_name)+4) & 0xFC;
-			objdb->object_key_create(object_handle, "key", strlen("key"),
-						 tmp, keylen);
+			objdb->object_key_create_typed(object_handle, "key",
+						       tmp, keylen, OBJDB_VALUETYPE_STRING);
 		}
 	}
 	objdb->object_find_destroy(find_handle);
@@ -776,40 +771,40 @@ static void add_cman_overrides(struct objdb_iface_ver0 *objdb)
 
 	/* enable timestamps on logging */
 	if (objdb_get_string(objdb, object_handle, "timestamp", &logstr)) {
-		objdb->object_key_create(object_handle, "timestamp", strlen("timestamp"),
-					    "on", strlen("on")+1);
+		objdb->object_key_create_typed(object_handle, "timestamp",
+					       "on", strlen("on")+1, OBJDB_VALUETYPE_STRING);
 	}
 
 	/* configure logfile */
 	if (objdb_get_string(objdb, object_handle, "to_logfile", &logstr)) {
-		objdb->object_key_create(object_handle, "to_logfile", strlen("to_logfile"),
-					    "yes", strlen("yes")+1);
+		objdb->object_key_create_typed(object_handle, "to_logfile",
+					       "yes", strlen("yes")+1, OBJDB_VALUETYPE_STRING);
 	}
 
 	if (objdb_get_string(objdb, object_handle, "logfile", &logstr)) {
-		objdb->object_key_create(object_handle, "logfile", strlen("logfile"),
-					    LOGDIR "/corosync.log", strlen(LOGDIR "/corosync.log")+1);
+		objdb->object_key_create_typed(object_handle, "logfile",
+					       LOGDIR "/corosync.log", strlen(LOGDIR "/corosync.log")+1, OBJDB_VALUETYPE_STRING);
 	}
 
 	if (objdb_get_string(objdb, object_handle, "logfile_priority", &logstr)) {
-		objdb->object_key_create(object_handle, "logfile_priority", strlen("logfile_priority"),
-					    loglevel, strlen(loglevel)+1);
+		objdb->object_key_create_typed(object_handle, "logfile_priority",
+					       loglevel, strlen(loglevel)+1, OBJDB_VALUETYPE_STRING);
 	}
 
 	/* syslog */
 	if (objdb_get_string(objdb, object_handle, "to_syslog", &logstr)) {
-		objdb->object_key_create(object_handle, "to_syslog", strlen("to_syslog"),
-					    "yes", strlen("yes")+1);
+		objdb->object_key_create_typed(object_handle, "to_syslog",
+					       "yes", strlen("yes")+1, OBJDB_VALUETYPE_STRING);
 	}
 
 	if (objdb_get_string(objdb, object_handle, "syslog_facility", &logstr)) {
-		objdb->object_key_create(object_handle, "syslog_facility", strlen("syslog_facility"),
-					    logfacility, strlen(logfacility)+1);
+		objdb->object_key_create_typed(object_handle, "syslog_facility",
+					 logfacility, strlen(logfacility)+1, OBJDB_VALUETYPE_STRING);
 	}
 
 	if (objdb_get_string(objdb, object_handle, "syslog_priority", &logstr)) {
-		objdb->object_key_create(object_handle, "syslog_priority", strlen("syslog_priority"),
-					    loglevel, strlen(loglevel)+1);
+		objdb->object_key_create_typed(object_handle, "syslog_priority",
+					       loglevel, strlen(loglevel)+1, OBJDB_VALUETYPE_STRING);
 	}
 
 	if (!debug) {
@@ -835,8 +830,8 @@ static void add_cman_overrides(struct objdb_iface_ver0 *objdb)
 	}
 
 	if (debug) {
-		objdb->object_key_create(object_handle, "to_stderr", strlen("to_stderr"),
-					    "yes", strlen("yes")+1);
+		objdb->object_key_create_typed(object_handle, "to_stderr",
+					       "yes", strlen("yes")+1, OBJDB_VALUETYPE_STRING);
 	}
 
 	/* Make sure we allow connections from user/group "ais" */
@@ -846,10 +841,10 @@ static void add_cman_overrides(struct objdb_iface_ver0 *objdb)
 					"aisexec", strlen("aisexec"));
 	}
 	objdb->object_find_destroy(find_handle);
-	objdb->object_key_create(object_handle, "user", strlen("user"),
-				    "ais", strlen("ais") + 1);
-	objdb->object_key_create(object_handle, "group", strlen("group"),
-				    "ais", strlen("ais") + 1);
+	objdb->object_key_create_typed(object_handle, "user",
+				 "ais", strlen("ais") + 1, OBJDB_VALUETYPE_STRING);
+	objdb->object_key_create_typed(object_handle, "group",
+				 "ais", strlen("ais") + 1, OBJDB_VALUETYPE_STRING);
 
 	objdb->object_find_create(cluster_parent_handle, "cman", strlen("cman"), &find_handle);
 	if (objdb->object_find_next(find_handle, &object_handle) == 0)
@@ -858,13 +853,13 @@ static void add_cman_overrides(struct objdb_iface_ver0 *objdb)
 
 		sprintf(str, "%d", cluster_id);
 
-		objdb->object_key_create(object_handle, "cluster_id", strlen("cluster_id"),
-					 str, strlen(str) + 1);
+		objdb->object_key_create_typed(object_handle, "cluster_id",
+					       str, strlen(str) + 1, OBJDB_VALUETYPE_STRING);
 
 		if (two_node) {
 			sprintf(str, "%d", 1);
-			objdb->object_key_create(object_handle, "two_node", strlen("two_node"),
-						 str, strlen(str) + 1);
+			objdb->object_key_create_typed(object_handle, "two_node",
+						       str, strlen(str) + 1, OBJDB_VALUETYPE_STRING);
 		}
 	}
 	objdb->object_find_destroy(find_handle);
@@ -872,18 +867,18 @@ static void add_cman_overrides(struct objdb_iface_ver0 *objdb)
 	/* Load the quorum service */
 	objdb->object_create(OBJECT_PARENT_HANDLE, &object_handle,
 			     "service", strlen("service"));
-	objdb->object_key_create(object_handle, "name", strlen("name"),
-				 "corosync_quorum", strlen("corosync_quorum") + 1);
-	objdb->object_key_create(object_handle, "ver", strlen("ver"),
-				 "0", 2);
+	objdb->object_key_create_typed(object_handle, "name",
+				       "corosync_quorum", strlen("corosync_quorum") + 1, OBJDB_VALUETYPE_STRING);
+	objdb->object_key_create_typed(object_handle, "ver",
+				       "0", 2, OBJDB_VALUETYPE_STRING);
 
 	/* Make sure we load our alter-ego - the main cman module */
 	objdb->object_create(OBJECT_PARENT_HANDLE, &object_handle,
 			     "service", strlen("service"));
-	objdb->object_key_create(object_handle, "name", strlen("name"),
-				 "corosync_cman", strlen("corosync_cman") + 1);
-	objdb->object_key_create(object_handle, "ver", strlen("ver"),
-				 "0", 2);
+	objdb->object_key_create_typed(object_handle, "name",
+				       "corosync_cman", strlen("corosync_cman") + 1, OBJDB_VALUETYPE_STRING);
+	objdb->object_key_create_typed(object_handle, "ver",
+				       "0", 2, OBJDB_VALUETYPE_STRING);
 
 	/* Define cman as the quorum provider for corosync */
 	objdb->object_find_create(OBJECT_PARENT_HANDLE, "quorum", strlen("quorum"), &find_handle);
@@ -893,8 +888,8 @@ static void add_cman_overrides(struct objdb_iface_ver0 *objdb)
 	}
 	objdb->object_find_destroy(find_handle);
 
-	objdb->object_key_create(object_handle, "provider", strlen("provider"),
-				 "quorum_cman", strlen("quorum_cman") + 1);
+	objdb->object_key_create_typed(object_handle, "provider",
+				       "quorum_cman", strlen("quorum_cman") + 1, OBJDB_VALUETYPE_STRING);
 }
 
 /* If ccs is not available then use some defaults */
@@ -983,20 +978,20 @@ static int set_noccs_defaults(struct objdb_iface_ver0 *objdb)
 			     "clusternodes", strlen("clusternodes"));
 	objdb->object_create(object_handle, &object_handle,
 			     "clusternode", strlen("clusternode"));
-	objdb->object_key_create(object_handle, "name", strlen("name"),
-				 nodename, strlen(nodename)+1);
+	objdb->object_key_create_typed(object_handle, "name",
+				       nodename, strlen(nodename)+1, OBJDB_VALUETYPE_STRING);
 
 	sprintf(tmp, "%d", votes);
-	objdb->object_key_create(object_handle, "votes", strlen("votes"),
-				 tmp, strlen(tmp)+1);
+	objdb->object_key_create_typed(object_handle, "votes",
+				       tmp, strlen(tmp)+1, OBJDB_VALUETYPE_STRING);
 
 	sprintf(tmp, "%d", nodeid);
-	objdb->object_key_create(object_handle, "nodeid", strlen("nodeid"),
-				 tmp, strlen(tmp)+1);
+	objdb->object_key_create_typed(object_handle, "nodeid",
+				       tmp, strlen(tmp)+1, OBJDB_VALUETYPE_STRING);
 
 	/* Write the default cluster name & ID in here too */
-	objdb->object_key_create(cluster_parent_handle, "name", strlen("name"),
-				 cluster_name, strlen(cluster_name)+1);
+	objdb->object_key_create_typed(cluster_parent_handle, "name",
+				       cluster_name, strlen(cluster_name)+1, OBJDB_VALUETYPE_STRING);
 
 
 	objdb->object_find_create(cluster_parent_handle, "cman", strlen("cman"), &find_handle);
@@ -1006,12 +1001,12 @@ static int set_noccs_defaults(struct objdb_iface_ver0 *objdb)
                                             "cman", strlen("cman"));
         }
 	sprintf(tmp, "%d", cluster_id);
-	objdb->object_key_create(object_handle, "cluster_id", strlen("cluster_id"),
-				    tmp, strlen(tmp)+1);
+	objdb->object_key_create_typed(object_handle, "cluster_id",
+				       tmp, strlen(tmp)+1, OBJDB_VALUETYPE_STRING);
 
 	sprintf(tmp, "%d", expected_votes);
-	objdb->object_key_create(object_handle, "expected_votes", strlen("expected_votes"),
-				    tmp, strlen(tmp)+1);
+	objdb->object_key_create_typed(object_handle, "expected_votes",
+				       tmp, strlen(tmp)+1, OBJDB_VALUETYPE_STRING);
 
 	objdb->object_find_destroy(find_handle);
 	return 0;
@@ -1045,8 +1040,8 @@ static int copy_config_tree(struct objdb_iface_ver0 *objdb, hdb_handle_t source_
 	while (!objdb->object_key_iter(source_object, &key_name, &key_name_len,
 				       &key_value, &key_value_len)) {
 
-		objdb->object_key_create(new_object, key_name, key_name_len,
-					 key_value, key_value_len);
+		objdb->object_key_create_typed(new_object, key_name,
+					       key_value, key_value_len, OBJDB_VALUETYPE_STRING);
 	}
 
 	/* Create sub-objects */
@@ -1190,6 +1185,7 @@ static const char *groupd_compat="groupd_compat";
 static const char *clvmd_interface="interface";
 static const char *cman_disallowed="disallowed";
 static const char *totem_crypto="crypto_accept";
+static const char *plock_ownership="plock_ownership";
 
 /*
  * Flags to set:
@@ -1204,6 +1200,7 @@ static void setup_old_compat(struct objdb_iface_ver0 *objdb, hdb_handle_t cluste
 	hdb_handle_t clvmd_handle;
 	hdb_handle_t cman_handle;
 	hdb_handle_t totem_handle;
+	hdb_handle_t gfs_handle;
 	char *value;
 
 	/* Set groupd to backwards compatibility mode */
@@ -1211,8 +1208,8 @@ static void setup_old_compat(struct objdb_iface_ver0 *objdb, hdb_handle_t cluste
 	if (objdb->object_key_get(groupd_handle, groupd_compat, strlen(groupd_compat),
 				  (void *)&value, NULL) ||
 	    !value) {
-		objdb->object_key_create(groupd_handle, groupd_compat, strlen(groupd_compat),
-					 "1", 2);
+		objdb->object_key_create_typed(groupd_handle, groupd_compat,
+					       "1", 2, OBJDB_VALUETYPE_STRING);
 	}
 
 	/* Make clvmd use cman */
@@ -1220,8 +1217,8 @@ static void setup_old_compat(struct objdb_iface_ver0 *objdb, hdb_handle_t cluste
 	if (objdb->object_key_get(clvmd_handle, clvmd_interface, strlen(clvmd_interface),
 				  (void *)&value, NULL) ||
 	    !value) {
-		objdb->object_key_create(clvmd_handle, clvmd_interface, strlen(clvmd_interface),
-					 "cman", 5);
+		objdb->object_key_create_typed(clvmd_handle, clvmd_interface,
+					       "cman", 5, OBJDB_VALUETYPE_STRING);
 	}
 
 	/* Make cman use disallowed mode */
@@ -1229,8 +1226,8 @@ static void setup_old_compat(struct objdb_iface_ver0 *objdb, hdb_handle_t cluste
 	if (objdb->object_key_get(cman_handle, cman_disallowed, strlen(cman_disallowed),
 				  (void *)&value, NULL) ||
 	    !value) {
-		objdb->object_key_create(cman_handle, cman_disallowed, strlen(cman_disallowed),
-					 "1", 2);
+		objdb->object_key_create_typed(cman_handle, cman_disallowed,
+					       "1", 2, OBJDB_VALUETYPE_STRING);
 	}
 
 	/* Make totem use the old communications method */
@@ -1238,8 +1235,17 @@ static void setup_old_compat(struct objdb_iface_ver0 *objdb, hdb_handle_t cluste
 	if (objdb->object_key_get(totem_handle, totem_crypto, strlen(totem_crypto),
 				  (void *)&value, NULL) ||
 	    !value) {
-		objdb->object_key_create(totem_handle, totem_crypto, strlen(totem_crypto),
-					 "old", 4);
+		objdb->object_key_create_typed(totem_handle, totem_crypto,
+					       "old", 4, OBJDB_VALUETYPE_STRING);
+	}
+
+	/* Disable plock ownership */
+	gfs_handle = find_or_create_object(objdb, "gfs_controld", OBJECT_PARENT_HANDLE);
+	if (objdb->object_key_get(gfs_handle, plock_ownership, strlen(plock_ownership),
+				  (void *)&value, NULL) ||
+	    !value) {
+		objdb->object_key_create_typed(gfs_handle, plock_ownership,
+					       "0", 2, OBJDB_VALUETYPE_STRING);
 	}
 }
 
@@ -1295,8 +1301,8 @@ static int cmanpre_readconfig(struct objdb_iface_ver0 *objdb, const char **error
 		objdb->object_create(OBJECT_PARENT_HANDLE, &object_handle,
 					"libccs", strlen("libccs"));
 
-		objdb->object_key_create(object_handle, "next_handle", strlen("next_handle"),
-					 &next_handle, sizeof(int));
+		objdb->object_key_create_typed(object_handle, "next_handle",
+					       &next_handle, sizeof(uint32_t), OBJDB_VALUETYPE_UINT32);
 	}
 	objdb->object_find_destroy(find_handle);
 
diff --git a/cman/init.d/cman.in b/cman/init.d/cman.in
index a35590a..e0281be 100644
--- a/cman/init.d/cman.in
+++ b/cman/init.d/cman.in
@@ -683,18 +683,18 @@ join_fence_domain()
 	if ! cman_tool status | grep Flags | grep 2node \
 		> /dev/null 2>&1; then
 		errmsg=$( fence_tool join -w $FENCE_JOIN_TIMEOUT \
-			> /dev/null 2>&1 ) || return 1
+			2>&1 ) || return 1
 	else
 		errmsg=$( fence_tool join -w $FENCE_JOIN_TIMEOUT \
 			-m $FENCED_MEMBER_DELAY join \
-			> /dev/null 2>&1 ) || return 1
+			2>&1 ) || return 1
 	fi
 }
 
 leave_fence_domain()
 {
 	if status fenced > /dev/null 2>&1; then
-		fence_tool leave -w 10 > /dev/null 2>&1
+		errmsg=$( fence_tool leave -w 10 2>&1 )
 		return $?
 	fi
 }
diff --git a/cman/man/qdisk.5 b/cman/man/qdisk.5
index f578e92..efa3638 100644
--- a/cman/man/qdisk.5
+++ b/cman/man/qdisk.5
@@ -73,14 +73,15 @@ the amount of synchronous I/O contention on the shared quorum disk.
 * Cluster node IDs must be statically configured in cluster.conf and
 must be numbered from 1..16 (there can be gaps, of course).
 
-* Cluster node votes should be more or less equal.
+* Cluster node votes must all be 1.
 
 * CMAN must be running before the qdisk program can operate in full
 capacity.  If CMAN is not running, qdisk will wait for it.
 
 * CMAN's eviction timeout should be at least 2x the quorum daemon's
 to give the quorum daemon adequate time to converge on a master during a
-failure + load spike situation.
+failure + load spike situation.  See section 3.3.1 for specific
+details.
 
 * For 'all-but-one' failure operation, the total number of votes assigned
 to the quorum device should be equal to or greater than the total number
@@ -90,7 +91,7 @@ been explored.
 
 * For 'tiebreaker' operation in a two-node cluster, unset CMAN's two_node
 flag (or set it to 0), set CMAN's expected votes to '3', set each node's
-vote to '1', and set qdisk's vote count to '1' as well.  This will allow
+vote to '1', and leave qdisk's vote count unset.  This will allow
 the cluster to operate if either both nodes are online, or a single node &
 the heuristics.
 
@@ -211,6 +212,7 @@ This is the frequency of read/write cycles, in seconds.
 \fItko\fP\fB="\fP10\fB"\fP
 .in 12
 This is the number of cycles a node must miss in order to be declared dead.
+The default for this number is dependent on the configured token timeout.
 
 .in 9
 \fItko_up\fP\fB="\fPX\fB"\fP
@@ -237,7 +239,10 @@ exceed \fBtko\fP.
 \fIvotes\fP\fB="\fP3\fB"\fP
 .in 12
 This is the number of votes the quorum daemon advertises to CMAN when it
-has a high enough score.
+has a high enough score.  The default is the number of nodes in the cluster
+minus 1.  For example, in a 4 node cluster, the default is 3.  This value
+may change during normal operation, for example when adding or removing
+a node from the cluster.
 
 .in 9
 \fIlog_level\fP\fB="\fP4\fB"\fP
@@ -289,12 +294,18 @@ This option requires careful tuning of the CMAN timeout, the qdiskd
 timeout, and CMAN's quorum_dev_poll value.  As a rule of thumb,
 CMAN's quorum_dev_poll value should be equal to Totem's token timeout
 and qdiskd's timeout (interval*tko) should be less than half of
-Totem's token timeout.
+Totem's token timeout.  See section 3.3.1 for more information.
 
 This option only takes effect if there are no heuristics
 configured.  Usage of this option in configurations with more than
 two cluster nodes is undefined and should not be done.
 
+In a two-node cluster with no heuristics and no defined vote
+count (see above), this mode is turned on by default.  If enabled in
+this way at startup and a node is later added to the cluster 
+configuration or the vote count is set to a value other than 1, this
+mode will be disabled.
+
 .in 9
 \fIallow_kill\fP\fB="\fP1\fB"\fP
 .in 12
@@ -372,7 +383,7 @@ label.  This is useful in configurations where the block device name
 differs on a per-node basis.
 
 .in 9
-\fIcman_label\fP\fB="\fPmylabel\fB"/>\fP
+\fIcman_label\fP\fB="\fPmylabel\fB"\fP
 .in 12
 This overrides the label advertised to CMAN if present.  If specified,
 the quorum daemon will register with this name instead of the actual
@@ -391,6 +402,28 @@ qdiskd is running.  This option is ignored if io_timeout is set to 1.
 \fB/>\fP
 .in 0
 
+.SH "3.3.1.  Quorum Disk Timings"
+Qdiskd should not be used in environments requiring failure detection
+times of less than approximately 10 seconds.
+
+Qdiskd will attempt to automatically configure timings based on the
+totem timeout and the TKO.  If configuring manually, Totem's token
+timeout \fBmust\fP be set to a value at least 1 interval greater than
+the following function:
+
+  interval * (tko + master_wait + upgrade_wait)
+
+So, if you have an interval of 2, a tko of 7, master_wait of 2 and
+upgrade_wait of 2, the token timeout should be at least 24 seconds
+(24000 msec).
+
+It is recommended to have at least 3 intervals to reduce the risk of 
+quorum loss during heavy I/O load.  As a rule of thumb, using a totem
+timeout more than 2x of qdiskd's timeout will result in good behavior.
+
+An improper timing configuration will cause CMAN to give up on qdiskd,
+causing a temporary loss of quorum during master transition.
+
 .SH "3.2.  The <heuristic> tag"
 This tag is a child of the <quorumd> tag.  Heuristics may not be changed
 while qdiskd is running.
diff --git a/cman/qdisk/disk.c b/cman/qdisk/disk.c
index a26707d..1f646ce 100644
--- a/cman/qdisk/disk.c
+++ b/cman/qdisk/disk.c
@@ -737,13 +737,13 @@ qdisk_init(char *partname, char *label)
 
 	ret = qdisk_validate(partname);
 	if (ret < 0) {
-		logt_print(LOG_DEBUG, "qdisk_verify");
+		logt_print(LOG_DEBUG, "qdisk_validate: %s\n", strerror(errno));
 		return -1;
 	}
 
 	ret = qdisk_open(partname, &disk);
 	if (ret < 0) {
-		logt_print(LOG_ERR, "qdisk_open");
+		logt_print(LOG_ERR, "qdisk_open: %s\n", strerror(errno));
 		return -1;
 	}
 
diff --git a/cman/qdisk/disk.h b/cman/qdisk/disk.h
index c5b3d18..93d15fe 100644
--- a/cman/qdisk/disk.h
+++ b/cman/qdisk/disk.h
@@ -55,7 +55,9 @@ typedef enum {
 	RF_UPTIME = 0x20,
 	RF_CMAN_LABEL = 0x40,
 	RF_IOTIMEOUT = 0x80,
-	RF_MASTER_WINS = 0x100
+	RF_MASTER_WINS = 0x100,
+	RF_AUTO_VOTES = 0x200,
+	RF_AUTO_MASTER_WINS = 0x400
 } run_flag_t;
 
 
@@ -246,7 +248,7 @@ typedef struct {
 	int qc_max_error_cycles;
 	int qc_master;		/* Master?! */
 	int qc_config;
-	int qc_pad;
+	int qc_token_timeout;
 	disk_node_state_t qc_disk_status;
 	disk_node_state_t qc_status;
 	run_flag_t qc_flags;
diff --git a/cman/qdisk/main.c b/cman/qdisk/main.c
index eb3ab3c..4d1c411 100644
--- a/cman/qdisk/main.c
+++ b/cman/qdisk/main.c
@@ -24,6 +24,7 @@
 #include <ccs.h>
 #include <liblogthread.h>
 #include "score.h"
+#include "../daemon/cman.h"
 #include <sys/syslog.h>
 
 #define LOG_DAEMON_NAME  "qdiskd"
@@ -1337,11 +1338,70 @@ get_log_config_data(int ccsfd)
 }
 
 
+/*
+ * Query clusternode[N]/@votes for N = 1, 2, ... until ccs_get() fails.
+ * Returns (entries read - 1); that is -1 when none could be read and 0
+ * (logged as an error) for one.  Returns -2 if any votes value != 1.
+ */
+static int
+auto_qdisk_votes(int desc)
+{
+	int ret = 1;
+	char buf[PATH_MAX];
+	char *name;
+
+	while (1) {
+		int votes=0;
+
+		snprintf(buf, sizeof(buf)-1,
+			"/cluster/clusternodes/clusternode[%d]/@votes", ret);
+
+		name = NULL;
+		if (ccs_get(desc, buf, &name) != 0)
+			break;
+
+		votes=atoi(name);
+		if (votes != 1) {
+			free(name);
+
+			snprintf(buf, sizeof(buf)-1,
+			    "/cluster/clusternodes/clusternode[%d]/@name",
+			    ret);
+
+			if (ccs_get(desc, buf, &name) == 0) {
+				logt_print(LOG_ERR, "%s's vote count is %d\n",
+					   name, votes);
+				free(name);
+			}
+
+			logt_print(LOG_ERR, "Set all node vote counts to 1 "
+				   "or specify qdiskd's votes\n");
+			return -2;
+		}
+
+		free(name);
+		ret++;
+ 	}
+ 
+	// ret is (entries read + 1) here; subtract 2 to get entries - 1
+	ret = ret - 2;
+ 
+	if (ret <= 0)
+		logt_print(LOG_ERR, "Unable to determine qdiskd votes "
+			   "automatically\n");
+	else
+		logt_print(LOG_DEBUG, "Setting votes to %d\n", ret);
+ 
+ 	return (ret);
+}
+
+
 static int
 get_dynamic_config_data(qd_ctx *ctx, int ccsfd)
 {
 	char *val = NULL;
 	char query[256];
+	int old_votes = 0;
 
 	if (ccsfd < 0)
 		return -1;
@@ -1463,6 +1523,60 @@ get_dynamic_config_data(qd_ctx *ctx, int ccsfd)
 		free(val);
 	}
 
+	/* Get votes */
+	if (ctx->qc_config) {
+		old_votes = ctx->qc_votes;
+		ctx->qc_flags &= ~RF_AUTO_VOTES;
+	}
+
+	snprintf(query, sizeof(query), "/cluster/quorumd/@votes");
+	if (ccs_get(ccsfd, query, &val) == 0) {
+		ctx->qc_votes = atoi(val);
+		free(val);
+		if (ctx->qc_votes < 0)
+			ctx->qc_votes = 0;
+	} else {
+		ctx->qc_votes = auto_qdisk_votes(ccsfd);
+		if (ctx->qc_votes < 0) {
+			if (ctx->qc_config) {
+				logt_print(LOG_WARNING, "Unable to determine "
+					   "new vote value; retaining old "
+					   "value of %d\n", old_votes);
+				ctx->qc_votes = old_votes;
+			} else {
+				/* During startup, this is fatal */
+				return -1;
+			}
+		} else {
+			ctx->qc_flags |= RF_AUTO_VOTES;
+		}
+	}
+
+	if (ctx->qc_config && old_votes != ctx->qc_votes) {
+		logt_print(LOG_DEBUG, "Changing vote count from %d to %d\n",
+			   old_votes, ctx->qc_votes);
+
+		if (ctx->qc_flags & RF_AUTO_MASTER_WINS) {
+			logt_print(LOG_DEBUG, "Vote count changed! "
+				   "Disabling master-wins\n");
+			ctx->qc_flags &= ~(RF_MASTER_WINS|RF_AUTO_MASTER_WINS);
+		}
+
+		/*
+		 * This is done in main() normally.  Here, we are
+		 * reconfiguring _only_ the votes at this point.  The 
+		 * label / cman runflags do not change during reconfiguration
+		 *
+		 * This only works after we have already gotten static
+		 * configuration data during initial startup.
+		 */
+		cman_register_quorum_device(ctx->qc_cman_admin,
+					    (ctx->qc_flags&RF_CMAN_LABEL)? 
+					    ctx->qc_cman_label:
+					    ctx->qc_device,
+					    ctx->qc_votes);
+	}
+
 	return 0;
 }
 
@@ -1472,6 +1586,7 @@ get_static_config_data(qd_ctx *ctx, int ccsfd)
 {
 	char *val = NULL;
 	char query[256];
+	int qdisk_fo;
 
 	if (ccsfd < 0)
 		return -1;
@@ -1486,14 +1601,37 @@ get_static_config_data(qd_ctx *ctx, int ccsfd)
 		if (ctx->qc_interval < 1)
 			ctx->qc_interval = 1;
 	}
+
+	snprintf(query, sizeof(query), "/cluster/totem/@token");
+	if (ccs_get(ccsfd, query, &val) == 0) {
+		ctx->qc_token_timeout = atoi(val);
+		free(val);
+		if (ctx->qc_token_timeout < 10000) {
+			logt_print(LOG_DEBUG, "Token timeout %d is too fast "
+				   "to use with qdiskd!\n",
+				   ctx->qc_token_timeout);
+			return -1;
+		} 
+	} else {
+		ctx->qc_token_timeout = DEFAULT_TOKEN_TIMEOUT;
+	}
 		
 	/* Get tko */
 	snprintf(query, sizeof(query), "/cluster/quorumd/@tko");
 	if (ccs_get(ccsfd, query, &val) == 0) {
 		ctx->qc_tko = atoi(val);
 		free(val);
-		if (ctx->qc_tko < 3)
-			ctx->qc_tko = 3;
+	} else {
+		ctx->qc_tko = ((ctx->qc_token_timeout / 1000) -
+			       ctx->qc_interval) / 2;
+		logt_print(LOG_DEBUG, "Auto-configured TKO as %d based on "
+			   "token=%d interval=%d\n", ctx->qc_tko,
+			   ctx->qc_token_timeout, ctx->qc_interval);
+	}
+
+	if (ctx->qc_tko < 4) {
+		logt_print(LOG_WARNING, "Quorum disk TKO (%d) is too low!\n",
+			   ctx->qc_tko);
 	}
 
 	/* Get up-tko (transition off->online) */
@@ -1527,14 +1665,27 @@ get_static_config_data(qd_ctx *ctx, int ccsfd)
 	}
 	if (ctx->qc_master_wait <= ctx->qc_tko_up)
 		ctx->qc_master_wait = ctx->qc_tko_up + 1;
-		
-	/* Get votes */
-	snprintf(query, sizeof(query), "/cluster/quorumd/@votes");
-	if (ccs_get(ccsfd, query, &val) == 0) {
-		ctx->qc_votes = atoi(val);
-		free(val);
-		if (ctx->qc_votes < 0)
-			ctx->qc_votes = 0;
+
+	logt_print(LOG_DEBUG, "Timings: %d tko, %d interval\n",
+		   ctx->qc_tko, ctx->qc_interval);
+	logt_print(LOG_DEBUG, "Timings: %d tko_up, %d master_wait, "
+		   "%d upgrade_wait\n",
+		   ctx->qc_tko_up, ctx->qc_master_wait, ctx->qc_upgrade_wait);
+
+	qdisk_fo = ctx->qc_interval * (ctx->qc_master_wait +
+				ctx->qc_upgrade_wait +
+				ctx->qc_tko) * 1000;
+	if (qdisk_fo >= ctx->qc_token_timeout) {
+		logt_print(LOG_ERR, "Quorum disk timings are too slow for "
+			   "configured token timeout\n");
+		logt_print(LOG_ERR, " * Totem Token timeout: %dms\n",
+			   ctx->qc_token_timeout);
+		logt_print(LOG_ERR, " * Min. Master recovery time: %dms\n",
+			   qdisk_fo);
+		logt_print(LOG_ERR,
+			   "Please set token timeout to at least %dms\n",
+			   qdisk_fo + (ctx->qc_interval * 1000));
+		return -1;
 	}
 
 	/* Get device */
@@ -1601,7 +1752,7 @@ get_static_config_data(qd_ctx *ctx, int ccsfd)
 static int
 get_config_data(qd_ctx *ctx, struct h_data *h, int maxh, int *cfh)
 {
-	int ccsfd = -1;
+	int ccsfd = -1, ret = -1;
 
 	ccsfd = ccs_connect();
 	if (ccsfd < 0) {
@@ -1625,14 +1776,20 @@ get_config_data(qd_ctx *ctx, struct h_data *h, int maxh, int *cfh)
 		ctx->qc_max_error_cycles = 0;
 	}
 	
-	if (ctx->qc_config ||
-	    get_dynamic_config_data(ctx, ccsfd) < 0)
+	if (get_dynamic_config_data(ctx, ccsfd) < 0) {
 		goto out;
+	}
+
+	if (ctx->qc_config) {
+		ret = 0;
+		goto out;
+	}
 
 	ctx->qc_config = 1;
 
-	if (get_static_config_data(ctx, ccsfd) < 0)
+	if (get_static_config_data(ctx, ccsfd) < 0) {
 		goto out;
+	}
 
 	*cfh = configure_heuristics(ccsfd, h, maxh);
 
@@ -1641,20 +1798,31 @@ get_config_data(qd_ctx *ctx, struct h_data *h, int maxh, int *cfh)
 			logt_print(LOG_WARNING, "Master-wins mode disabled\n");
 			ctx->qc_flags &= ~RF_MASTER_WINS;
 		}
+	} else {
+		if (ctx->qc_flags & RF_AUTO_VOTES &&
+		    !(ctx->qc_flags & RF_MASTER_WINS) &&
+		    ctx->qc_votes == 1) { 
+			/* Two node cluster, no heuristics, 1 vote for
+			 * quorum disk daemon.  Safe to enable master-wins.
+			 * In fact, qdiskd without master-wins in this config
+			 * is a waste of resources.
+			 */
+			ctx->qc_flags |= RF_MASTER_WINS | RF_AUTO_MASTER_WINS;
+			logt_print(LOG_INFO, "Enabling master-wins mode for "
+				   "simple two-node cluster\n");
+		}
 	}
 
+	ret = 0;
+
 	logt_print(LOG_DEBUG, "Quorum Daemon: %d heuristics, "
 		   "%d interval, %d tko, %d votes\n",
 		   *cfh, ctx->qc_interval, ctx->qc_tko, ctx->qc_votes);
-	logt_print(LOG_DEBUG, "%d tko_up, %d master_wait, "
-		   "%d upgrade_wait\n",
-		   ctx->qc_tko_up, ctx->qc_master_wait, ctx->qc_upgrade_wait);
-out:
 	logt_print(LOG_DEBUG, "Run Flags: %08x\n", ctx->qc_flags);
-
+out:
 	ccs_disconnect(ccsfd);
 
-	return 0;
+	return ret;
 }
 
 
diff --git a/cman/qdisk/mkqdisk.c b/cman/qdisk/mkqdisk.c
index 338bd5c..6d64ee5 100644
--- a/cman/qdisk/mkqdisk.c
+++ b/cman/qdisk/mkqdisk.c
@@ -19,7 +19,7 @@ main(int argc, char **argv)
 {
 	char device[128];
 	char *newdev = NULL, *newlabel = NULL;
-	int rv, verbose_level = 1;
+	int rv, flg = 0, verbose_level = 1;
 
 	printf(PROGRAM_NAME " v" RELEASE_VERSION "\n\n");
 
@@ -49,10 +49,12 @@ main(int argc, char **argv)
 			break;
 		case 'L':
 			/* List */
-			return find_partitions(NULL, NULL, 0, verbose_level);
+			flg = rv;
+ 			break;
 		case 'f':
-			return find_partitions( optarg, device,
-					       sizeof(device), verbose_level);
+			flg = rv;
+			newlabel = optarg;
+			break;
 		case 'c':
 			newdev = optarg;
 			break;
@@ -68,6 +70,14 @@ main(int argc, char **argv)
 		}
 	}
 
+	/* list */
+	if (flg == 'L') {
+		return find_partitions(NULL, NULL, 0, verbose_level);
+	} else if (flg == 'f') {
+		return find_partitions( newlabel, device,
+				       sizeof(device), verbose_level);
+	}
+
 	if (!newdev && !newlabel) {
 		printf("usage: mkqdisk -L | -f <label> | -c "
 		       "<device> -l <label>\n");
diff --git a/cman/qdisk/proc.c b/cman/qdisk/proc.c
index 1b7471e..688a4d1 100644
--- a/cman/qdisk/proc.c
+++ b/cman/qdisk/proc.c
@@ -36,13 +36,13 @@ check_device(char *device, char *label, quorum_header_t *qh,
 
 	ret = qdisk_validate(device);
 	if (ret < 0) {
-		logt_print(LOG_DEBUG, "qdisk_verify");
+		logt_print(LOG_DEBUG, "qdisk_validate: %s\n", strerror(errno));
 		return -1;
 	}
 
 	ret = qdisk_open(device, &disk);
 	if (ret < 0) {
-		logt_print(LOG_ERR, "qdisk_open");
+		logt_print(LOG_ERR, "qdisk_open: %s\n", strerror(errno));
 		return -1;
 	}
 
diff --git a/config/libs/libccsconfdb/libccs.c b/config/libs/libccsconfdb/libccs.c
index 6dc09a5..6a2b143 100644
--- a/config/libs/libccsconfdb/libccs.c
+++ b/config/libs/libccsconfdb/libccs.c
@@ -241,9 +241,9 @@ static hdb_handle_t create_ccs_handle(confdb_handle_t handle, int ccs_handle,
 
 	memset(buf, 0, sizeof(buf));
 	snprintf(buf, sizeof(buf), "%d", ccs_handle);
-	if (confdb_key_create
-	    (handle, connection_handle, "ccs_handle", strlen("ccs_handle"), buf,
-	     strlen(buf) + 1) != CS_OK) {
+	if (confdb_key_create_typed
+	    (handle, connection_handle, "ccs_handle", buf,
+	     strlen(buf) + 1, CONFDB_VALUETYPE_STRING) != CS_OK) {
 		destroy_ccs_handle(handle, connection_handle);
 		errno = ENOMEM;
 		return -1;
@@ -251,9 +251,9 @@ static hdb_handle_t create_ccs_handle(confdb_handle_t handle, int ccs_handle,
 
 	memset(buf, 0, sizeof(buf));
 	snprintf(buf, sizeof(buf), "%d", config_version);
-	if (confdb_key_create
+	if (confdb_key_create_typed
 	    (handle, connection_handle, "config_version",
-	     strlen("config_version"), buf, strlen(buf) + 1) != CS_OK) {
+	     buf, strlen(buf) + 1, CONFDB_VALUETYPE_STRING) != CS_OK) {
 		destroy_ccs_handle(handle, connection_handle);
 		errno = ENOMEM;
 		return -1;
@@ -261,9 +261,9 @@ static hdb_handle_t create_ccs_handle(confdb_handle_t handle, int ccs_handle,
 
 	memset(buf, 0, sizeof(buf));
 	snprintf(buf, sizeof(buf), "%d", xpath);
-	if (confdb_key_create
-	    (handle, connection_handle, "fullxpath", strlen("fullxpath"), buf,
-	     strlen(buf) + 1) != CS_OK) {
+	if (confdb_key_create_typed
+	    (handle, connection_handle, "fullxpath", buf,
+	     strlen(buf) + 1, CONFDB_VALUETYPE_STRING) != CS_OK) {
 		destroy_ccs_handle(handle, connection_handle);
 		errno = ENOMEM;
 		return -1;
@@ -342,10 +342,10 @@ int set_previous_query(confdb_handle_t handle, hdb_handle_t connection_handle,
 			}
 		}
 	} else {
-		if (confdb_key_create
+		if (confdb_key_create_typed
 		    (handle, connection_handle, "previous_query",
-		     strlen("previous_query"), previous_query,
-		     strlen(previous_query) + 1) != CS_OK) {
+		     previous_query,
+		     strlen(previous_query) + 1, CONFDB_VALUETYPE_STRING) != CS_OK) {
 			errno = ENOMEM;
 			return -1;
 		}
@@ -365,10 +365,10 @@ int set_previous_query(confdb_handle_t handle, hdb_handle_t connection_handle,
 			}
 		}
 	} else {
-		if (confdb_key_create
+		if (confdb_key_create_typed
 		    (handle, connection_handle, "query_handle",
-		     strlen("query_handle"), &query_handle,
-		     sizeof(hdb_handle_t)) != CS_OK) {
+		     &query_handle,
+		     sizeof(hdb_handle_t), CONFDB_VALUETYPE_UINT64) != CS_OK) {
 			errno = ENOMEM;
 			return -1;
 		}
@@ -378,10 +378,9 @@ int set_previous_query(confdb_handle_t handle, hdb_handle_t connection_handle,
 	    (handle, connection_handle, "iterator_tracker",
 	     strlen("iterator_tracker"), &temptracker, &templen) != CS_OK) {
 		temptracker = 1;
-		if (confdb_key_create
+		if (confdb_key_create_typed
 		    (handle, connection_handle, "iterator_tracker",
-		     strlen("iterator_tracker"), &temptracker,
-		     sizeof(unsigned int)) != CS_OK) {
+		     &temptracker, sizeof(unsigned int), CONFDB_VALUETYPE_UINT32) != CS_OK) {
 			errno = ENOMEM;
 			return -1;
 		}
diff --git a/config/man/cluster.conf.5 b/config/man/cluster.conf.5
index 336a841..aa653be 100644
--- a/config/man/cluster.conf.5
+++ b/config/man/cluster.conf.5
@@ -1,208 +1,237 @@
-.TH cluster.conf 5
+.TH CLUSTER.CONF 5 2010-01-12 cluster cluster
 
 .SH NAME
-cluster.conf - configuration file for cman, fence, dlm, gfs, rgmanager
+cluster.conf \- configuration file for cman and related daemons
 
-.SH DESCRIPTION
-
-The /etc/cluster/cluster.conf file contains configuration for:
-
-.B cman(5)
-for corosync and quorum configuration
-.br
-.B qdisk(5)
-for quorum disk configuration
-.br
-.B groupd(8)
-for daemon configuration
-.br
-.B fenced(8)
-for daemon and fence device configuration
-.br
-.B dlm_controld(8)
-for daemon configuration
-.br
-.B gfs_controld(8)
-for daemon configuration
-.br
-.B rgmanager(8)
-for daemon and resource configuration
-
-The same cluster.conf file must exist on each cluster node.
-
-When cman_tool starts corosync, the contents of cluster.conf are loaded into
-the corosync in-memory configuration database (confdb).  Daemons and programs
-listed above use the libccs library to read cluster.conf data from the
-corosync confdb.  (The libconfdb library can also be used for more general,
-non-xml confdb queries.)
-
-When cman configures corosync using cluster.conf, the corosync.conf file is
-not used.
+.SH SYNOPSIS
+.B /etc/cluster/cluster.conf
 
-.SS Cluster Nodes
-
-cluster.conf is an XML file.  It has one top-level \fIcluster\fP section
-containing everything else.  The cluster section has two mandatory
-attributes: \fIname\fP and \fIconfig_version\fP.  \fIname\fP can be up to
-15 characters long (16 including terminating null) and specifies the name
-of the cluster.  It is important that this name be unique among clusters
-on the same network.  \fIconfig_version\fP is a number used to identify
-the revision level of the cluster.conf file.
-
-  <cluster name="alpha" config_version="1">
-  </cluster>
-
-The set of nodes that make up the cluster are defined under the
-\fIclusternodes\fP section.  A \fIclusternode\fP section defines each
-node.  A clusternode has two mandatory attributes:
-.I name
+.SH DESCRIPTION
+When
+.BR cman_tool (8)
+starts the
+.BR corosync (8)
+daemon, the cluster.conf data is read into the corosync in-memory
+database (confdb).  The configuration is used by corosync,
+cman and other related cluster daemons and programs.  When cman
+configures corosync with cluster.conf, the
+.BR corosync.conf (5)
+file is not used.
+
+A basic cluster configuration is described below.
+Configuration options for other daemons/programs are described in
+their own man pages.
+.BR ccs_tool (8)
+can be used to do some basic cluster.conf editing.
+
+The cluster.rng schema is used to validate cluster.conf.  Unrecognized
+items will produce a warning during cluster startup, and invalid xml
+structure will cause the cluster startup to fail.  See
+.BR ccs_config_validate (8)
 and
-.I nodeid
+.BR ccs_config_dump (8).
+
+.SS Cluster
+The top level
+.B cluster
+section contains all other sections and has two required attributes:
+.TP 8
+.B name
+The name of the cluster can be up to 15 characters long (16 including
+terminating null).  It is important that this name be unique among
+clusters on the same network.
+.TP 8
+.B config_version
+The config_version specifies the revision level of the file and should be
+increased each time the file is updated.
+.P
+.nf
+<cluster name="alpha" config_version="1">
+</cluster>
+.fi
 
-The name should correspond to the hostname (the fully qualified name is
-generally not necessary) on the network interface to be used for cluster
-communication.  Nodeid's must be greater than zero and unique.
-
-  <cluster name="alpha" config_version="1">
-          <clusternodes>
-                  <clusternode name="node-01" nodeid="1">
-                  </clusternode>
-
-                  <clusternode name="node-02" nodeid="2">
-                  </clusternode>
-
-                  <clusternode name="node-03" nodeid="3">
-                  </clusternode>
-          </clusternodes>
-  </cluster>
+.SS Cluster Nodes
+The set of nodes that make up the cluster are defined in the
+.B clusternodes
+section which contains multiple
+.B clusternode
+sections.  A clusternode has two required attributes:
+.TP 8
+.B name
+The node name should correspond to the hostname on the network interface
+to be used for cluster communication.
+.TP 8
+.B nodeid
+The node id must be greater than zero and unique.
+.P
+.nf
+<cluster name="alpha" config_version="1">
+        <clusternodes>
+        <clusternode name="node-01" nodeid="1">
+        </clusternode>
+
+        <clusternode name="node-02" nodeid="2">
+        </clusternode>
+
+        <clusternode name="node-03" nodeid="3">
+        </clusternode>
+        </clusternodes>
+</cluster>
+.fi
 
 .SS Logging
-.br
-All daemons listed above use the <logging> section to configure loggging.
-Global settings apply to all:
-
-  <logging debug="on"/>
+Cluster daemons use a common
+.B logging
+section to configure their logging behavior.
+.P
+.nf
+<cluster name="alpha" config_version="1">
+        <logging/>
+</cluster>
+.fi
+.P
 
-Per-daemon settings override the corresponding global setting.  logging_daemon
-names that can be configured include: corosync, qdiskd, groupd, fenced,
+Global settings apply to all:
+.P
+.nf
+<logging debug="on"/>
+.fi
+.P
+
+Per-daemon
+.B logging_daemon
+subsections override the global settings.
+Daemon names that can be configured include: corosync, qdiskd, groupd, fenced,
 dlm_controld, gfs_controld, rgmanager.
+.P
+.nf
+<logging>
+        <logging_daemon name="qdiskd" debug="on"/>
+        <logging_daemon name="fenced" debug="on"/>
+</logging>
+.fi
+.P
 
-  <logging>
-      <logging_daemon name="qdiskd" debug="on"/>
-      <logging_daemon name="fenced" debug="on"/>
-  </logging>
-
-corosync daemon settings apply to all corosync subsystems by default, but
+Corosync daemon settings apply to all corosync subsystems by default, but
 subsystems can also be configured individually.  These include CLM, CPG, MAIN,
 SERV, CMAN, TOTEM, QUORUM, CONFDB, CKPT, EVT.
+.P
+.nf
+<logging>
+        <logging_daemon name="corosync" subsys="QUORUM" debug="on"/>
+        <logging_daemon name="corosync" subsys="CONFDB" debug="on"/>
+</logging>
+.fi
+.P
 
-  <logging>
-      <logging_daemon name="corosync" subsys="QUORUM" debug="on"/>
-      <logging_daemon name="corosync" subsys="CONFDB" debug="on"/>
-  </logging>
-
-.B Settings
-.br
-The settings available at global, daemon and subsystem levels are:
+The attributes available at global, daemon and subsystem levels are:
 
+.TP 8
 .B to_syslog
-.br
-enable/disable messages to syslog (yes/no)
-.br
-default "yes"
+enable/disable messages to syslog (yes/no), default "yes"
 
+.TP 8
 .B to_logfile
-.br
-enable/disable messages to log file (yes/no)
-.br
-default "yes"
+enable/disable messages to log file (yes/no), default "yes"
 
+.TP 8
 .B syslog_facility
-.br
-facility used for syslog messages
-.br
-default "daemon"
+facility used for syslog messages, default "daemon"
 
+.TP 8
 .B syslog_priority
-.br
-messages at this level and up will be sent to syslog
-.br
-default "info"
+messages at this level and up will be sent to syslog, default "info"
 
+.TP 8
 .B logfile_priority
-.br
-messages at this level and up will be written to log file
-.br
-default "info"
+messages at this level and up will be written to log file, default "info"
 
-.B logfile
-.br
+.TP 8
+.B logfile "\     "
 the log file name, default /var/log/cluster/<daemon>.log
 
+.TP 8
 .B debug="on"
-.br
-is a shortcut for logfile_priority="debug"
+a shortcut for logfile_priority="debug"
 
-.B Defaults
-.br
+.SH EXAMPLE
 An explicit configuration for the default settings would be:
-
+.P
+.nf
 <logging to_syslog="yes" to_logfile="yes" syslog_facility="daemon"
          syslog_priority="info" logfile_priority="info">
-.br
     <logging_daemon name="qdiskd"
              logfile="/var/log/cluster/qdiskd.log"/>
-.br
-    <logging_daemon name="groupd"
-             logfile="/var/log/cluster/groupd.log"/>
-.br
     <logging_daemon name="fenced"
              logfile="/var/log/cluster/fenced.log"/>
-.br
     <logging_daemon name="dlm_controld"
              logfile="/var/log/cluster/dlm_controld.log"/>
-.br
     <logging_daemon name="gfs_controld"
              logfile="/var/log/cluster/gfs_controld.log"/>
-.br
     <logging_daemon name="rgmanager"
              logfile="/var/log/cluster/rgmanager.log"/>
-.br
     <logging_daemon name="corosync"
              logfile="/var/log/cluster/corosync.log"/>
-.br
 </logging>
-
-.B Examples
-.br
-To include debug messages (and above) from all daemons in their default log files, either
-.in +7
-<logging debug="on"/> or
+.fi
+.P
+
+To include debug messages (and above) from all daemons in their default
+log files, use either of the following, which are equivalent:
+.P
+.nf
+<logging debug="on"/>
 <logging logfile_priority="debug"/>
-.in -7
+.fi
+.P
 
-To exclude all log messages from syslog
-.in +7
+To exclude all log messages from syslog:
+.P
+.nf
 <logging to_syslog="no"/>
-.in -7
+.fi
+.P
 
-To disable logging to all log files
-.in +7
+To disable logging to all log files:
+.P
+.nf
 <logging to_file="no"/>
-.in -7
+.fi
+.P
 
-To include debug messages (and above) from all daemons in syslog
-.in +7
+To include debug messages (and above) from all daemons in syslog:
+.P
+.nf
 <logging syslog_priority="debug"/>
-.in -7
+.fi
+.P
 
 To limit syslog messages to error (and above), keeping info (and above) in
-log files (this logfile_priority setting is the default so could be omitted)
-.in +7
+log files (this logfile_priority setting is the default so could be omitted):
+.P
+.nf
 <logging syslog_priority="error" logfile_priority="info"/>
-.in -7
+.fi
+.P
 
+.SH FILES
+.TP
+.I /etc/cluster/cluster.conf
+standard location of cluster configuration file
+.TP
+.I /usr/share/cluster/cluster.rng
+standard location of cluster.conf schema
 
 .SH SEE ALSO
-cman(5), qdisk(5), groupd(8), fenced(8), dlm_controld(8), gfs_controld(8), rgmanager(8)
+.BR ccs_tool (8),
+.BR ccs_config_dump (8),
+.BR ccs_config_validate (8),
+.BR cman_tool (8),
+.BR cman (5),
+.BR qdisk (5),
+.BR fenced (8),
+.BR fence_node (8),
+.BR dlm_controld (8),
+.BR gfs_controld (8),
+.BR rgmanager (8)
 
diff --git a/config/plugins/ldap/99cluster.ldif b/config/plugins/ldap/99cluster.ldif
index c2ec09b..4039bb6 100644
--- a/config/plugins/ldap/99cluster.ldif
+++ b/config/plugins/ldap/99cluster.ldif
@@ -1,12 +1,6 @@
-# Auto-generated @ 2009-12-07 14:40:21
+# Auto-generated @ 2010-02-10 13:29:29
 dn: cn=schema
 attributeTypes: (
-  1.3.6.1.4.1.2312.8.1.1.15 NAME 'rhcsAlias'
-  EQUALITY caseExactIA5Match
-  SYNTAX 1.3.6.1.4.1.1466.115.121.1.26
-  SINGLE-VALUE
-  )
-attributeTypes: (
   1.3.6.1.4.1.2312.8.1.1.1 NAME 'rhcsConfig-version'
   EQUALITY caseExactIA5Match
   SYNTAX 1.3.6.1.4.1.1466.115.121.1.26
@@ -80,8 +74,8 @@ attributeTypes: (
   )
 attributeTypes: (
   1.3.6.1.4.1.2312.8.1.1.228 NAME 'rhcsBroadcast'
-  EQUALITY integerMatch
-  SYNTAX 1.3.6.1.4.1.1466.115.121.1.27
+  EQUALITY caseExactIA5Match
+  SYNTAX 1.3.6.1.4.1.1466.115.121.1.26
   SINGLE-VALUE
   )
 attributeTypes: (
@@ -314,8 +308,8 @@ attributeTypes: (
   )
 attributeTypes: (
   1.3.6.1.4.1.2312.8.1.1.70 NAME 'rhcsUse-uuid'
-  EQUALITY booleanMatch
-  SYNTAX 1.3.6.1.4.1.1466.115.121.1.7
+  EQUALITY caseExactIA5Match
+  SYNTAX 1.3.6.1.4.1.1466.115.121.1.26
   SINGLE-VALUE
   )
 attributeTypes: (
@@ -589,175 +583,187 @@ attributeTypes: (
   SINGLE-VALUE
   )
 attributeTypes: (
-  1.3.6.1.4.1.2312.8.1.1.59 NAME 'rhcsExec'
+  1.3.6.1.4.1.2312.8.1.1.63 NAME 'rhcsIp-family'
   EQUALITY caseExactIA5Match
   SYNTAX 1.3.6.1.4.1.1466.115.121.1.26
   SINGLE-VALUE
   )
 attributeTypes: (
-  1.3.6.1.4.1.2312.8.1.1.60 NAME 'rhcsVmware-type'
+  1.3.6.1.4.1.2312.8.1.1.252 NAME 'rhcsIpport'
   EQUALITY caseExactIA5Match
   SYNTAX 1.3.6.1.4.1.1466.115.121.1.26
   SINGLE-VALUE
   )
 attributeTypes: (
-  1.3.6.1.4.1.2312.8.1.1.52 NAME 'rhcsSecure'
+  1.3.6.1.4.1.2312.8.1.1.66 NAME 'rhcsRetrans'
   EQUALITY caseExactIA5Match
   SYNTAX 1.3.6.1.4.1.1466.115.121.1.26
   SINGLE-VALUE
   )
 attributeTypes: (
-  1.3.6.1.4.1.2312.8.1.1.61 NAME 'rhcsVmware-datacenter'
+  1.3.6.1.4.1.2312.8.1.1.69 NAME 'rhcsDomain'
   EQUALITY caseExactIA5Match
   SYNTAX 1.3.6.1.4.1.1466.115.121.1.26
   SINGLE-VALUE
   )
 attributeTypes: (
-  1.3.6.1.4.1.2312.8.1.1.53 NAME 'rhcsVerbose'
+  1.3.6.1.4.1.2312.8.1.1.71 NAME 'rhcsTimeout'
   EQUALITY caseExactIA5Match
   SYNTAX 1.3.6.1.4.1.1466.115.121.1.26
   SINGLE-VALUE
   )
 attributeTypes: (
-  1.3.6.1.4.1.2312.8.1.1.245 NAME 'rhcsVersion'
+  1.3.6.1.4.1.2312.8.1.1.270 NAME 'rhcsSerial-device'
   EQUALITY caseExactIA5Match
   SYNTAX 1.3.6.1.4.1.1466.115.121.1.26
   SINGLE-VALUE
   )
 attributeTypes: (
-  1.3.6.1.4.1.2312.8.1.1.246 NAME 'rhcsHelp'
+  1.3.6.1.4.1.2312.8.1.1.271 NAME 'rhcsSerial-params'
   EQUALITY caseExactIA5Match
   SYNTAX 1.3.6.1.4.1.1466.115.121.1.26
   SINGLE-VALUE
   )
 attributeTypes: (
-  1.3.6.1.4.1.2312.8.1.1.247 NAME 'rhcsSeparator'
+  1.3.6.1.4.1.2312.8.1.1.272 NAME 'rhcsChannel-address'
   EQUALITY caseExactIA5Match
   SYNTAX 1.3.6.1.4.1.1466.115.121.1.26
   SINGLE-VALUE
   )
 attributeTypes: (
-  1.3.6.1.4.1.2312.8.1.1.57 NAME 'rhcsPartition'
+  1.3.6.1.4.1.2312.8.1.1.59 NAME 'rhcsExec'
   EQUALITY caseExactIA5Match
   SYNTAX 1.3.6.1.4.1.1466.115.121.1.26
   SINGLE-VALUE
   )
 attributeTypes: (
-  1.3.6.1.4.1.2312.8.1.1.58 NAME 'rhcsManaged'
+  1.3.6.1.4.1.2312.8.1.1.60 NAME 'rhcsVmware-type'
   EQUALITY caseExactIA5Match
   SYNTAX 1.3.6.1.4.1.1466.115.121.1.26
   SINGLE-VALUE
   )
 attributeTypes: (
-  1.3.6.1.4.1.2312.8.1.1.248 NAME 'rhcsHmc-version'
+  1.3.6.1.4.1.2312.8.1.1.52 NAME 'rhcsSecure'
   EQUALITY caseExactIA5Match
   SYNTAX 1.3.6.1.4.1.1466.115.121.1.26
   SINGLE-VALUE
   )
 attributeTypes: (
-  1.3.6.1.4.1.2312.8.1.1.249 NAME 'rhcsCmd-prompt'
+  1.3.6.1.4.1.2312.8.1.1.61 NAME 'rhcsVmware-datacenter'
   EQUALITY caseExactIA5Match
   SYNTAX 1.3.6.1.4.1.1466.115.121.1.26
   SINGLE-VALUE
   )
 attributeTypes: (
-  1.3.6.1.4.1.2312.8.1.1.250 NAME 'rhcsInet4-only'
+  1.3.6.1.4.1.2312.8.1.1.53 NAME 'rhcsVerbose'
   EQUALITY caseExactIA5Match
   SYNTAX 1.3.6.1.4.1.1466.115.121.1.26
   SINGLE-VALUE
   )
 attributeTypes: (
-  1.3.6.1.4.1.2312.8.1.1.251 NAME 'rhcsInet6-only'
+  1.3.6.1.4.1.2312.8.1.1.245 NAME 'rhcsVersion'
   EQUALITY caseExactIA5Match
   SYNTAX 1.3.6.1.4.1.1466.115.121.1.26
   SINGLE-VALUE
   )
 attributeTypes: (
-  1.3.6.1.4.1.2312.8.1.1.252 NAME 'rhcsIpport'
+  1.3.6.1.4.1.2312.8.1.1.246 NAME 'rhcsHelp'
   EQUALITY caseExactIA5Match
   SYNTAX 1.3.6.1.4.1.1466.115.121.1.26
   SINGLE-VALUE
   )
 attributeTypes: (
-  1.3.6.1.4.1.2312.8.1.1.55 NAME 'rhcsIdentity-file'
+  1.3.6.1.4.1.2312.8.1.1.247 NAME 'rhcsSeparator'
   EQUALITY caseExactIA5Match
   SYNTAX 1.3.6.1.4.1.1466.115.121.1.26
   SINGLE-VALUE
   )
 attributeTypes: (
-  1.3.6.1.4.1.2312.8.1.1.253 NAME 'rhcsSnmp-version'
+  1.3.6.1.4.1.2312.8.1.1.57 NAME 'rhcsPartition'
   EQUALITY caseExactIA5Match
   SYNTAX 1.3.6.1.4.1.1466.115.121.1.26
   SINGLE-VALUE
   )
 attributeTypes: (
-  1.3.6.1.4.1.2312.8.1.1.254 NAME 'rhcsCommunity'
+  1.3.6.1.4.1.2312.8.1.1.58 NAME 'rhcsManaged'
   EQUALITY caseExactIA5Match
   SYNTAX 1.3.6.1.4.1.1466.115.121.1.26
   SINGLE-VALUE
   )
 attributeTypes: (
-  1.3.6.1.4.1.2312.8.1.1.255 NAME 'rhcsSnmp-auth-prot'
+  1.3.6.1.4.1.2312.8.1.1.248 NAME 'rhcsHmc-version'
   EQUALITY caseExactIA5Match
   SYNTAX 1.3.6.1.4.1.1466.115.121.1.26
   SINGLE-VALUE
   )
 attributeTypes: (
-  1.3.6.1.4.1.2312.8.1.1.256 NAME 'rhcsSnmp-sec-level'
+  1.3.6.1.4.1.2312.8.1.1.249 NAME 'rhcsCmd-prompt'
   EQUALITY caseExactIA5Match
   SYNTAX 1.3.6.1.4.1.1466.115.121.1.26
   SINGLE-VALUE
   )
 attributeTypes: (
-  1.3.6.1.4.1.2312.8.1.1.257 NAME 'rhcsSnmp-priv-prot'
+  1.3.6.1.4.1.2312.8.1.1.250 NAME 'rhcsInet4-only'
   EQUALITY caseExactIA5Match
   SYNTAX 1.3.6.1.4.1.1466.115.121.1.26
   SINGLE-VALUE
   )
 attributeTypes: (
-  1.3.6.1.4.1.2312.8.1.1.258 NAME 'rhcsSnmp-priv-passwd'
+  1.3.6.1.4.1.2312.8.1.1.251 NAME 'rhcsInet6-only'
   EQUALITY caseExactIA5Match
   SYNTAX 1.3.6.1.4.1.1466.115.121.1.26
   SINGLE-VALUE
   )
 attributeTypes: (
-  1.3.6.1.4.1.2312.8.1.1.259 NAME 'rhcsSnmp-priv-passwd-script'
+  1.3.6.1.4.1.2312.8.1.1.55 NAME 'rhcsIdentity-file'
   EQUALITY caseExactIA5Match
   SYNTAX 1.3.6.1.4.1.1466.115.121.1.26
   SINGLE-VALUE
   )
 attributeTypes: (
-  1.3.6.1.4.1.2312.8.1.1.260 NAME 'rhcsUdpport'
+  1.3.6.1.4.1.2312.8.1.1.253 NAME 'rhcsSnmp-version'
   EQUALITY caseExactIA5Match
   SYNTAX 1.3.6.1.4.1.1466.115.121.1.26
   SINGLE-VALUE
   )
 attributeTypes: (
-  1.3.6.1.4.1.2312.8.1.1.63 NAME 'rhcsIp-family'
+  1.3.6.1.4.1.2312.8.1.1.254 NAME 'rhcsCommunity'
   EQUALITY caseExactIA5Match
   SYNTAX 1.3.6.1.4.1.1466.115.121.1.26
   SINGLE-VALUE
   )
 attributeTypes: (
-  1.3.6.1.4.1.2312.8.1.1.65 NAME 'rhcsMulticast-ttl'
+  1.3.6.1.4.1.2312.8.1.1.255 NAME 'rhcsSnmp-auth-prot'
   EQUALITY caseExactIA5Match
   SYNTAX 1.3.6.1.4.1.1466.115.121.1.26
   SINGLE-VALUE
   )
 attributeTypes: (
-  1.3.6.1.4.1.2312.8.1.1.66 NAME 'rhcsRetrans'
+  1.3.6.1.4.1.2312.8.1.1.256 NAME 'rhcsSnmp-sec-level'
   EQUALITY caseExactIA5Match
   SYNTAX 1.3.6.1.4.1.1466.115.121.1.26
   SINGLE-VALUE
   )
 attributeTypes: (
-  1.3.6.1.4.1.2312.8.1.1.69 NAME 'rhcsDomain'
+  1.3.6.1.4.1.2312.8.1.1.257 NAME 'rhcsSnmp-priv-prot'
   EQUALITY caseExactIA5Match
   SYNTAX 1.3.6.1.4.1.1466.115.121.1.26
   SINGLE-VALUE
   )
 attributeTypes: (
-  1.3.6.1.4.1.2312.8.1.1.71 NAME 'rhcsTimeout'
+  1.3.6.1.4.1.2312.8.1.1.258 NAME 'rhcsSnmp-priv-passwd'
+  EQUALITY caseExactIA5Match
+  SYNTAX 1.3.6.1.4.1.1466.115.121.1.26
+  SINGLE-VALUE
+  )
+attributeTypes: (
+  1.3.6.1.4.1.2312.8.1.1.259 NAME 'rhcsSnmp-priv-passwd-script'
+  EQUALITY caseExactIA5Match
+  SYNTAX 1.3.6.1.4.1.1466.115.121.1.26
+  SINGLE-VALUE
+  )
+attributeTypes: (
+  1.3.6.1.4.1.2312.8.1.1.260 NAME 'rhcsUdpport'
   EQUALITY caseExactIA5Match
   SYNTAX 1.3.6.1.4.1.1466.115.121.1.26
   SINGLE-VALUE
@@ -931,6 +937,12 @@ attributeTypes: (
   SINGLE-VALUE
   )
 attributeTypes: (
+  1.3.6.1.4.1.2312.8.1.1.273 NAME 'rhcsInterface'
+  EQUALITY caseExactIA5Match
+  SYNTAX 1.3.6.1.4.1.1466.115.121.1.26
+  SINGLE-VALUE
+  )
+attributeTypes: (
   1.3.6.1.4.1.2312.8.1.1.125 NAME 'rhcsRef'
   EQUALITY caseExactIA5Match
   SYNTAX 1.3.6.1.4.1.1466.115.121.1.26
@@ -1527,7 +1539,6 @@ attributeTypes: (
 objectClasses: (
      1.3.6.1.4.1.2312.8.1.2.1 NAME 'rhcsCluster' SUP top STRUCTURAL
      MUST ( rhcsConfig-version $ name )
-     MAY ( rhcsAlias )
    )
 objectClasses: (
      1.3.6.1.4.1.2312.8.1.2.3 NAME 'rhcsCman' SUP top STRUCTURAL
@@ -1577,7 +1588,8 @@ objectClasses: (
    )
 objectClasses: (
      1.3.6.1.4.1.2312.8.1.2.15 NAME 'rhcsMaster' SUP top STRUCTURAL
-     MUST ( rhcsWeight $ name )
+     MUST ( name )
+     MAY ( rhcsWeight )
    )
 objectClasses: (
      1.3.6.1.4.1.2312.8.1.2.13 NAME 'rhcsGfs-controld' SUP top STRUCTURAL
@@ -1605,12 +1617,13 @@ objectClasses: (
 #   )
 objectClasses: (
      1.3.6.1.4.1.2312.8.1.2.10 NAME 'rhcsClusternode' SUP top STRUCTURAL
-     MUST ( name )
-     MAY ( rhcsWeight $ rhcsNodeid $ rhcsVotes )
+     MUST ( rhcsNodeid $ name )
+     MAY ( rhcsWeight $ rhcsVotes )
    )
 objectClasses: (
      1.3.6.1.4.1.2312.8.1.2.11 NAME 'rhcsAltname' SUP top STRUCTURAL
-     MAY ( rhcsMcast $ rhcsPort $ name )
+     MUST ( name )
+     MAY ( rhcsMcast $ rhcsPort )
    )
 ### Placeholder for rhcsFencedevices
 ### This object class currently has no attributes
@@ -1621,7 +1634,7 @@ objectClasses: (
 objectClasses: (
      1.3.6.1.4.1.2312.8.1.2.5 NAME 'rhcsFencedevice' SUP top STRUCTURAL
      MUST ( rhcsAgent $ name )
-     MAY ( rhcsRibcl $ rhcsSsl $ rhcsIo-fencing $ rhcsSwitch $ rhcsModule-name $ rhcsDrac-version $ rhcsMethod $ rhcsCipher $ rhcsTimeout $ rhcsUse-uuid $ rhcsDomain $ rhcsKey-file $ rhcsHash $ rhcsRetrans $ rhcsMulticast-ttl $ rhcsMulticast-address $ rhcsIp-family $ rhcsUdpport $ rhcsSnmp-priv-passwd-script $ rhcsSnmp-priv-passwd $ rhcsSnmp-priv-prot $ rhcsSnmp-sec-level $ rhcsSnmp-auth-prot $ rhcsCommunity $ rhcsSnmp-version $ rhcsIdentity-file $ rhcsIpport $ rhcsInet6-only $ rhcsInet4-only $ rhcsCmd-prompt $ rhcsHmc-version $ rhcsManaged $ rhcsPartition $ rhcsSeparator $ rhcsHelp $ rhcsVersion $ rhcsDebug $ rhcsVerbose $ rhcsVmware-datacenter $ rhcsSecure $ rhcsVmware-type $ rhcsExec $ rhcsOption $ rhcsRpowerpath $ rhcsCserver $ rhcsServers $ rhcsAptpl $ rhcsLogfile $ rhcsAction $ rhcsDevices $ rhcsKey $ rhcsNodename $ rhcsLanplus $ rhcsAuth $ rhcsPasswd-script $ rhcsPasswd $ rhcsLogin $ rhcsIpaddr $ rhcsPort $ rhcsDevice )
+     MAY ( rhcsRibcl $ rhcsSsl $ rhcsIo-fencing $ rhcsSwitch $ rhcsModule-name $ rhcsDrac-version $ rhcsMethod $ rhcsCipher $ rhcsUdpport $ rhcsSnmp-priv-passwd-script $ rhcsSnmp-priv-passwd $ rhcsSnmp-priv-prot $ rhcsSnmp-sec-level $ rhcsSnmp-auth-prot $ rhcsCommunity $ rhcsSnmp-version $ rhcsIdentity-file $ rhcsInet6-only $ rhcsInet4-only $ rhcsCmd-prompt $ rhcsHmc-version $ rhcsManaged $ rhcsPartition $ rhcsSeparator $ rhcsHelp $ rhcsVersion $ rhcsVerbose $ rhcsVmware-datacenter $ rhcsSecure $ rhcsVmware-type $ rhcsExec $ rhcsChannel-address $ rhcsSerial-params $ rhcsSerial-device $ rhcsTimeout $ rhcsUse-uuid $ rhcsDomain $ rhcsKey-file $ rhcsHash $ rhcsRetrans $ rhcsIpport $ rhcsMulticast-address $ rhcsIp-family $ rhcsDebug $ rhcsOption $ rhcsRpowerpath $ rhcsCserver $ rhcsServers $ rhcsAptpl $ rhcsLogfile $ rhcsAction $ rhcsDevices $ rhcsKey $ rhcsNodename $ rhcsLanplus $ rhcsAuth $ rhcsPasswd-script $ rhcsPasswd $ rhcsLogin $ rhcsIpaddr $ rhcsPort $ rhcsDevice )
    )
 objectClasses: (
      1.3.6.1.4.1.2312.8.1.2.21 NAME 'rhcsRm' SUP top STRUCTURAL
@@ -1661,6 +1674,10 @@ objectClasses: (
 #     1.3.6.1.4.1.2312.8.1.2.27 NAME 'rhcsResources' SUP top STRUCTURAL
 #   )
 objectClasses: (
+     1.3.6.1.4.1.2312.8.1.2.59 NAME 'rhcsClvmd' SUP top STRUCTURAL
+     MAY ( rhcsInterface )
+   )
+objectClasses: (
      1.3.6.1.4.1.2312.8.1.2.31 NAME 'rhcsService' SUP top STRUCTURAL
      MAY ( rhcs--enforce-timeouts $ rhcs--independent-subtree $ rhcsPriority $ rhcsRestart-expire-time $ rhcsMax-restarts $ rhcsDepend-mode $ rhcsDepend $ rhcsRecovery $ rhcsNfs-client-cache $ rhcsNfslock $ rhcsExclusive $ rhcsHardrecovery $ rhcsAutostart $ rhcsDomain $ name $ rhcsRef )
    )
@@ -1775,5 +1792,5 @@ objectClasses: (
 objectClasses: (
      1.3.6.1.4.1.2312.8.1.2.6 NAME 'rhcsDevice' SUP top STRUCTURAL
      MUST ( name )
-     MAY ( rhcsRibcl $ rhcsSsl $ rhcsIo-fencing $ rhcsSwitch $ rhcsModule-name $ rhcsDrac-version $ rhcsMethod $ rhcsCipher $ rhcsTimeout $ rhcsUse-uuid $ rhcsDomain $ rhcsKey-file $ rhcsHash $ rhcsRetrans $ rhcsMulticast-ttl $ rhcsMulticast-address $ rhcsIp-family $ rhcsUdpport $ rhcsSnmp-priv-passwd-script $ rhcsSnmp-priv-passwd $ rhcsSnmp-priv-prot $ rhcsSnmp-sec-level $ rhcsSnmp-auth-prot $ rhcsCommunity $ rhcsSnmp-version $ rhcsIdentity-file $ rhcsIpport $ rhcsInet6-only $ rhcsInet4-only $ rhcsCmd-prompt $ rhcsHmc-version $ rhcsManaged $ rhcsPartition $ rhcsSeparator $ rhcsHelp $ rhcsVersion $ rhcsDebug $ rhcsVerbose $ rhcsVmware-datacenter $ rhcsSecure $ rhcsVmware-type $ rhcsExec $ rhcsOption $ rhcsRpowerpath $ rhcsCserver $ rhcsServers $ rhcsAptpl $ rhcsLogfile $ rhcsAction $ rhcsDevices $ rhcsKey $ rhcsNodename $ rhcsLanplus $ rhcsAuth $ rhcsPasswd-script $ rhcsPasswd $ rhcsLogin $ rhcsIpaddr $ rhcsPort $ rhcsDevice )
+     MAY ( rhcsRibcl $ rhcsSsl $ rhcsIo-fencing $ rhcsSwitch $ rhcsModule-name $ rhcsDrac-version $ rhcsMethod $ rhcsCipher $ rhcsUdpport $ rhcsSnmp-priv-passwd-script $ rhcsSnmp-priv-passwd $ rhcsSnmp-priv-prot $ rhcsSnmp-sec-level $ rhcsSnmp-auth-prot $ rhcsCommunity $ rhcsSnmp-version $ rhcsIdentity-file $ rhcsInet6-only $ rhcsInet4-only $ rhcsCmd-prompt $ rhcsHmc-version $ rhcsManaged $ rhcsPartition $ rhcsSeparator $ rhcsHelp $ rhcsVersion $ rhcsVerbose $ rhcsVmware-datacenter $ rhcsSecure $ rhcsVmware-type $ rhcsExec $ rhcsChannel-address $ rhcsSerial-params $ rhcsSerial-device $ rhcsTimeout $ rhcsUse-uuid $ rhcsDomain $ rhcsKey-file $ rhcsHash $ rhcsRetrans $ rhcsIpport $ rhcsMulticast-address $ rhcsIp-family $ rhcsDebug $ rhcsOption $ rhcsRpowerpath $ rhcsCserver $ rhcsServers $ rhcsAptpl $ rhcsLogfile $ rhcsAction $ rhcsDevices $ rhcsKey $ rhcsNodename $ rhcsLanplus $ rhcsAuth $ rhcsPasswd-script $ rhcsPasswd $ rhcsLogin $ rhcsIpaddr $ rhcsPort $ rhcsDevice )
    )
diff --git a/config/plugins/ldap/configldap.c b/config/plugins/ldap/configldap.c
index 4877f61..fb25af4 100644
--- a/config/plugins/ldap/configldap.c
+++ b/config/plugins/ldap/configldap.c
@@ -237,9 +237,10 @@ static int read_config_for(LDAP *ld, struct objdb_iface_ver0 *objdb, hdb_handle_
 					 */
 					if (strcmp("objectClass", attr) &&
 					    strcmp("cn", attr)) {
-						objdb->object_key_create(object_handle, attr, strlen(attr),
+					  objdb->object_key_create_typed(object_handle, attr,
 									 val_ber[i]->bv_val,
-									 val_ber[i]->bv_len+1);
+									 val_ber[i]->bv_len+1,
+									 OBJDB_VALUETYPE_STRING);
 					}
 					i++;
 				}
diff --git a/config/plugins/ldap/ldap-base.csv b/config/plugins/ldap/ldap-base.csv
index 690b92f..14c8f06 100644
--- a/config/plugins/ldap/ldap-base.csv
+++ b/config/plugins/ldap/ldap-base.csv
@@ -1,5 +1,5 @@
-# Max attribute value: 269
-# Max object class value: 58
+# Max attribute value: 273
+# Max object class value: 59
 obj,rhcsCluster,cluster,1
 obj,rhcsCman,cman,3
 obj,rhcsTotem,totem,4
@@ -326,3 +326,8 @@ attr,rhcsRibcl,ribcl,266
 attr,rhcsKey,key,267
 attr,rhcsDevices,devices,268
 attr,rhcsAptpl,aptpl,269
+attr,rhcsSerial-device,serial_device,270
+attr,rhcsSerial-params,serial_params,271
+attr,rhcsChannel-address,channel_address,272
+obj,rhcsClvmd,clvmd,59
+attr,rhcsInterface,interface,273
diff --git a/config/plugins/xml/config.c b/config/plugins/xml/config.c
index dba9891..1db23da 100644
--- a/config/plugins/xml/config.c
+++ b/config/plugins/xml/config.c
@@ -56,13 +56,13 @@ static void addkeys(xmlAttrPtr tmpattr, struct objdb_iface_ver0 *objdb,
 {
 	for (tmpattr = tmpattr; tmpattr; tmpattr = tmpattr->next) {
 		if (tmpattr->type == XML_ATTRIBUTE_NODE)
-			objdb->object_key_create(object_handle,
+			objdb->object_key_create_typed(object_handle,
 						 (char *)tmpattr->name,
-						 strlen((char *)tmpattr->name),
 						 (char *)tmpattr->children->
 						 content,
 						 strlen((char *)tmpattr->
-							children->content) + 1);
+							children->content) + 1,
+						 OBJDB_VALUETYPE_STRING);
 	}
 }
 
diff --git a/config/tools/ldap/rng2ldif/value-list.c b/config/tools/ldap/rng2ldif/value-list.c
index d8b0e84..1de9181 100644
--- a/config/tools/ldap/rng2ldif/value-list.c
+++ b/config/tools/ldap/rng2ldif/value-list.c
@@ -1,4 +1,3 @@
-/** License: GPLv2 */
 #include <stdio.h>
 #include <stdlib.h>
 #include <assert.h>
diff --git a/config/tools/ldap/rng2ldif/value-list.h b/config/tools/ldap/rng2ldif/value-list.h
index 4e83e08..2e43326 100644
--- a/config/tools/ldap/rng2ldif/value-list.h
+++ b/config/tools/ldap/rng2ldif/value-list.h
@@ -1,4 +1,3 @@
-/** License: GPLv2 */
 #ifndef _VALUE_LIST_H
 #define _VALUE_LIST_H
 
diff --git a/config/tools/xml/cluster.rng.in b/config/tools/xml/cluster.rng.in
index 4344fda..fd1eaf7 100644
--- a/config/tools/xml/cluster.rng.in
+++ b/config/tools/xml/cluster.rng.in
@@ -17,6 +17,7 @@ cluster
 - clusternodes
 - fencedevices
 - rm
+- clvmd
 
 Element defnitions:
 - Resource
@@ -29,21 +30,12 @@ To validate your cluster.conf against this schema, run:
 -->
 
 <start>
-<element name="cluster" rha:description="The cluster element is the
-    top-level element and contains attributes that define the cluster
-    name and the configuration version number.">
- <attribute name="name" rha:description="Specifies a unique cluster name;
-      it can be up to 16 characters long." rha:sample="my_cluster"/>
- <attribute name="config_version" rha:description="Identifies the
-     revision level of the cluster.conf file. To propagate a revised
-     configuration file, this value must be advanced to a value higher
-     than the current value." rha:sample="1" rha:default="1"/>
- <optional>
-  <attribute name="alias" rha:description="Pretty name for cluster that
-      is not parsed by cluster software; only the Conga and
-      system-config-cluster GUIs use this value."
-      rha:sample="My Cluster"/>
- </optional>
+<element name="cluster" rha:description="Defines cluster properties, and
+    contains all other configuration. cluster.conf(5)">
+ <attribute name="name" rha:description="Name of the cluster.
+     cluster.conf(5)"/>
+ <attribute name="config_version" rha:description="Revision level
+     of cluster.conf file. cluster.conf(5)"/>
  <interleave>
 
 <!-- cman block -->
@@ -128,10 +120,8 @@ To validate your cluster.conf against this schema, run:
      <attribute name="nodename" rha:description="Local node name; this is set internally by cman-preconfig and should never be set by a user."/>
     </optional>
     <optional>
-     <attribute name="broadcast">
-      <data type="nonNegativeInteger"/>
-     </attribute>
-    </optional>
+     <attribute name="broadcast" rha:description="enable cman broadcast" rha:default="no"/>
+   </optional>
     <optional>
      <attribute name="keyfile" rha:description=""/>
     </optional>
@@ -424,96 +414,103 @@ To validate your cluster.conf against this schema, run:
 
 <!-- dlm block -->
  <optional>
- <element name="dlm" rha:description="">
+ <element name="dlm" rha:description="Configuration for dlm and
+       dlm_controld daemon. dlm_controld(8)">
+
   <optional>
-    <attribute name="log_debug" rha:description="Enable dlm
-      kernel debug messages." rha:default="0" rha:sample="1"/>
+    <attribute name="log_debug" rha:description="Set to 1 to enable
+        dlm kernel debugging messages. dlm_controld(8)"/>
   </optional>
+
   <optional>
-   <attribute name="timewarn" rha:description="The number of centiseconds
-     a lock is blocked before a warning is sent to userland, for lockspaces
-     created with TIMEWARN flag." rha:default="500" rha:sample="100"/>
+   <attribute name="timewarn" rha:description="Number of centiseconds
+       a lock is blocked before notifying dlm_controld deadlock code.
+       dlm_controld(8)"/>
   </optional>
+
   <optional>
-   <attribute name="protocol" rha:description="The network protocol
-     used by the dlm." rha:default="tcp" rha:sample="sctp"/>
+   <attribute name="protocol" rha:description="The dlm lowcomms protocol.
+       dlm_controld(8)"/>
   </optional>
+
   <optional>
-   <attribute name="enable_fencing" rha:description="Enable fencing recovery
-     dependency." rha:default="1" rha:sample="0"/>
+   <attribute name="enable_fencing" rha:description="Fencing recovery
+       dependency. dlm_controld(8)" />
   </optional>
+
   <optional>
-   <attribute name="enable_quorum" rha:description="Enable quorum recovery
-     dependency." rha:default="1" rha:sample="0"/>
+   <attribute name="enable_quorum" rha:description="Quorum recovery
+       dependency. dlm_controld(8)"/>
   </optional>
+
   <optional>
-   <attribute name="enable_deadlk" rha:description="Enable deadlock detection
-     code." rha:default="0" rha:sample="1"/>
+   <attribute name="enable_deadlk" rha:description="Deadlock detection
+       capability. dlm_controld(8)"/>
   </optional>
+
   <optional>
-   <attribute name="enable_plock" rha:description="Enable posix lock code for
-     cluster fs." rha:default="1" rha:sample="0"/>
+   <attribute name="enable_plock" rha:description="Cluster fs posix
+       lock capability. dlm_controld(8)"/>
   </optional>
+
   <optional>
-   <attribute name="plock_debug" rha:description="Enable posix lock debugging."
-     rha:default="0" rha:sample="1"/>
+   <attribute name="plock_debug" rha:description="Set to 1 to enable
+       posix lock debugging. dlm_controld(8)"/>
   </optional>
+
   <optional>
-   <attribute name="plock_rate_limit" rha:description="The maximum
-     number of plock operations that will be sent every second.  This is
-     used to prevent potentially excessive network load.  For best
-     performance it is recommended to disable this rate limiting by
-     setting the value to 0." rha:sample="10000" rha:default="100"/>
+   <attribute name="plock_rate_limit" rha:description="Limit the rate of
+       plock operations. dlm_controld(8)"/>
   </optional>
+
   <optional>
-   <attribute name="plock_ownership" rha:description="Enabling this
-     option by setting to 1 optimizes plock performance for repeated
-     locking of the same locks by processes on a single node.
-     All dlm_controld daemons in the cluster must be stopped before
-     changing this value." rha:sample="0" rha:default="1"/>
+   <attribute name="plock_ownership" rha:description="Set to 1/0 to
+       enable/disable plock ownership. dlm_controld(8)"/>
   </optional>
+
   <optional>
-   <attribute name="drop_resources_time" rha:description="For tuning the
-     plock_ownership resource caching.  This is the frequence of
-     attempts in milliseconds to drop unused resources from the
-     cache." rha:sample="5000" rha:default="10000"/>
+   <attribute name="drop_resources_time" rha:description="Plock ownership
+       drop resources time. dlm_controld(8)"/>
   </optional>
+
   <optional>
-   <attribute name="drop_resources_count" rha:description="For tuning
-     the plock_ownership resource caching.  This is the maximum number
-     of resources to drop from the cache each time." rha:sample="100"
-     rha:default="10"/>
+   <attribute name="drop_resources_count" rha:description="Plock ownership
+       drop resources count. dlm_controld(8)"/>
   </optional>
+
   <optional>
-   <attribute name="drop_resources_age" rha:description="For tuning the
-     plock_ownership resource caching.  This is the time in milliseconds
-     that a cached resource should be unused before consideration for
-     dropping." rha:sample="5000" rha:default="10000"/>
+   <attribute name="drop_resources_age" rha:description="Plock ownership
+       drop resources age. dlm_controld(8)"/>
   </optional>
+
   <optional>
    <zeroOrMore>
     <element name="lockspace" rha:description="Individual lockspace
-      configuration.">
-     <attribute name="name" rha:description="The name of the lockspace."
-        rha:sample="foo"/>
+      configuration. dlm_controld(8)">
+     <attribute name="name" rha:description="Name of the lockspace.
+         dlm_controld(8)"/>
+
      <optional>
-      <attribute name="nodir" rha:description="The lockspace will not use a
-        resource directory when this is set to 1." rha:default="0"
-        rha:sample="1"/>
+      <attribute name="nodir" rha:description="Set to 1 to disable the
+          internal resource directory. dlm_controld(8)"/>
      </optional>
+
      <optional>
       <zeroOrMore>
-       <element name="master" rha:description="Define a particular node to be
-         responsible for a certain amount of lock mastering.">
-        <attribute name="name" rha:description="The name of the node that
-          should be mastering resources/locks.  This needs to match one of
-          the nodes defined in clusternodes." rha:sample="node01"/>
+       <element name="master" rha:description="Defines a master node.
+           dlm_controld(8)">
+
+        <attribute name="name" rha:description="The name of a node that
+            should master resources/locks. dlm_controld(8)"/>
+
+        <optional>
         <attribute name="weight" rha:description="The proportion of
-          resources/locks this node should master." rha:default="1"
-          rha:sample="2"/>
+            resources this node should master. dlm_controld(8)"/>
+        </optional>
        </element>
       </zeroOrMore>
      </optional>
+
     </element>
    </zeroOrMore>
   </optional>
@@ -523,64 +520,60 @@ To validate your cluster.conf against this schema, run:
 
 <!-- gfs_controld block -->
  <optional>
- <element name="gfs_controld" rha:description="This element and its
-   attributes configure the gfs_controld daemon.">
+ <element name="gfs_controld" rha:description="Configuration for
+       gfs_controld daemon. gfs_controld(8)">
+
   <optional>
-   <attribute name="enable_withdraw" rha:description="Enable the code that
-     handles gfs withdraw." rha:default="1" rha:sample="0"/>
+   <attribute name="enable_withdraw" rha:description="Set to 1/0 to
+       enable/disable a response to a withdraw. gfs_controld(8)"/>
   </optional>
+
   <optional>
-   <attribute name="enable_plock" rha:description="Enable posix lock code for
-     cluster fs.  gfs_controld only handles plocks when daemons run in compat
-     mode, otherwise dlm_controld processes plocks." rha:default="1"
-     rha:sample="0"/>
+   <attribute name="enable_plock" rha:description="Cluster fs posix
+       lock capability. gfs_controld(8)"/>
   </optional>
+
   <optional>
-   <attribute name="plock_debug" rha:description="Enable posix lock
-     debugging." rha:default="0" rha:sample="1"/>
+   <attribute name="plock_debug" rha:description="Set to 1 to enable
+       posix lock debugging. gfs_controld(8)"/>
   </optional>
+
   <optional>
-   <attribute name="plock_rate_limit" rha:description="The maximum
-     number of plock operations that will be sent every second.  This is
-     used to prevent potentially excessive network load.  For best
-     performance it is recommended to disable this rate limiting by
-     setting the value to 0." rha:sample="10000" rha:default="100"/>
+   <attribute name="plock_rate_limit" rha:description="Limit the rate of
+       plock operations. gfs_controld(8)"/>
   </optional>
+
   <optional>
-   <attribute name="plock_ownership" rha:description="Enabling this
-     option by setting to 1 optimizes plock performance for repeated
-     locking of the same locks by processes on a single node.
-     All gfs_controld daemons in the cluster must be stopped before
-     changing this value." rha:sample="1" rha:default="0"/>
+   <attribute name="plock_ownership" rha:description="Set to 1/0 to
+       enable/disable plock ownership. gfs_controld(8)"/>
   </optional>
+
   <optional>
-   <attribute name="drop_resources_time" rha:description="For tuning the
-     plock_ownership resource caching.  This is the frequence of
-     attempts in milliseconds to drop unused resources from the
-     cache." rha:sample="5000" rha:default="10000"/>
+   <attribute name="drop_resources_time" rha:description="Plock ownership
+       drop resources time. gfs_controld(8)"/>
   </optional>
+
   <optional>
-   <attribute name="drop_resources_count" rha:description="For tuning
-     the plock_ownership resource caching.  This is the maximum number
-     of resources to drop from the cache each time." rha:sample="100"
-     rha:default="10"/>
+   <attribute name="drop_resources_count" rha:description="Plock ownership
+       drop resources count. gfs_controld(8)"/>
   </optional>
+
   <optional>
-   <attribute name="drop_resources_age" rha:description="For tuning the
-     plock_ownership resource caching.  This is the time in milliseconds
-     that a cached resource should be unused before consideration for
-     dropping." rha:sample="5000" rha:default="10000"/>
+   <attribute name="drop_resources_age" rha:description="Plock ownership
+       drop resources age. gfs_controld(8)"/>
   </optional>
+
  </element>
  </optional>
 <!-- end gfs_controld block -->
 
 <!-- group block -->
  <optional>
- <element name="group" rha:description="">
+ <element name="group" rha:description="Defines groupd configuration.
+     groupd(8)">
   <optional>
    <attribute name="groupd_compat" rha:description="Enable compatibility with
-     cluster2 (RHEL5) nodes" rha:default="0" rha:sample="1"/>
+     cluster2 nodes. groupd(8)"/>
   </optional>
  </element>
  </optional>
@@ -588,148 +581,164 @@ To validate your cluster.conf against this schema, run:
 
 <!-- logging block -->
  <optional>
-  <element name="logging" rha:description="Global logging config applies
-    to all daemons.">
+  <element name="logging" rha:description="Defines global logging
+      configuration, and contains daemon-specific configuration.
+      cluster.conf(5)">
+
    <optional>
-    <attribute name="to_syslog" rha:description="enable/disable messages to
-      syslog" rha:default="yes"/>
+    <attribute name="to_syslog" rha:description="Set to yes/no to
+        enable/disable messages to syslog. cluster.conf(5)"/>
    </optional>
+
    <optional>
-    <attribute name="to_logfile" rha:description="enable/disable messages to
-      log file" rha:default="yes"/>
+    <attribute name="to_logfile" rha:description="Set to yes/no to
+        enable/disable messages to log file. cluster.conf(5)"/>
    </optional>
+
    <optional>
-    <attribute name="syslog_facility" rha:description="facility used for
-      syslog messages" rha:default="daemon"/>
+    <attribute name="syslog_facility" rha:description="The facility
+        used for syslog messages. cluster.conf(5)"/>
    </optional>
+
    <optional>
-    <attribute name="syslog_priority" rha:description="messages at this level
-      and up will be sent to syslog" rha:default="info"/>
+    <attribute name="syslog_priority" rha:description="Messages at this
+        level and higher are sent to syslog. cluster.conf(5)"/>
    </optional>
+
    <optional>
-    <attribute name="logfile_priority" rha:description="messages at this level
-      and up will be written to log file" rha:default="info"/>
+    <attribute name="logfile_priority" rha:description="Messages at this
+        level and higher are written to log file. cluster.conf(5)"/>
    </optional>
+
    <optional>
-    <attribute name="logfile" rha:description="the log file name"
-      rha:default="/var/log/cluster/daemon_name.log"/>
+    <attribute name="logfile" rha:description="The log file path name.
+        cluster.conf(5)"/>
    </optional>
+
    <optional>
-    <attribute name="debug" rha:description="turn on debugging, a shortcut for
-      setting logfile_priority to debug" rha:sample="on"/>
+    <attribute name="debug" rha:description="Set to on to enable debugging
+        messages in log file. cluster.conf(5)"/>
    </optional>
+
    <optional>
-    <element name="logging_daemon" rha:description="Per-daemon logging
-      config overrides global settings for named daemon.">
-     <attribute name="name" rha:description="daemon name" rha:sample="fenced"/>
+    <element name="logging_daemon" rha:description="Defines
+        daemon-specific logging configuration. cluster.conf(5)">
+
+     <attribute name="name" rha:description="The daemon name.
+         cluster.conf(5)"/>
+
      <optional>
-      <attribute name="subsys" rha:description="corosync subsystem name"
-        rha:sample="CMAN"/>
+      <attribute name="subsys" rha:description="A corosync subsystem name.
+          cluster.conf(5)"/>
      </optional>
+
      <optional>
-      <attribute name="to_syslog" rha:description="same as global"/>
+      <attribute name="to_syslog" rha:description="Same as global."/>
      </optional>
      <optional>
-      <attribute name="to_logfile" rha:description="same as global"/>
+      <attribute name="to_logfile" rha:description="Same as global."/>
      </optional>
      <optional>
-      <attribute name="syslog_facility" rha:description="same as global"/>
+      <attribute name="syslog_facility" rha:description="Same as global."/>
      </optional>
      <optional>
-      <attribute name="syslog_priority" rha:description="same as global"/>
+      <attribute name="syslog_priority" rha:description="Same as global."/>
      </optional>
      <optional>
-      <attribute name="logfile_priority" rha:description="same as global"/>
+      <attribute name="logfile_priority" rha:description="Same as global."/>
      </optional>
      <optional>
-      <attribute name="logfile" rha:description="same as global"/>
+      <attribute name="logfile" rha:description="Same as global."/>
      </optional>
      <optional>
-      <attribute name="debug" rha:description="same as global"/>
+      <attribute name="debug" rha:description="Same as global."/>
      </optional>
+
     </element>
    </optional>
+
   </element>
  </optional>
 <!-- end logging block -->
 
-<!-- clusternode block -->
- <element name="clusternodes" rha:description="This element defines the
-     cluster nodes configuration; it contains one clusternode element
-     per node.">
-   <zeroOrMore>
-     <element name="clusternode" rha:description="This element and its
-         attributes define a cluster node, specifying node name, node ID,
-         number of quorum votes, and fencing method for that node. There
-         is one clusernode element per node in a cluster.">
-      <attribute name="name" rha:description="The hostname or the IP
-          address of the node." rha:sample="node-01">
-	 <data type="ID"/>
-       </attribute>
-       <optional>
-	 <attribute name="votes" rha:description="The number of votes a
-            node can cast" rha:sample="2" rha:default="1">
-	   <data type="positiveInteger"/>
-	 </attribute>
-       </optional>
-       <optional>
-	 <attribute name="nodeid" rha:description="Each node requires
-             a unique integer value as its node ID." rha:sample="1">
-	   <data type="positiveInteger"/>
-	 </attribute>
-       </optional>
-       <optional>
-	 <attribute name="weight" rha:description="" rha:sample=""/>
-       </optional>
-       <optional>
-         <element name="altname" rha:description="">
-           <optional>
-             <attribute name="name" rha:description="" rha:sample=""/>
-           </optional>
-           <optional>
-             <attribute name="port" rha:description="" rha:sample=""/>
-           </optional>
-           <optional>
-             <attribute name="mcast" rha:description="" rha:sample=""/>
-           </optional>
-         </element>
-       </optional>
-       <interleave>
-	 <optional>
-	   <ref name="FENCE"/>
-	 </optional>
-	 <optional>
-	   <ref name="UNFENCE"/>
-	 </optional>
-       </interleave>
-     </element>
-   </zeroOrMore>
+<!-- clusternodes block -->
+ <element name="clusternodes" rha:description="Contains all cluster
+     node definitions. cluster.conf(5)">
+
+  <zeroOrMore>
+   <element name="clusternode" rha:description="Defines cluster node
+       properties, and contains other node specific configuration.
+       cluster.conf(5)">
+
+   <attribute name="name" rha:description="The hostname or IP address
+       of the node. cluster.conf(5)"/>
+
+   <attribute name="nodeid" rha:description="A unique integer to use
+       as a node identifier. cluster.conf(5)">
+    <data type="positiveInteger"/>
+   </attribute>
+
+   <optional>
+    <attribute name="votes" rha:description="The number of votes the
+        node contributes to quorum. cman(5)">
+     <data type="positiveInteger"/>
+    </attribute>
+   </optional>
+
+   <optional>
+    <attribute name="weight" rha:description="The dlm locking weight.
+        dlm_controld(8)"/>
+   </optional>
+
+   <optional>
+    <element name="altname" rha:description="Defines a second network
+        interface to use for corosync redundant ring mode. cman(5)">
+
+     <attribute name="name" rha:description="A second hostname or IP
+        address of the node. cman(5)"/>
+
+     <optional>
+      <attribute name="port" rha:description="The network port to use
+          on the second interface. cman(5)"/>
+     </optional>
+
+     <optional>
+      <attribute name="mcast" rha:description="The multicast address
+          to use on the second interface. cman(5)"/>
+     </optional>
+    </element>
+   </optional>
+
+   <interleave>
+    <optional>
+     <ref name="FENCE"/>
+    </optional>
+    <optional>
+     <ref name="UNFENCE"/>
+    </optional>
+   </interleave>
+
+   </element>
+  </zeroOrMore>
  </element>
 <!-- end clusternode block -->
 
 <!-- fencedevices block -->
  <optional>
- <element name="fencedevices" rha:description="This element and its
-     attributes define the fence devices in the cluster. Parameters
-     vary according to the type of fence device. For example, for a
-     power controller used as a fence device, the cluster configuration
-     defines the name of the power controller, its IP address, login,
-     and password.">
+ <element name="fencedevices" rha:description="Contains all fence
+     device definitions. fenced(8)">
   <zeroOrMore>
-   <element name="fencedevice" rha:description="The fencedevice element
-       and its attributes define each fence device in a
-       cluster. Parameters for each fence device vary according to the
-       type of fence device.">
-     <attribute name="name" rha:description="This is a reference
-         name that you assign to a fence device. It is specific to a
-         cluster configuration file. It is required
-         when configuring a fence method for a node (refer
-         to the method and device elements in the clusternode
-         element)." rha:sample="apc_123">
+   <element name="fencedevice" rha:description="Defines fence device
+       properties. fenced(8)">
+
+     <attribute name="name" rha:description="A name that is used to
+         reference this fence device from clusternode fence section.
+         fenced(8)">
       <data type="ID"/>
      </attribute>
-     <attribute name="agent" rha:description="Specifies a fence agent to
-         be used." rha:sample="fence_apc"/>
+
+     <attribute name="agent" rha:description="The fence agent to be
+         used. fenced(8)"/>
 
      <ref name="FENCEDEVICEOPTIONS"/>
 
@@ -893,7 +902,23 @@ To validate your cluster.conf against this schema, run:
   </element>
  </optional>
 
+<!-- clvmd block -->
+ <optional>
+   <element name="clvmd" rha:description="The clvmd element contains
+       attributes that define parameters for the cluster LVM daemon.">
+    <optional>
+     <attribute name="interface" rha:description="The interface attribute
+         tells clvmd which cluster interface it should use for internode
+         communications and locking. Valid values for this depend on
+         how the daemon is configured at compile-time, but are typically
+         cman, corosync or openais." rha:sample="cman"/>
+    </optional>
+   </element>
+ </optional>
+
  </interleave>
+
+
 </element> <!-- cluster end -->
 </start>
 
@@ -1956,28 +1981,30 @@ To validate your cluster.conf against this schema, run:
 <!-- begin node fence definitions -->
 
  <define name="FENCE">
-  <element name="fence" rha:description="The fence element specifies how
-      a node is fenced. Its elements and attributes identify fence device
-      (or devices) to use and the parameters specific to each fence device
-      (for example, IP address and port number in an APC fence device)">
+  <element name="fence" rha:description="Contains methods for fencing
+      the node in different ways. fenced(8)">
+
    <zeroOrMore>
-    <element name="method" rha:description="Typically, there is a single
-        method used to fence each node (the name  given to the method is
-        not significant). A method refers to a specific device listed in
-        the fencedevices section (a separate section from the clusternode
-        section), and then lists any node-specific parameters related
-        to using the device.">
-     <attribute name="name" rha:description="" rha:sample="apc123"/>
+
+    <element name="method" rha:description="Contains one or more devices
+        for fencing the node a single way. fenced(8)">
+
+     <attribute name="name" rha:description="A name used to distinguish
+         multiple methods from each other. fenced(8)"/>
+
      <zeroOrMore>
        <ref name="DEVICE"/>
      </zeroOrMore>
+
     </element>
+
    </zeroOrMore>
   </element>
  </define>
 
  <define name="UNFENCE">
-  <element name="unfence" rha:description="">
+  <element name="unfence" rha:description="Contains devices for unfencing
+      the node. fence_node(8)">
    <zeroOrMore>
     <ref name="DEVICE"/>
    </zeroOrMore>
@@ -1985,8 +2012,11 @@ To validate your cluster.conf against this schema, run:
  </define>
 
  <define name="DEVICE">
-  <element name="device" rha:description="">
-   <attribute name="name" rha:description="" rha:sample="">
+  <element name="device" rha:description="Defines the properties of a
+      device used for fencing or unfencing a node. fenced(8)">
+
+   <attribute name="name" rha:description="The name of a fencedevice
+       defined in the fencedevices section. fenced(8)">
     <data type="IDREF"/>
    </attribute>
 
@@ -2114,6 +2144,86 @@ To validate your cluster.conf against this schema, run:
        <group>
         <attribute name="rpowerpath" rha:description="" rha:sample=""/>
        </group>
+       <!-- fence_xvm ( provided by fence-virt package ) -->
+       <group>
+        <optional>
+          <attribute name="option"/> <!-- deprecated; for compatibility.  use "action" -->
+        </optional>
+        <optional>
+          <attribute name="debug" rha:description="Specify (stdin) or increment (command line) debug level" />
+        </optional>
+        <optional>
+          <attribute name="ip_family" rha:description="IP Family ([auto], ipv4, ipv6)" />
+        </optional>
+        <optional>
+          <attribute name="multicast_address" rha:description="Multicast address (default=225.0.0.12 / ff05::3:1)" />
+        </optional>
+        <optional>
+          <attribute name="ipport" rha:description="Multicast or VMChannel IP port (default=1229)" />
+        </optional>
+        <optional>
+          <attribute name="retrans" rha:description="Multicast retransmit time (in 1/10sec; default=20)" />
+        </optional>
+        <optional>
+          <attribute name="auth" rha:description="Authentication (none, sha1, [sha256], sha512)" />
+        </optional>
+        <optional>
+          <attribute name="hash" rha:description="Packet hash strength (none, sha1, [sha256], sha512)" />
+        </optional>
+        <optional>
+          <attribute name="key_file" rha:description="Shared key file (default=/etc/cluster/fence_xvm.key)" />
+        </optional>
+        <optional>
+          <attribute name="port" rha:description="Virtual Machine (domain name) to fence" />
+        </optional>
+        <optional>
+          <attribute name="domain" rha:description="Virtual Machine (domain name) to fence (deprecated)" />
+        </optional>
+        <optional>
+          <attribute name="use_uuid" rha:description="Treat [domain] as UUID instead of domain name. This is provided for compatibility with older fence_xvmd installations." />
+        </optional>
+        <optional>
+          <attribute name="action" rha:description="Fencing action (null, off, on, [reboot], status, list, monitor, metadata)" />
+        </optional>
+        <optional>
+          <attribute name="timeout" rha:description="Fencing timeout (in seconds; default=30)" />
+        </optional>
+       </group>
+
+       <!-- fence_virt -->
+       <group>
+        <optional>
+          <attribute name="option"/> <!-- deprecated; for compatibility.  use "action" -->
+        </optional>
+        <optional>
+          <attribute name="debug" rha:description="Specify (stdin) or increment (command line) debug level" />
+        </optional>
+        <optional>
+          <attribute name="serial_device" rha:description="Serial device (default=/dev/ttyS1)" />
+        </optional>
+        <optional>
+          <attribute name="serial_params" rha:description="Serial Parameters (default=115200,8N1)" />
+        </optional>
+        <optional>
+          <attribute name="channel_address" rha:description="VM Channel IP address (default=10.0.2.179)" />
+        </optional>
+        <optional>
+          <attribute name="ipport" rha:description="Multicast or VMChannel IP port (default=1229)" />
+        </optional>
+        <optional>
+          <attribute name="port" rha:description="Virtual Machine (domain name) to fence" />
+        </optional>
+        <optional>
+          <attribute name="domain" rha:description="Virtual Machine (domain name) to fence (deprecated)" />
+        </optional>
+        <optional>
+          <attribute name="action" rha:description="Fencing action (null, off, on, [reboot], status, list, monitor, metadata)" />
+        </optional>
+        <optional>
+          <attribute name="timeout" rha:description="Fencing timeout (in seconds; default=30)" />
+        </optional>
+       </group>
+
        <!-- end non-generated device definitions -->
 
        <!-- begin auto-generated device definitions -->
@@ -2533,53 +2643,6 @@ To validate your cluster.conf against this schema, run:
       </group>
 
 
-      <!-- fence_xvm -->
-      <group>
-        <optional>
-          <attribute name="option"/> <!-- deprecated; for compatibility.  use "action" -->
-        </optional>
-        <optional>
-          <attribute name="debug" rha:description="Specify (CCS) / increment (command line) debug level" />
-        </optional>
-        <optional>
-          <attribute name="ip_family" rha:description="IP Family ([auto], ipv4, ipv6)" />
-        </optional>
-        <optional>
-          <attribute name="multicast_address" rha:description="Multicast address (default=225.0.0.12 / ff02::3:1)" />
-        </optional>
-        <optional>
-          <attribute name="port" rha:description="IP port (default=1229)" />
-        </optional>
-        <optional>
-          <attribute name="multicast_ttl" rha:description="Multicast time-to-live (in hops; default=2)" />
-        </optional>
-        <optional>
-          <attribute name="retrans" rha:description="Multicast retransmit time (in 1/10sec; default=20)" />
-        </optional>
-        <optional>
-          <attribute name="auth" rha:description="Authentication (none, sha1, [sha256], sha512)" />
-        </optional>
-        <optional>
-          <attribute name="hash" rha:description="Packet hash strength (none, sha1, [sha256], sha512)" />
-        </optional>
-        <optional>
-          <attribute name="key_file" rha:description="Shared key file (default=/etc/cluster/fence_xvm.key)" />
-        </optional>
-        <optional>
-          <attribute name="domain" rha:description="Virtual machine (domain name) to fence" />
-        </optional>
-        <optional>
-          <attribute name="use_uuid" rha:description="Treat 'domain' as UUID instead of domain name" />
-        </optional>
-        <optional>
-          <attribute name="action" rha:description="Fencing action (null, off, [reboot])" />
-        </optional>
-        <optional>
-          <attribute name="timeout" rha:description="Fencing timeout (in seconds; default=30)" />
-        </optional>
-      </group>
-
-
       <!-- fence_ipmilan -->
       <group>
         <optional>
diff --git a/dlm/man/dlm_tool.8 b/dlm/man/dlm_tool.8
index 57e3da3..5e61567 100644
--- a/dlm/man/dlm_tool.8
+++ b/dlm/man/dlm_tool.8
@@ -1,39 +1,94 @@
-.TH dlm_tool 8
+.TH DLM_TOOL 8 2009-01-20 cluster cluster
 
 .SH NAME
-dlm_tool - A program to join and leave lockspaces and display dlm information
+dlm_tool \- a utility for the dlm and dlm_controld daemon
 
 .SH SYNOPSIS
-.B
-dlm_tool
-[\fIOPTIONS\fR]
-<\fBjoin | leave | lockdump | deadlock_check\fP>
-<\fBname\fP>
+.B dlm_tool
+[COMMAND] [OPTIONS]
+[
+.I name
+]
 
 .SH DESCRIPTION
+.TP
+.B ls
+Display internal dlm_controld state about lockspaces.
+
+.TP
+.B dump
+Dump dlm_controld debug buffer.
+
+.TP
+.BI plocks " name"
+Dump posix locks from dlm_controld for the lockspace.
+
+.TP
+.BI lockdump " name"
+Minimal display of locks from the lockspace.
 
-\fBdlm_tool\fP is a program used to join or leave dlm lockspaces, dump
-dlm lock state, and initiate deadlock detection cycles.  The name of a
-lockspace follows the subcommand.
+.TP
+.BI lockdebug " name"
+Extended display of locks from the lockspace.
+
+.TP
+.BI join " name"
+Join a lockspace.
+
+.TP
+.BI leave " name"
+Leave a lockspace.
+
+.TP
+.BI deadlock_check " name"
+Start a deadlock detection cycle for the lockspace.
 
 .SH OPTIONS
 .TP
-\fB-m\fP
-The permission mode (in octal) of the lockspace device created by join;
-default 0600.
+.B \-n
+Show all node information in ls.
+
+.TP
+.BI \-d " num"
+Resource directory enabled (1) or disabled (0) during join. Default 0.
+
+.TP
+.BI \-e " num"
+Exclusive create off/on (0/1) in join. Default 0.
+
+.TP
+.BI \-f " num"
+FS memory allocation off/on (0/1) in join. Default 0.
+
+.TP
+.BI \-m " mode"
+The permission mode (in octal) of the lockspace device created by join.
+Default 0600.
+
 .TP
-\fB-M\fP
+.B \-M
 Dump MSTCPY locks in addition to locks held by local processes.
+
+.TP
+.B \-s
+Summary following lockdebug output (experimental, format may change).
+
 .TP
-\fB-d\fP <num>
-Resource directory enabled (1) or disabled (0) during join; default 0.
+.B \-v
+Verbose lockdebug output.
+
 .TP
-\fB-h\fP
-Help.  Print out the usage syntax.
+.B \-w
+Wide lockdebug output.
+
+.TP
+.B \-h
+Print a help message describing available options, then exit.
+
 .TP
-\fB-V\fP
-Print version information.
+.B \-V
+Print program version information, then exit.
 
 .SH SEE ALSO
-libdlm(3)
+.BR dlm_controld (8)
 
diff --git a/doc/cluster.logrotate.in b/doc/cluster.logrotate.in
index df7d94c..bd55cce 100644
--- a/doc/cluster.logrotate.in
+++ b/doc/cluster.logrotate.in
@@ -1,8 +1,9 @@
 @LOGDIR@/*log {
 	missingok
 	compress
-	notifempty
+	copytruncate
 	daily
-	rotate 7
-	create 0600 root root
+	rotate 31
+	minsize 2048
+	notifempty
 }
diff --git a/fence/agents/alom/Makefile b/fence/agents/alom/Makefile
index 08d5843..d92eeb7 100644
--- a/fence/agents/alom/Makefile
+++ b/fence/agents/alom/Makefile
@@ -2,6 +2,6 @@ include ../../../make/defines.mk
 
 TARGET= fence_alom
 
-MANTARGET= $(TARGET).8
+MAKEMAN = 1
 
 include $(OBJDIR)/make/fencebuild.mk
diff --git a/fence/agents/alom/fence_alom.py b/fence/agents/alom/fence_alom.py
index 5dfe9e0..6223608 100644
--- a/fence/agents/alom/fence_alom.py
+++ b/fence/agents/alom/fence_alom.py
@@ -64,6 +64,7 @@ def main():
 	docs["shortdesc"] = "Fence agent for Sun ALOM"
 	docs["longdesc"] = "fence_alom is an I/O Fencing \
 agent which can be used with ALOM connected machines."
+	docs["vendorurl"] = "http://www.sun.com"
 	show_docs(options, docs)
 		
 	# Operate the fencing device
diff --git a/fence/agents/apc/Makefile b/fence/agents/apc/Makefile
index 53ef7f2..fabf52c 100644
--- a/fence/agents/apc/Makefile
+++ b/fence/agents/apc/Makefile
@@ -2,6 +2,6 @@ include ../../../make/defines.mk
 
 TARGET= fence_apc
 
-MANTARGET= $(TARGET).8
+MAKEMAN = 1
 
 include $(OBJDIR)/make/fencebuild.mk
diff --git a/fence/agents/apc/fence_apc.py b/fence/agents/apc/fence_apc.py
index b70b220..12ddf2a 100755
--- a/fence/agents/apc/fence_apc.py
+++ b/fence/agents/apc/fence_apc.py
@@ -207,6 +207,7 @@ which can be used with the APC network power switch. It logs into device \
 via telnet/ssh  and reboots a specified outlet. Lengthy telnet/ssh connections \
 should be avoided while a GFS cluster  is  running  because  the  connection \
 will block any necessary fencing actions."
+	docs["vendorurl"] = "http://www.apc.com"
 	show_docs(options, docs)
 
 	## Support for -n [switch]:[plug] notation that was used before
diff --git a/fence/agents/apc_snmp/Makefile b/fence/agents/apc_snmp/Makefile
index 733842e..3834702 100644
--- a/fence/agents/apc_snmp/Makefile
+++ b/fence/agents/apc_snmp/Makefile
@@ -2,6 +2,6 @@ include ../../../make/defines.mk
 
 TARGET= fence_apc_snmp
 
-MANTARGET= $(TARGET).8
+MAKEMAN = 1
 
 include $(OBJDIR)/make/fencebuild.mk
diff --git a/fence/agents/apc_snmp/fence_apc_snmp.py b/fence/agents/apc_snmp/fence_apc_snmp.py
index ea3a89f..71e978e 100644
--- a/fence/agents/apc_snmp/fence_apc_snmp.py
+++ b/fence/agents/apc_snmp/fence_apc_snmp.py
@@ -195,6 +195,7 @@ def main():
 which can be used with the APC network power switch. It logs \
 into a device via SNMP and reboots a specified outlet. It supports \
 SNMP v1 and v3 with all combinations of  authenticity/privacy settings."
+	docs["vendorurl"] = "http://www.apc.com"
 	show_docs(options, docs)
 
 	# Operate the fencing device
diff --git a/fence/man/fence_baytech.8 b/fence/agents/baytech/fence_baytech.8
similarity index 98%
rename from fence/man/fence_baytech.8
rename to fence/agents/baytech/fence_baytech.8
index e60175d..aa17955 100644
--- a/fence/man/fence_baytech.8
+++ b/fence/agents/baytech/fence_baytech.8
@@ -25,6 +25,8 @@ fence_baytech accepts options on the command line as well as from stdin.
 fenced sends parameters through stdin when it execs the agent.  fence_baytech
 can be run by itself with command line options which is useful for testing.
 
+Vendor URL: http://www.baytech.net
+
 .SH OPTIONS
 .TP
 \fB-a\fP \fIhost\fP
diff --git a/fence/agents/bladecenter/Makefile b/fence/agents/bladecenter/Makefile
index 1425812..76b2a2c 100644
--- a/fence/agents/bladecenter/Makefile
+++ b/fence/agents/bladecenter/Makefile
@@ -2,6 +2,6 @@ include ../../../make/defines.mk
 
 TARGET= fence_bladecenter
 
-MANTARGET= $(TARGET).8
+MAKEMAN = 1
 
 include $(OBJDIR)/make/fencebuild.mk
diff --git a/fence/agents/bladecenter/fence_bladecenter.py b/fence/agents/bladecenter/fence_bladecenter.py
index 708fc84..8b890e4 100644
--- a/fence/agents/bladecenter/fence_bladecenter.py
+++ b/fence/agents/bladecenter/fence_bladecenter.py
@@ -110,6 +110,7 @@ def main():
 which can be used with IBM Bladecenters with recent enough firmware that \
 includes telnet support. It logs into a Brocade chasis via telnet or ssh \
 and uses the command line interface to power on and off blades."
+	docs["vendorurl"] = "http://www.ibm.com"
 	show_docs(options, docs)
 	
 	##
diff --git a/fence/man/fence_brocade.8 b/fence/agents/brocade/fence_brocade.8
similarity index 98%
rename from fence/man/fence_brocade.8
rename to fence/agents/brocade/fence_brocade.8
index 36fde82..812a3f9 100644
--- a/fence/man/fence_brocade.8
+++ b/fence/agents/brocade/fence_brocade.8
@@ -26,6 +26,8 @@ into the GFS cluster (after reboot) the port on the Brocade FC switch needs to
 be enabled. This can be done by running fence_brocade and specifying the 
 enable action.
 
+Vendor URL: http://www.brocade.com
+
 .SH OPTIONS
 .TP
 \fB-a\fP \fIIPaddress\fP
diff --git a/fence/man/fence_bullpap.8 b/fence/agents/bullpap/fence_bullpap.8
similarity index 98%
rename from fence/man/fence_bullpap.8
rename to fence/agents/bullpap/fence_bullpap.8
index 876b406..1404c75 100644
--- a/fence/man/fence_bullpap.8
+++ b/fence/agents/bullpap/fence_bullpap.8
@@ -18,6 +18,8 @@ fence_bullpap accepts options on the command line as well as from stdin.
 fenced sends the options through stdin when it execs the agent.  fence_bullpap
 can be run by itself with command line options which is useful for testing.
 
+Vendor URL: http://www.bull.com
+
 .SH OPTIONS
 .TP
 \fB-a\fP \fIIPaddress\fP
diff --git a/fence/agents/cisco_mds/Makefile b/fence/agents/cisco_mds/Makefile
index 2154ec8..6b2cc85 100644
--- a/fence/agents/cisco_mds/Makefile
+++ b/fence/agents/cisco_mds/Makefile
@@ -2,6 +2,6 @@ include ../../../make/defines.mk
 
 TARGET= fence_cisco_mds
 
-MANTARGET= $(TARGET).8
+MAKEMAN = 1
 
 include $(OBJDIR)/make/fencebuild.mk
diff --git a/fence/agents/cisco_mds/fence_cisco_mds.py b/fence/agents/cisco_mds/fence_cisco_mds.py
index c345c2e..d20efe4 100644
--- a/fence/agents/cisco_mds/fence_cisco_mds.py
+++ b/fence/agents/cisco_mds/fence_cisco_mds.py
@@ -111,6 +111,7 @@ def main():
 	docs["shortdesc"] = "Fence agent for Cisco MDS"
 	docs["longdesc"] = "fence_cisco_mds is an I/O Fencing agent \
 which can be used with any Cisco MDS 9000 series with SNMP enabled device."
+	docs["vendorurl"] = "http://www.cisco.com"
 	show_docs(options, docs)
 
 	if (not (options["-o"] in ["list","monitor"])):
diff --git a/fence/man/fence_cpint.8 b/fence/agents/cpint/fence_cpint.8
similarity index 97%
rename from fence/man/fence_cpint.8
rename to fence/agents/cpint/fence_cpint.8
index efbaa7e..e9f1c5d 100644
--- a/fence/man/fence_cpint.8
+++ b/fence/agents/cpint/fence_cpint.8
@@ -26,6 +26,8 @@ fence_node sends the options through stdin when it execs the agent.
 fence_cpint can be run by itself with command line options which is useful for
 testing.
 
+Vendor URL: http://www.ibm.com
+
 .SH OPTIONS
 .TP
 \fB-h\fP
diff --git a/fence/man/fence_drac.8 b/fence/agents/drac/fence_drac.8
similarity index 98%
rename from fence/man/fence_drac.8
rename to fence/agents/drac/fence_drac.8
index 8dda184..3e302d9 100644
--- a/fence/man/fence_drac.8
+++ b/fence/agents/drac/fence_drac.8
@@ -25,6 +25,8 @@ Fenced sends parameters through stdin when it execs the agent.  fence_drac
 can be run by itself with command line options.  This is useful for testing 
 and for turning outlets on or off from scripts.
 
+Vendor URL: http://www.dell.com
+
 .SH OPTIONS
 .TP
 \fB-a\fP \fIIPaddress\fR
diff --git a/fence/agents/drac5/Makefile b/fence/agents/drac5/Makefile
index 5db5ad9..4f74c52 100644
--- a/fence/agents/drac5/Makefile
+++ b/fence/agents/drac5/Makefile
@@ -2,6 +2,6 @@ include ../../../make/defines.mk
 
 TARGET= fence_drac5
 
-MANTARGET= $(TARGET).8
+MAKEMAN = 1
 
 include $(OBJDIR)/make/fencebuild.mk
diff --git a/fence/agents/drac5/fence_drac5.py b/fence/agents/drac5/fence_drac5.py
index 900b547..256158a 100755
--- a/fence/agents/drac5/fence_drac5.py
+++ b/fence/agents/drac5/fence_drac5.py
@@ -104,6 +104,7 @@ which can be used with the Dell Remote Access Card v5 or CMC (DRAC). \
 This device provides remote access to controlling  power to a server. \
 It logs into the DRAC through the telnet/ssh interface of the card. \
 By default, the telnet interface is not  enabled."
+	docs["vendorurl"] = "http://www.dell.com"
 	show_docs(options, docs)
 
 	##
diff --git a/fence/man/fence_egenera.8 b/fence/agents/egenera/fence_egenera.8
similarity index 98%
rename from fence/man/fence_egenera.8
rename to fence/agents/egenera/fence_egenera.8
index cfa839e..7e907cd 100644
--- a/fence/man/fence_egenera.8
+++ b/fence/agents/egenera/fence_egenera.8
@@ -21,6 +21,8 @@ fence_egenera accepts options on the command line as well as from stdin.
 Fenced sends parameters through stdin when it execs the agent.  fence_egenera 
 can also be run by itself with command line options.  
 
+Vendor URL: http://www.egenera.com
+
 .SH OPTIONS
 .TP
 \fB-c\fP \fIcserver\fR
diff --git a/fence/agents/eps/Makefile b/fence/agents/eps/Makefile
index 0cdba17..3d591da 100644
--- a/fence/agents/eps/Makefile
+++ b/fence/agents/eps/Makefile
@@ -2,6 +2,6 @@ include ../../../make/defines.mk
 
 TARGET= fence_eps
 
-MANTARGET= $(TARGET).8
+MAKEMAN = 1
 
 include $(OBJDIR)/make/fencebuild.mk
diff --git a/fence/agents/eps/fence_eps.py b/fence/agents/eps/fence_eps.py
index ffcb69b..049c01c 100644
--- a/fence/agents/eps/fence_eps.py
+++ b/fence/agents/eps/fence_eps.py
@@ -117,6 +117,7 @@ this is only one, which has support for hidden page feature. \
 Agent basically works by connecting to hidden page and pass \
 appropriate arguments to GET request. This means, that hidden \
 page feature must be enabled and properly configured."
+	docs["vendorurl"] = "http://www.epowerswitch.com"
 	show_docs(options, docs)
 
 	#Run fence action. Conn is None, beacause we always need open new http connection
diff --git a/fence/agents/ibmblade/Makefile b/fence/agents/ibmblade/Makefile
index 23d49c1..91f3e2a 100644
--- a/fence/agents/ibmblade/Makefile
+++ b/fence/agents/ibmblade/Makefile
@@ -2,6 +2,6 @@ include ../../../make/defines.mk
 
 TARGET= fence_ibmblade
 
-MANTARGET= $(TARGET).8
+MAKEMAN = 1
 
 include $(OBJDIR)/make/fencebuild.mk
diff --git a/fence/agents/ibmblade/fence_ibmblade.py b/fence/agents/ibmblade/fence_ibmblade.py
index 3ad6acc..323a4ad 100644
--- a/fence/agents/ibmblade/fence_ibmblade.py
+++ b/fence/agents/ibmblade/fence_ibmblade.py
@@ -77,6 +77,7 @@ def main():
 which can be used with IBM BladeCenter chassis. It issues SNMP Set \
 request to BladeCenter chassis, rebooting, powering up or down \
 the specified Blade Server."
+	docs["vendorurl"] = "http://www.ibm.com"
 	show_docs(options, docs)
 
 	# Operate the fencing device
diff --git a/fence/agents/ifmib/Makefile b/fence/agents/ifmib/Makefile
index d3745ad..64f4448 100644
--- a/fence/agents/ifmib/Makefile
+++ b/fence/agents/ifmib/Makefile
@@ -2,6 +2,6 @@ include ../../../make/defines.mk
 
 TARGET= fence_ifmib
 
-MANTARGET= $(TARGET).8
+MAKEMAN = 1
 
 include $(OBJDIR)/make/fencebuild.mk
diff --git a/fence/agents/ifmib/fence_ifmib.py b/fence/agents/ifmib/fence_ifmib.py
index 9f388d0..2914301 100644
--- a/fence/agents/ifmib/fence_ifmib.py
+++ b/fence/agents/ifmib/fence_ifmib.py
@@ -137,6 +137,7 @@ It was written with managed ethernet switches in mind, in order to \
 fence iSCSI SAN connections. However, there are many devices that \
 support the IF-MIB interface. The agent uses IF-MIB::ifAdminStatus \
 to control the state of an interface."
+	docs["vendorurl"] = "http://www.ietf.org/wg/concluded/ifmib.html"
 	show_docs(options, docs)
 
 	# Operate the fencing device
diff --git a/fence/agents/ilo/Makefile b/fence/agents/ilo/Makefile
index fe4c712..aafe6e4 100644
--- a/fence/agents/ilo/Makefile
+++ b/fence/agents/ilo/Makefile
@@ -2,6 +2,6 @@ include ../../../make/defines.mk
 
 TARGET= fence_ilo
 
-MANTARGET= $(TARGET).8
+MAKEMAN = 1
 
 include $(OBJDIR)/make/fencebuild.mk
diff --git a/fence/agents/ilo/fence_ilo.py b/fence/agents/ilo/fence_ilo.py
index 10d888c..b68f4e4 100755
--- a/fence/agents/ilo/fence_ilo.py
+++ b/fence/agents/ilo/fence_ilo.py
@@ -76,6 +76,7 @@ used for HP servers with the Integrated Light Out (iLO) PCI card.\
 The agent opens an SSL connection to the iLO card. Once the SSL \
 connection is established, the agent is able to communicate with \
 the iLO card through an XML stream."
+	docs["vendorurl"] = "http://www.hp.com"
 	show_docs(options, docs)
 
 	##
diff --git a/fence/agents/ilo_mp/Makefile b/fence/agents/ilo_mp/Makefile
index b4bc456..42ffb03 100644
--- a/fence/agents/ilo_mp/Makefile
+++ b/fence/agents/ilo_mp/Makefile
@@ -2,6 +2,6 @@ include ../../../make/defines.mk
 
 TARGET= fence_ilo_mp
 
-MANTARGET= $(TARGET).8
+MAKEMAN = 1
 
 include $(OBJDIR)/make/fencebuild.mk
diff --git a/fence/agents/ilo_mp/fence_ilo_mp.py b/fence/agents/ilo_mp/fence_ilo_mp.py
index b96c417..1f9363b 100644
--- a/fence/agents/ilo_mp/fence_ilo_mp.py
+++ b/fence/agents/ilo_mp/fence_ilo_mp.py
@@ -44,6 +44,7 @@ def main():
 	docs = { }
 	docs["shortdesc"] = "Fence agent for HP iLO MP"
 	docs["longdesc"] = ""
+	docs["vendorurl"] = "http://www.hp.com"
 	show_docs(options, docs)
 	
 	conn = fence_login(options)
diff --git a/fence/agents/intelmodular/Makefile b/fence/agents/intelmodular/Makefile
index 7ff7668..e7509e6 100644
--- a/fence/agents/intelmodular/Makefile
+++ b/fence/agents/intelmodular/Makefile
@@ -2,6 +2,6 @@ include ../../../make/defines.mk
 
 TARGET= fence_intelmodular
 
-MANTARGET= $(TARGET).8
+MAKEMAN = 1
 
 include $(OBJDIR)/make/fencebuild.mk
diff --git a/fence/agents/intelmodular/fence_intelmodular.py b/fence/agents/intelmodular/fence_intelmodular.py
index 1a313ae..11ff9bc 100644
--- a/fence/agents/intelmodular/fence_intelmodular.py
+++ b/fence/agents/intelmodular/fence_intelmodular.py
@@ -87,6 +87,7 @@ Note: Since firmware update version 2.7, SNMP v2 write support is \
 removed, and replaced by SNMP v3 support. So agent now has default \
 SNMP version 3. If you are using older firmware, please supply -d \
 for command line and snmp_version option for your cluster.conf."
+	docs["vendorurl"] = "http://www.intel.com"
 	show_docs(options, docs)
 
 	# Operate the fencing device
diff --git a/fence/agents/ldom/Makefile b/fence/agents/ldom/Makefile
index 821bc21..fb9de6c 100644
--- a/fence/agents/ldom/Makefile
+++ b/fence/agents/ldom/Makefile
@@ -2,6 +2,6 @@ include ../../../make/defines.mk
 
 TARGET= fence_ldom
 
-MANTARGET= $(TARGET).8
+MAKEMAN = 1
 
 include $(OBJDIR)/make/fencebuild.mk
diff --git a/fence/agents/ldom/fence_ldom.py b/fence/agents/ldom/fence_ldom.py
index d520fca..507a97a 100644
--- a/fence/agents/ldom/fence_ldom.py
+++ b/fence/agents/ldom/fence_ldom.py
@@ -113,6 +113,7 @@ for root in bash). But (for example) csh use ], so in that case you \
 must use parameter -c with argument ]. Very similar situation is, \
 if you use bash and login to host machine with other user than \
 root. Than prompt is $, so again, you must use parameter -c."
+	docs["vendorurl"] = "http://www.sun.com"
 	show_docs(options, docs)
 
 	##
diff --git a/fence/agents/lib/fence2man.xsl b/fence/agents/lib/fence2man.xsl
index abf9d93..563b3a4 100644
--- a/fence/agents/lib/fence2man.xsl
+++ b/fence/agents/lib/fence2man.xsl
@@ -40,6 +40,9 @@ as from stdin. Fenced sends parameters through stdin when it execs the
 agent. <xsl:value-of select="@name" /> can be run by itself with command
 line options.  This is useful for testing and for turning outlets on or off
 from scripts.
+<xsl:if test="vendor-url">
+Vendor URL: <xsl:value-of select="vendor-url" />
+</xsl:if>
 .SH PARAMETERS
 <xsl:apply-templates select="parameters"><xsl:with-param name="show">getopt</xsl:with-param></xsl:apply-templates>
 .SH ACTIONS
diff --git a/fence/agents/lib/fencing.py.py b/fence/agents/lib/fencing.py.py
index 2617492..831cdb1 100644
--- a/fence/agents/lib/fencing.py.py
+++ b/fence/agents/lib/fencing.py.py
@@ -421,6 +421,8 @@ def metadata(avail_opt, options, docs):
 	print "<?xml version=\"1.0\" ?>"
 	print "<resource-agent name=\"" + os.path.basename(sys.argv[0]) + "\" shortdesc=\"" + docs["shortdesc"] + "\" >"
 	print "<longdesc>" + docs["longdesc"] + "</longdesc>"
+	if docs.has_key("vendorurl"):
+		print "<vendor-url>" + docs["vendorurl"] + "</vendor-url>"
 	print "<parameters>"
 	for option, value in sorted_list:
 		if all_opt[option].has_key("shortdesc"):
diff --git a/fence/agents/lpar/Makefile b/fence/agents/lpar/Makefile
index fed1b2b..2ee6491 100644
--- a/fence/agents/lpar/Makefile
+++ b/fence/agents/lpar/Makefile
@@ -2,6 +2,6 @@ include ../../../make/defines.mk
 
 TARGET= fence_lpar
 
-MANTARGET= $(TARGET).8
+MAKEMAN = 1
 
 include $(OBJDIR)/make/fencebuild.mk
diff --git a/fence/agents/manual/Makefile b/fence/agents/manual/Makefile
index 490100f..ff4bde2 100644
--- a/fence/agents/manual/Makefile
+++ b/fence/agents/manual/Makefile
@@ -1,5 +1,7 @@
 TARGET= fence_ack_manual
 
+MANTARGET=$(TARGET).8
+
 SBINDIRT=$(TARGET)
 
 all: ${TARGET}
diff --git a/fence/man/fence_ack_manual.8 b/fence/agents/manual/fence_ack_manual.8
similarity index 100%
rename from fence/man/fence_ack_manual.8
rename to fence/agents/manual/fence_ack_manual.8
diff --git a/fence/man/fence_mcdata.8 b/fence/agents/mcdata/fence_mcdata.8
similarity index 98%
rename from fence/man/fence_mcdata.8
rename to fence/agents/mcdata/fence_mcdata.8
index 2230a66..c52ffd6 100644
--- a/fence/man/fence_mcdata.8
+++ b/fence/agents/mcdata/fence_mcdata.8
@@ -26,6 +26,8 @@ into the GFS cluster (after reboot) the port on the McData FC switch needs to
 be enabled. This can be done by running fence_mcdata and specifying the 
 enable action.
 
+Vendor URL: http://www.brocade.com
+
 .SH OPTIONS
 .TP
 \fB-a\fP \fIIPaddress\fP
diff --git a/fence/agents/nss_wrapper/Makefile b/fence/agents/nss_wrapper/Makefile
index b3a53ae..bf30a22 100644
--- a/fence/agents/nss_wrapper/Makefile
+++ b/fence/agents/nss_wrapper/Makefile
@@ -15,7 +15,7 @@ OBJS=	$(TARGET).o
 CFLAGS += -I${nsprincdir} -I${nssincdir}
 CFLAGS += -I${incdir}
 
-LDFLAGS += -L${nsslibdir} -lnss3 -lssl3
+LDFLAGS += -L${nsslibdir} -lnss3 -lssl3 -lnspr4
 LDFLAGS += -L${libdir}
 
 ${TARGET}: ${OBJS}
diff --git a/fence/agents/rackswitch/Makefile b/fence/agents/rackswitch/Makefile
index 71334d5..05ee5af 100644
--- a/fence/agents/rackswitch/Makefile
+++ b/fence/agents/rackswitch/Makefile
@@ -1,5 +1,7 @@
 TARGET= fence_rackswitch
 
+MANTARGET = fence_rackswitch.8
+
 SBINDIRT=$(TARGET)
 
 all: ${TARGET}
diff --git a/fence/man/fence_rackswitch.8 b/fence/agents/rackswitch/fence_rackswitch.8
similarity index 97%
rename from fence/man/fence_rackswitch.8
rename to fence/agents/rackswitch/fence_rackswitch.8
index 4f662c7..fc64a79 100644
--- a/fence/man/fence_rackswitch.8
+++ b/fence/agents/rackswitch/fence_rackswitch.8
@@ -18,6 +18,8 @@ fence_rackswitch accepts options on the command line as well as from stdin.
 fenced sends the options through stdin when it execs the agent.  fence_rackswitch 
 can be run by itself with command line options which is useful for testing.
 
+Vendor URL: http://www.bladenetwork.net
+
 .SH OPTIONS
 .TP
 \fB-a\fP \fIIPaddress\fP
diff --git a/fence/agents/rsa/Makefile b/fence/agents/rsa/Makefile
index b385197..cfbf74b 100644
--- a/fence/agents/rsa/Makefile
+++ b/fence/agents/rsa/Makefile
@@ -2,6 +2,6 @@ include ../../../make/defines.mk
 
 TARGET= fence_rsa
 
-MANTARGET= $(TARGET).8
+MAKEMAN = 1
 
 include $(OBJDIR)/make/fencebuild.mk
diff --git a/fence/agents/rsa/fence_rsa.py b/fence/agents/rsa/fence_rsa.py
index f633cd6..58ab148 100644
--- a/fence/agents/rsa/fence_rsa.py
+++ b/fence/agents/rsa/fence_rsa.py
@@ -66,6 +66,7 @@ logs into an RSA II device via telnet and reboots the associated \
 machine. Lengthy telnet connections to the RSA II device should \
 be avoided while a GFS cluster is running because the connection \
 will block any necessary fencing actions."
+	docs["vendorurl"] = "http://www.ibm.com"
 	show_docs(options, docs)
 	
 	##
diff --git a/fence/man/fence_rsb.8 b/fence/agents/rsb/fence_rsb.8
similarity index 98%
rename from fence/man/fence_rsb.8
rename to fence/agents/rsb/fence_rsb.8
index 2bb7bf4..d278271 100644
--- a/fence/man/fence_rsb.8
+++ b/fence/agents/rsb/fence_rsb.8
@@ -19,6 +19,7 @@ fence_rsb accepts options on the command line as well as from stdin.
 Fenced sends parameters through stdin when it execs the agent.  fence_rsb
 can be run by itself with command line options.  This is useful for testing.
 
+Vendor URL: http://www.fujitsu.com
 .SH OPTIONS
 .TP
 \fB-a\fP \fIIPaddress\fR
diff --git a/fence/agents/sanbox2/Makefile b/fence/agents/sanbox2/Makefile
index bfc886f..19ec5db 100644
--- a/fence/agents/sanbox2/Makefile
+++ b/fence/agents/sanbox2/Makefile
@@ -2,6 +2,6 @@ include ../../../make/defines.mk
 
 TARGET= fence_sanbox2
 
-MANTARGET= $(TARGET).8
+MAKEMAN = 1
 
 include $(OBJDIR)/make/fencebuild.mk
diff --git a/fence/agents/sanbox2/fence_sanbox2.py b/fence/agents/sanbox2/fence_sanbox2.py
index 086fe32..47f3bc8 100644
--- a/fence/agents/sanbox2/fence_sanbox2.py
+++ b/fence/agents/sanbox2/fence_sanbox2.py
@@ -130,6 +130,7 @@ QLogic SANBox2 FC switches.  It logs into a SANBox2 switch via telnet and disabl
 port. Disabling  the port which a machine is connected to effectively fences that machine. \
 Lengthy telnet connections to the switch should be avoided while a GFS cluster is running \
 because the connection will block any necessary fencing actions."
+	docs["vendorurl"] = "http://www.qlogic.com"
 	show_docs(options, docs)
 
 	##
diff --git a/fence/man/fence_scsi.8 b/fence/agents/scsi/fence_scsi.8
similarity index 100%
rename from fence/man/fence_scsi.8
rename to fence/agents/scsi/fence_scsi.8
diff --git a/fence/agents/scsi/fence_scsi.pl b/fence/agents/scsi/fence_scsi.pl
index 3e37d5b..59259cf 100644
--- a/fence/agents/scsi/fence_scsi.pl
+++ b/fence/agents/scsi/fence_scsi.pl
@@ -2,7 +2,6 @@
 
 use File::Basename;
 use Getopt::Std;
-use IPC::Open3;
 use POSIX;
 
 #BEGIN_VERSION_GENERATION
@@ -47,6 +46,7 @@ sub do_action_on ($@)
 	log_error ("device $dev is not a block device") if (! -b $dev);
 
 	do_register_ignore ($node_key, $dev);
+
 	if (!get_reservation_key ($dev)) {
 	    do_reserve ($node_key, $dev);
 	}
@@ -71,6 +71,7 @@ sub do_action_off ($@)
 	log_error ("device $dev is not a block device") if (! -b $dev);
 
 	my @keys = grep { /$node_key/ } get_registration_keys ($dev);
+
 	if (scalar (@keys) != 0) {
 	    do_preempt_abort ($host_key, $node_key, $dev);
 	}
@@ -92,6 +93,7 @@ sub do_action_status ($@)
 	log_error ("device $dev is not a block device") if (! -b $dev);
 
 	my @keys = grep { /$node_key/ } get_registration_keys ($dev);
+
 	if (scalar (@keys) != 0) {
 	    $dev_count++;
 	}
@@ -121,20 +123,14 @@ sub do_register ($$$)
     log_debug ("$self (host_key=$host_key, node_key=$node_key, dev=$dev)");
 
     my $cmd;
-    my $pid;
+    my $out;
 
     $cmd = "sg_persist -n -o -G -K $host_key -S $node_key -d $dev";
     $cmd .= " -Z" if (defined $opt_a);
-    $pid = open3 (\*IN, \*OUT, \*ERR, $cmd) or die "$!\n";
-
-    waitpid ($pid, 0);
+    $out = qx { $cmd };
 
     die "[error]: $self\n" if ($?>>8);
 
-    close (IN);
-    close (OUT);
-    close (ERR);
-
     return;
 }
 
@@ -154,20 +150,14 @@ sub do_register_ignore ($$)
     log_debug ("$self (node_key=$node_key, dev=$dev)");
 
     my $cmd;
-    my $pid;
+    my $out;
 
     $cmd = "sg_persist -n -o -I -S $node_key -d $dev";
     $cmd .= " -Z" if (defined $opt_a);
-    $pid = open3 (\*IN, \*OUT, \*ERR, $cmd) or die "$!\n";
-
-    waitpid ($pid, 0);
+    $out = qx { $cmd };
 
     die "[error]: $self\n" if ($?>>8);
 
-    close (IN);
-    close (OUT);
-    close (ERR);
-
     return;
 }
 
@@ -179,16 +169,10 @@ sub do_reserve ($$)
     log_debug ("$self (host_key=$host_key, dev=$dev)");
 
     my $cmd = "sg_persist -n -o -R -T 5 -K $host_key -d $dev";
-    my $pid = open3 (\*IN, \*OUT, \*ERR, $cmd) or die "$!\n";
-
-    waitpid ($pid, 0);
+    my $out = qx { $cmd };
 
     die "[error]: $self\n" if ($?>>8);
 
-    close (IN);
-    close (OUT);
-    close (ERR);
-
     return;
 }
 
@@ -200,16 +184,10 @@ sub do_release ($$)
     log_debug ("$self (host_key=$host_key, dev=$dev)");
 
     my $cmd = "sg_persist -n -o -L -T 5 -K $host_key -d $dev";
-    my $pid = open3 (\*IN, \*OUT, \*ERR, $cmd) or die "$!\n";
-
-    waitpid ($pid, 0);
+    my $out = qx { $cmd };
 
     die "[error]: $self\n" if ($?>>8);
 
-    close (IN);
-    close (OUT);
-    close (ERR);
-
     return;
 }
 
@@ -221,16 +199,10 @@ sub do_preempt ($$$)
     log_debug ("$self (host_key=$host_key, node_key=$node_key, dev=$dev)");
 
     my $cmd = "sg_persist -n -o -P -T 5 -K $host_key -S $node_key -d $dev";
-    my $pid = open3 (\*IN, \*OUT, \*ERR, $cmd) or die "$!\n";
-
-    waitpid ($pid, 0);
+    my $out = qx { $cmd };
 
     die "[error]: $self\n" if ($?>>8);
 
-    close (IN);
-    close (OUT);
-    close (ERR);
-
     return;
 }
 
@@ -242,16 +214,10 @@ sub do_preempt_abort ($$$)
     log_debug ("$self (host_key=$host_key, node_key=$node_key, dev=$dev)");
 
     my $cmd = "sg_persist -n -o -A -T 5 -K $host_key -S $node_key -d $dev";
-    my $pid = open3 (\*IN, \*OUT, \*ERR, $cmd) or die "$!\n";
-
-    waitpid ($pid, 0);
+    my $out = qx { $cmd };
 
     die "[error]: $self\n" if ($?>>8);
 
-    close (IN);
-    close (OUT);
-    close (ERR);
-
     return;
 }
 
@@ -281,6 +247,7 @@ sub key_write ($)
 sub get_key ($)
 {
     my $self = (caller(0))[3];
+
     my $key = sprintf ("%.4x%.4x", get_cluster_id (), get_node_id ($_[0]));
 
     return ($key);
@@ -292,17 +259,13 @@ sub get_node_id ($)
     my $node_id;
 
     my $cmd = "cman_tool nodes -n $_[0] -F id";
-    my $pid = open3 (\*IN, \*OUT, \*ERR, $cmd) or die "$!\n";
-
-    waitpid ($pid, 0);
+    my $out = qx { $cmd };
 
     die "[error]: $self\n" if ($?>>8);
 
-    chomp ($node_id = <OUT>);
+    chomp ($out);
 
-    close (IN);
-    close (OUT);
-    close (ERR);
+    $node_id = $out;
 
     return ($node_id);
 }
@@ -313,26 +276,18 @@ sub get_cluster_id ()
     my $cluster_id;
 
     my $cmd = "cman_tool status";
-    my $pid = open3 (\*IN, \*OUT, \*ERR, $cmd) or die "$!\n";
-
-    waitpid ($pid, 0);
+    my @out = qx { $cmd };
 
     die "[error]: $self\n" if ($?>>8);
 
-    while (<OUT>) {
+    foreach (@out) {
 	chomp;
-
 	my ($param, $value) = split (/\s*:\s*/, $_);
-
 	if ($param =~ /^cluster\s+id/i) {
 	    $cluster_id = $value;
 	}
     }
 
-    close (IN);
-    close (OUT);
-    close (ERR);
-
     return ($cluster_id);
 }
 
@@ -342,32 +297,24 @@ sub get_devices_clvm ()
     my @devices;
 
     my $cmd = "vgs --noheadings " .
-              "    --separator : " .
-              "    --sort pv_uuid " .
-              "    --options vg_attr,pv_name " .
-              "    --config 'global { locking_type = 0 } " .
-              "              devices { preferred_names = [ \"^/dev/dm\" ] }'";
+	"    --separator : " .
+	"    --sort pv_uuid " .
+	"    --options vg_attr,pv_name " .
+	"    --config 'global { locking_type = 0 } " .
+	"              devices { preferred_names = [ \"^/dev/dm\" ] }'";
 
-    my $pid = open3 (\*IN, \*OUT, \*ERR, $cmd) or die "$!\n";
-
-    waitpid ($pid, 0);
+    my @out = qx { $cmd 2> /dev/null };
 
     die "[error]: $self\n" if ($?>>8);
 
-    while (<OUT>) {
+    foreach (@out) {
 	chomp;
-
 	my ($vg_attr, $pv_name) = split (/:/, $_);
-
 	if ($vg_attr =~ /c$/) {
 	    push (@devices, $pv_name);
 	}
     }
 
-    close (IN);
-    close (OUT);
-    close (ERR);
-
     return (@devices);
 }
 
@@ -430,7 +377,6 @@ sub get_mpath_slaves ($)
     opendir (\*DIR, "/sys/block/$dev/slaves/") or die "$!\n";
 
     @slaves = grep { !/^\./ } readdir (DIR);
-
     if ($slaves[0] =~ /^dm/) {
 	@slaves = get_mpath_slaves ($slaves[0]);
     } else {
@@ -449,24 +395,17 @@ sub get_registration_keys ($)
     my @keys;
 
     my $cmd = "sg_persist -n -i -k -d $dev";
-    my $pid = open3 (\*IN, \*OUT, \*ERR, $cmd) or die "$!\n";
-
-    waitpid ($pid, 0);
+    my @out = qx { $cmd };
 
     die "[error]: $self\n" if ($?>>8);
 
-    while (<OUT>) {
+    foreach (@out) {
 	chomp;
-
 	if ($_ =~ s/^\s+0x//i) {
 	    push (@keys, $_);
 	}
     }
 
-    close (IN);
-    close (OUT);
-    close (ERR);
-
     return (@keys);
 }
 
@@ -477,26 +416,19 @@ sub get_reservation_key ($)
     my $key;
 
     my $cmd = "sg_persist -n -i -r -d $dev";
-    my $pid = open3 (\*IN, \*OUT, \*ERR, $cmd) or die "$!\n";
-
-    waitpid ($pid, 0);
+    my @out = qx { $cmd };
 
     die "[error]: $self\n" if ($?>>8);
 
-    while (<OUT>) {
+    foreach (@out) {
 	chomp;
-
 	if ($_ =~ s/^\s+key=0x//i) {
 	    $key = $_;
 	    last;
 	}
     }
 
-    close (IN);
-    close (OUT);
-    close (ERR);
-
-    return ($key);
+    return ($key)
 }
 
 sub get_options_stdin ()
@@ -505,7 +437,6 @@ sub get_options_stdin ()
 
     while (<STDIN>) {
 	chomp;
-
 	s/^\s*//;
 	s/\s*$//;
 
@@ -575,9 +506,7 @@ sub print_metadata ()
     print "<resource-agent name=\"fence_scsi\"" .
           " shortdesc=\"fence agent for SCSI-3 persistent reservations\">\n";
     print "<longdesc>fence_scsi</longdesc>\n";
-
     print "<parameters>\n";
-
     print "\t<parameter name=\"aptpl\" unique=\"1\" required=\"0\">\n";
     print "\t\t<getopt mixed=\"-a\"/>\n";
     print "\t\t<content type=\"boolean\"/>\n";
@@ -585,7 +514,6 @@ sub print_metadata ()
           "Use APTPL flag for registrations" .
           "</shortdesc>\n";
     print "\t</parameter>\n";
-
     print "\t<parameter name=\"devices\" unique=\"1\" required=\"0\">\n";
     print "\t\t<getopt mixed=\"-d\"/>\n";
     print "\t\t<content type=\"string\"/>\n";
@@ -593,7 +521,6 @@ sub print_metadata ()
           "List of devices to be used for fencing action" .
           "</shortdesc>\n";
     print "\t</parameter>\n";
-
     print "\t<parameter name=\"logfile\" unique=\"1\" required=\"0\">\n";
     print "\t\t<getopt mixed=\"-f\"/>\n";
     print "\t\t<content type=\"string\"/>\n";
@@ -601,7 +528,6 @@ sub print_metadata ()
           "File to write error/debug messages" .
           "</shortdesc>\n";
     print "\t</parameter>\n";
-
     print "\t<parameter name=\"key\" unique=\"1\" required=\"0\">\n";
     print "\t\t<getopt mixed=\"-k\"/>\n";
     print "\t\t<content type=\"string\"/>\n";
@@ -609,7 +535,6 @@ sub print_metadata ()
           "Key value to be used for fencing action" .
           "</shortdesc>\n";
     print "\t</parameter>\n";
-
     print "\t<parameter name=\"action\" unique=\"1\" required=\"0\">\n";
     print "\t\t<getopt mixed=\"-o\"/>\n";
     print "\t\t<content type=\"string\" default=\"off\"/>\n";
@@ -617,7 +542,6 @@ sub print_metadata ()
           "Fencing action" .
           "</shortdesc>\n";
     print "\t</parameter>\n";
-
     print "\t<parameter name=\"nodename\" unique=\"1\" required=\"0\">\n";
     print "\t\t<getopt mixed=\"-n\"/>\n";
     print "\t\t<content type=\"string\"/>\n";
@@ -625,16 +549,13 @@ sub print_metadata ()
           "Name of node" .
           "</shortdesc>\n";
     print "\t</parameter>\n";
-
     print "</parameters>\n";
-
     print "<actions>\n";
     print "\t<action name=\"on\"/>\n";
     print "\t<action name=\"off\"/>\n";
     print "\t<action name=\"status\"/>\n";
     print "\t<action name=\"metadata\"/>\n";
     print "</actions>\n";
-
     print "</resource-agent>\n";
 
     exit (0);
@@ -688,6 +609,12 @@ if ($key == 0) {
     log_error ("key cannot be zero");
 }
 
+## remove any leading zeros from key
+##
+if ($key =~ /^0/) {
+    $key =~ s/^0+//;
+}
+
 ## get devices
 ##
 if (defined $opt_d) {
@@ -722,7 +649,7 @@ elsif ($opt_o =~ /^status/i) {
 }
 else {
     log_error ("unknown action '$opt_o'");
-exit (1);
+    exit (1);
 }
 
 ## close the logfile
diff --git a/fence/agents/virsh/Makefile b/fence/agents/virsh/Makefile
index 190d080..51ef0ca 100644
--- a/fence/agents/virsh/Makefile
+++ b/fence/agents/virsh/Makefile
@@ -2,6 +2,6 @@ include ../../../make/defines.mk
 
 TARGET= fence_virsh
 
-MANTARGET= $(TARGET).8
+MAKEMAN = 1
 
 include $(OBJDIR)/make/fencebuild.mk
diff --git a/fence/man/fence_vixel.8 b/fence/agents/vixel/fence_vixel.8
similarity index 98%
rename from fence/man/fence_vixel.8
rename to fence/agents/vixel/fence_vixel.8
index dc285c5..83db14f 100644
--- a/fence/man/fence_vixel.8
+++ b/fence/agents/vixel/fence_vixel.8
@@ -29,6 +29,8 @@ Then apply
 
 Consult the Vixel manual for details
 
+Vendor URL: http://www.emulex.com
+
 .SH OPTIONS
 .TP
 \fB-a\fP \fIIPaddress\fP
diff --git a/fence/agents/vmware/Makefile b/fence/agents/vmware/Makefile
index 0ec0b3d..e58aa67 100644
--- a/fence/agents/vmware/Makefile
+++ b/fence/agents/vmware/Makefile
@@ -3,5 +3,6 @@ include ../../../make/defines.mk
 TARGET= fence_vmware_helper fence_vmware
 
 MANTARGET= fence_vmware.8
+MAKEMAN = 1
 
 include $(OBJDIR)/make/fencebuild.mk
diff --git a/fence/agents/vmware/fence_vmware.py b/fence/agents/vmware/fence_vmware.py
index ad51687..99e5ca9 100644
--- a/fence/agents/vmware/fence_vmware.py
+++ b/fence/agents/vmware/fence_vmware.py
@@ -322,6 +322,7 @@ command is used.\
 After you have successfully installed VI Perl Toolkit or VIX API, you should \
 be able to run fence_vmware_helper (part of this agent) or vmrun command. \
 This agent supports only vmrun from version 2.0.0 (VIX API 1.6.0)."
+	docs["vendorurl"] = "http://www.vmware.com"
 	show_docs(options, docs)
 
 	# Check vmware type and set path
diff --git a/fence/agents/wti/Makefile b/fence/agents/wti/Makefile
index 5240559..c281149 100644
--- a/fence/agents/wti/Makefile
+++ b/fence/agents/wti/Makefile
@@ -2,6 +2,6 @@ include ../../../make/defines.mk
 
 TARGET= fence_wti
 
-MANTARGET= $(TARGET).8
+MAKEMAN = 1
 
 include $(OBJDIR)/make/fencebuild.mk
diff --git a/fence/agents/wti/fence_wti.py b/fence/agents/wti/fence_wti.py
index a45e3ec..92b362a 100644
--- a/fence/agents/wti/fence_wti.py
+++ b/fence/agents/wti/fence_wti.py
@@ -96,6 +96,7 @@ which can be used with the WTI Network Power Switch (NPS). It logs \
 into an NPS via telnet or ssh and boots a specified plug. \
 Lengthy telnet connections to the NPS should be avoided while a GFS cluster \
 is running because the connection will block any necessary fencing actions."
+	docs["vendorurl"] = "http://www.wti.com"
 	show_docs(options, docs)
 	
 	##
diff --git a/fence/man/fence_xcat.8 b/fence/agents/xcat/fence_xcat.8
similarity index 98%
rename from fence/man/fence_xcat.8
rename to fence/agents/xcat/fence_xcat.8
index 7da35d5..719f896 100644
--- a/fence/man/fence_xcat.8
+++ b/fence/agents/xcat/fence_xcat.8
@@ -23,6 +23,8 @@ NOTE: It is recommended that fence_bladecenter(8) is used instead of fence_xcat
 the bladecenter firmware supports telnet.  This interface is much cleaner and
 easier to setup.
 
+Vendor URL: http://www.xcat.org
+
 .SH OPTIONS
 .TP
 \fB-h\fP
diff --git a/fence/agents/xvm/Makefile b/fence/agents/xvm/Makefile
index 5b10360..1ff9e3e 100644
--- a/fence/agents/xvm/Makefile
+++ b/fence/agents/xvm/Makefile
@@ -1,6 +1,8 @@
 TARGET1= fence_xvmd
 TARGET2= testprog
 
+MANTARGET=$(TARGET1).8
+
 SBINDIRT=$(TARGET1)
 
 all: ${TARGET1}
diff --git a/fence/man/fence_xvmd.8 b/fence/agents/xvm/fence_xvmd.8
similarity index 100%
rename from fence/man/fence_xvmd.8
rename to fence/agents/xvm/fence_xvmd.8
diff --git a/fence/man/fence_zvm.8 b/fence/agents/zvm/fence_zvm.8
similarity index 98%
rename from fence/man/fence_zvm.8
rename to fence/agents/zvm/fence_zvm.8
index b22f8e1..c58d8ae 100644
--- a/fence/man/fence_zvm.8
+++ b/fence/agents/zvm/fence_zvm.8
@@ -18,6 +18,8 @@ fence_node sends the options through stdin when it execs the agent.
 fence_zvm can be run by itself with command line options which is useful
 for testing.
 
+Vendor URL: http://www.ibm.com
+
 .SH OPTIONS
 .TP
 \fB-a\fP \fIIPaddress\fP
diff --git a/fence/man/Makefile b/fence/man/Makefile
index 45401f6..a4a9cf1 100644
--- a/fence/man/Makefile
+++ b/fence/man/Makefile
@@ -2,26 +2,6 @@ include ../../make/defines.mk
 
 MANTARGET = fenced.8 fence_node.8 fence_tool.8
 
-ifdef fence_agents
-MANTARGET += \
-	fence_ack_manual.8 \
-	fence_baytech.8 \
-	fence_brocade.8 \
-	fence_bullpap.8 \
-	fence_cpint.8 \
-	fence_drac.8 \
-	fence_egenera.8 \
-	fence_mcdata.8 \
-	fence_rackswitch.8 \
-	fence_rib.8 \
-	fence_rsb.8 \
-	fence_scsi.8 \
-	fence_vixel.8 \
-	fence_xcat.8 \
-	fence_xvmd.8 \
-	fence_zvm.8
-endif
-
 include $(OBJDIR)/make/install.mk
 include $(OBJDIR)/make/uninstall.mk
 
diff --git a/fence/man/fence_node.8 b/fence/man/fence_node.8
index 2cf2d00..fe26198 100644
--- a/fence/man/fence_node.8
+++ b/fence/man/fence_node.8
@@ -93,12 +93,12 @@ Fencing/fence_node() configuration shown in
 Unfencing is only performed for a node with an <unfence> section:
 
 .nf
-  <clusternode name="node1" nodeid="1">
-          <fence>
-          </fence>
-          <unfence>
-          </unfence>
-  </clusternode>
+<clusternode name="node1" nodeid="1">
+        <fence>
+        </fence>
+        <unfence>
+        </unfence>
+</clusternode>
 .fi
 
 The <unfence> section does not contain <method> sections like the <fence>
@@ -109,17 +109,17 @@ referenced by both fence and unfence <device> lines, and the same per-node
 args should be repeated.
 
 .nf
-  <clusternode name="node1" nodeid="1">
-          <fence>
-          <method name="1">
-          <device name="myswitch" foo="x"/>
-          </method>
-          </fence>
-
-          <unfence>
-          <device name="myswitch" foo="x" action="on"/>
-          </unfence>
-  </clusternode>
+<clusternode name="node1" nodeid="1">
+        <fence>
+        <method name="1">
+        <device name="myswitch" foo="x"/>
+        </method>
+        </fence>
+
+        <unfence>
+        <device name="myswitch" foo="x" action="on"/>
+        </unfence>
+</clusternode>
 .fi
 
 .SH SEE ALSO
diff --git a/fence/man/fence_rib.8 b/fence/man/fence_rib.8
deleted file mode 100644
index 04ace3f..0000000
--- a/fence/man/fence_rib.8
+++ /dev/null
@@ -1,10 +0,0 @@
-.TH fence_rib 8
-
-.SH NAME
-fence_rib - I/O Fencing agent for Compaq Remote Insight Lights Out card
-
-.SH DESCRIPTION
-fence_rib is deprecated.  fence_ilo should be used instead
-
-.SH SEE ALSO
-fence_ilo(8)
diff --git a/fence/man/fenced.8 b/fence/man/fenced.8
index 84c3a46..c3496fb 100644
--- a/fence/man/fenced.8
+++ b/fence/man/fenced.8
@@ -114,12 +114,22 @@ Command line options override a corresponding setting in cluster.conf.
 .TP
 .B \-D
 Enable debugging to stderr and don't fork.
+.br
+See also
+.B fence_tool dump
+in 
+.BR fence_tool (8).
 .TP
 .B \-L
 Enable debugging to log file.
+.br
+See also
+.B logging
+in 
+.BR cluster.conf (5).
 .TP
 .BI \-g " num"
-groupd compatibility mode, 0 off, 1 on.  Default 0.
+groupd compatibility mode, 0 off, 1 on. Default 0.
 .TP
 .BI \-r " path"
 Register a directory that needs to be empty for the daemon to start.  Use
@@ -133,14 +143,14 @@ All nodes are in a clean state to start. Do no startup fencing.
 Skip startup fencing of nodes with no defined fence methods.
 .TP
 .BI \-j " secs"
-Post-join fencing delay.
+Post-join fencing delay. Default 6.
 .TP
 .BI \-f " secs"
-Post-fail fencing delay.
+Post-fail fencing delay. Default 0.
 .TP
-.BI \-R " path"
+.BI \-R " secs"
 Number of seconds to wait for a manual override after a failed fencing
-attempt before the next attempt.
+attempt before the next attempt. Default 3.
 .TP
 .BI \-O " path"
 Location of a FIFO used for communication between fenced and fence_ack_manual.
@@ -157,36 +167,44 @@ is usually located at /etc/cluster/cluster.conf.  It is not read directly.
 Other cluster components load the contents into memory, and the values are
 accessed through the libccs library.
 
-Fencing daemon behavior can be controlled by setting options in the
-cluster.conf file under the section <fence_daemon />.  See above for
-complete descriptions of these values.  The delay values are in seconds;
--1 secs means an unlimited delay.  The values shown are the defaults.
+Configuration options for fenced are added to the <fence_daemon /> section
+of cluster.conf, within the top level <cluster> section.
 
-Post-join delay is the number of seconds the daemon will wait before
-fencing any victims after a node joins the domain.
+.TP
+.B post_join_delay
+is the number of seconds the daemon will wait before fencing any victims
+after a node joins the domain.  Default 6.
 
-  <fence_daemon post_join_delay="6"/>
+<fence_daemon post_join_delay="6"/>
 
-Post-fail delay is the number of seconds the daemon will wait before
-fencing any victims after a domain member fails.
+.TP
+.B post_fail_delay
+is the number of seconds the daemon will wait before fencing any victims
+after a domain member fails.  Default 0.
 
-  <fence_daemon post_fail_delay="0"/>
+<fence_daemon post_fail_delay="0"/>
 
-Clean-start is used to prevent any startup fencing the daemon might do.
+.TP
+.B clean_start
+is used to prevent any startup fencing the daemon might do.
 It indicates that the daemon should assume all nodes are in a clean state
-to start.
+to start. Default 0.
 
-  <fence_daemon clean_start="0"/>
+<fence_daemon clean_start="0"/>
 
-Override-path is the location of a FIFO used for communication between
-fenced and fence_ack_manual.
+.TP
+.B override_path
+is the location of a FIFO used for communication between fenced and
+fence_ack_manual. Default shown.
 
-  <fence_daemon override_path="/var/run/cluster/fenced_override"/>
+<fence_daemon override_path="/var/run/cluster/fenced_override"/>
 
-Override-time is the amount of time to wait for administrator intervention
-between fencing attempts following fence agent failures.
+.TP
+.B override_time
+is the number of seconds to wait for administrator intervention
+between fencing attempts following fence agent failures. Default 3.
 
-  <fence_daemon override_time="3"/>
+<fence_daemon override_time="3"/>
 
 .SS Per-node fencing settings
 
@@ -194,19 +212,19 @@ The per-node fencing configuration is partly dependant on the specific
 agent/hardware being used.  The general framework begins like this:
 
 .nf
-  <clusternodes>
+<clusternodes>
 
-  <clusternode name="node1" nodeid="1">
-          <fence>
-          </fence>
-  </clusternode>
+<clusternode name="node1" nodeid="1">
+        <fence>
+        </fence>
+</clusternode>
 
-  <clusternode name="node2" nodeid="2">
-          <fence>
-          </fence>
-  </clusternode>
+<clusternode name="node2" nodeid="2">
+        <fence>
+        </fence>
+</clusternode>
 
-  </clusternodes>
+</clusternodes>
 .fi
 
 The simple fragment above is a valid configuration: there is no way to
@@ -221,25 +239,25 @@ listed in the separate <fencedevices> section, and then lists any
 node-specific parameters related to using the device.
 
 .nf
-  <clusternodes>
-
-  <clusternode name="node1" nodeid="1">
-          <fence>
-          <method name="1">
-          <device name="myswitch" foo="x"/>
-          </method>
-          </fence>
-  </clusternode>
-
-  <clusternode name="node2" nodeid="2">
-          <fence>
-          <method name="1">
-          <device name="myswitch" foo="y"/>
-          </method>
-          </fence>
-  </clusternode>
-
-  </clusternodes>
+<clusternodes>
+
+<clusternode name="node1" nodeid="1">
+        <fence>
+        <method name="1">
+        <device name="myswitch" foo="x"/>
+        </method>
+        </fence>
+</clusternode>
+
+<clusternode name="node2" nodeid="2">
+        <fence>
+        <method name="1">
+        <device name="myswitch" foo="y"/>
+        </method>
+        </fence>
+</clusternode>
+
+</clusternodes>
 .fi
 
 .SS Fence device settings
@@ -249,9 +267,9 @@ may be one or more devices listed.  The per-node fencing sections above
 reference one of these fence devices by name.
 
 .nf
-  <fencedevices>
-          <fencedevice name="myswitch" agent="..." something="..."/>
-  </fencedevices>
+<fencedevices>
+        <fencedevice name="myswitch" agent="..." something="..."/>
+</fencedevices>
 .fi
 
 .SS Multiple methods for a node
@@ -261,21 +279,21 @@ for a node.  If fencing fails using the first method, fenced will try the
 next method, and continue to cycle through methods until one succeeds.
 
 .nf
-  <clusternode name="node1" nodeid="1">
-          <fence>
-          <method name="1">
-          <device name="myswitch" foo="x"/>
-          </method>
-          <method name="2">
-          <device name="another" bar="123"/>
-          </method>
-          </fence>
-  </clusternode>
-
-  <fencedevices>
-          <fencedevice name="myswitch" agent="..." something="..."/>
-          <fencedevice name="another" agent="..."/>
-  </fencedevices>
+<clusternode name="node1" nodeid="1">
+        <fence>
+        <method name="1">
+        <device name="myswitch" foo="x"/>
+        </method>
+        <method name="2">
+        <device name="another" bar="123"/>
+        </method>
+        </fence>
+</clusternode>
+
+<fencedevices>
+        <fencedevice name="myswitch" agent="..." something="..."/>
+        <fencedevice name="another" agent="..."/>
+</fencedevices>
 .fi
 
 .SS Dual path, redundant power
@@ -286,14 +304,14 @@ fenced will run the agent for the device twice, once for each device line,
 and both must succeed for fencing to be considered successful.
 
 .nf
-  <clusternode name="node1" nodeid="1">
-          <fence>
-          <method name="1">
-          <device name="sanswitch1" port="11"/>
-          <device name="sanswitch2" port="11"/>
-          </method>
-          </fence>
-  </clusternode>
+<clusternode name="node1" nodeid="1">
+        <fence>
+        <method name="1">
+        <device name="sanswitch1" port="11"/>
+        <device name="sanswitch2" port="11"/>
+        </method>
+        </fence>
+</clusternode>
 .fi
 
 When using power switches to fence nodes with dual power supplies, the
@@ -302,16 +320,16 @@ either port.  The default off-on behavior of the agent could result in the
 power never being fully disabled to the node.
 
 .nf
-  <clusternode name="node1" nodeid="1">
-          <fence>
-          <method name="1">
-          <device name="nps1" port="11" action="off"/>
-          <device name="nps2" port="11" action="off"/>
-          <device name="nps1" port="11" action="on"/>
-          <device name="nps2" port="11" action="on"/>
-          </method>
-          </fence>
-  </clusternode>
+<clusternode name="node1" nodeid="1">
+        <fence>
+        <method name="1">
+        <device name="nps1" port="11" action="off"/>
+        <device name="nps2" port="11" action="off"/>
+        <device name="nps1" port="11" action="on"/>
+        <device name="nps2" port="11" action="on"/>
+        </method>
+        </fence>
+</clusternode>
 .fi
 
 .SS Hardware-specific settings
diff --git a/gfs-kernel/src/gfs/gfs.h b/gfs-kernel/src/gfs/gfs.h
index fec3f26..df16f15 100644
--- a/gfs-kernel/src/gfs/gfs.h
+++ b/gfs-kernel/src/gfs/gfs.h
@@ -1,7 +1,7 @@
 #ifndef __GFS_DOT_H__
 #define __GFS_DOT_H__
 
-#define RELEASE_VERSION "3.0.7"
+#define RELEASE_VERSION "3.0.9"
 
 #include "lm_interface.h"
 
diff --git a/gfs-kernel/src/gfs/glock.c b/gfs-kernel/src/gfs/glock.c
index 3040c60..80f4466 100644
--- a/gfs-kernel/src/gfs/glock.c
+++ b/gfs-kernel/src/gfs/glock.c
@@ -986,6 +986,33 @@ state_change(struct gfs_glock *gl, unsigned int new_state)
 	gl->gl_state = new_state;
 }
 
+static int gfs_glock_demote_wait(void *word)	/* action callback handed to wait_on_bit() by gfs_wait_on_demote(); word is unused */
+{
+	schedule();	/* yield the CPU while the bit is still set */
+	return 0;
+}
+
+static void gfs_wait_on_demote(struct gfs_glock *gl)	/* block until GLF_DEMOTE is clear in gl->gl_flags */
+{
+	might_sleep();	/* this function can sleep in wait_on_bit() below */
+	wait_on_bit(&gl->gl_flags, GLF_DEMOTE, gfs_glock_demote_wait, TASK_UNINTERRUPTIBLE);	/* uninterruptible wait on the GLF_DEMOTE bit */
+}
+
+static void gfs_demote_wake(struct gfs_glock *gl)	/* clear GLF_DEMOTE and wake tasks waiting in gfs_wait_on_demote() */
+{
+	clear_bit(GLF_DEMOTE, &gl->gl_flags);
+	smp_mb__after_clear_bit();	/* barrier between the clear above and the wake-up below */
+	wake_up_bit(&gl->gl_flags, GLF_DEMOTE);
+}
+
+void gfs_glock_dq_wait(struct gfs_holder *gh)	/* dequeue gh, then wait until GLF_DEMOTE is cleared on its glock */
+{
+	struct gfs_glock *gl = gh->gh_gl;
+	set_bit(GLF_DEMOTE, &gl->gl_flags);	/* armed here; cleared again by gfs_demote_wake() */
+	gfs_glock_dq(gh);
+	gfs_wait_on_demote(gl);
+}
+
 /**
  * xmote_bh - Called after the lock module is done acquiring a lock
  * @gl: The glock in question
@@ -1091,6 +1118,8 @@ xmote_bh(struct gfs_glock *gl, unsigned int ret)
 		gl->gl_req_bh = NULL;
 		clear_bit(GLF_LOCK, &gl->gl_flags);
 		run_queue(gl);
+		if (test_bit(GLF_DEMOTE, &gl->gl_flags))
+			gfs_demote_wake(gl);
 		spin_unlock(&gl->gl_spin);
 	}
 
@@ -1200,8 +1229,9 @@ drop_bh(struct gfs_glock *gl, unsigned int ret)
 	gl->gl_req_bh = NULL;
 	clear_bit(GLF_LOCK, &gl->gl_flags);
 	run_queue(gl);
-	spin_unlock(&gl->gl_spin);
-
+	if (test_bit(GLF_DEMOTE, &gl->gl_flags))
+		gfs_demote_wake(gl);
+	spin_unlock(&gl->gl_spin);	
 	glock_put(gl);
 
 	if (gh) {
@@ -1312,6 +1342,11 @@ glock_wait_internal(struct gfs_holder *gh)
 		if (gl->gl_req_gh != gh &&
 		    !test_bit(HIF_HOLDER, &gh->gh_iflags) &&
 		    !list_empty(&gh->gh_list)) {
+			if (gh->gh_flags & GL_FLOCK &&
+			    list_empty(&gl->gl_holders)) {
+				spin_unlock(&gl->gl_spin);
+				goto skip_try_flag;
+			}
 			list_del_init(&gh->gh_list);
 			gh->gh_error = GLR_TRYFAILED;
 			if (test_bit(HIF_RECURSE, &gh->gh_iflags))
@@ -1323,6 +1358,7 @@ glock_wait_internal(struct gfs_holder *gh)
 		spin_unlock(&gl->gl_spin);
 	}
 
+skip_try_flag:
 	if ((gh->gh_flags & LM_FLAG_PRIORITY) &&
 	    !(gh->gh_flags & GL_NOCANCEL_OTHER))
 		do_cancels(gh);
@@ -1402,13 +1438,14 @@ add_to_queue(struct gfs_holder *gh)
 			if (tmp_gh->gh_owner == gh->gh_owner) {
 				/* Make sure pre-existing holder is compatible
 				   with this new one. */
-				if (gfs_assert_warn(sdp, (gh->gh_flags & LM_FLAG_ANY) ||
-						    !(tmp_gh->gh_flags & LM_FLAG_ANY)) ||
-				    gfs_assert_warn(sdp, (tmp_gh->gh_flags & GL_LOCAL_EXCL) ||
-						    !(gh->gh_flags & GL_LOCAL_EXCL)) ||
-				    gfs_assert_warn(sdp, relaxed_state_ok(gl->gl_state,
-									  gh->gh_state,
-									  gh->gh_flags)))
+				if (!(gh->gh_flags & GL_FLOCK) && 
+				    (gfs_assert_warn(sdp, (gh->gh_flags & LM_FLAG_ANY) ||
+						     !(tmp_gh->gh_flags & LM_FLAG_ANY)) ||
+				     gfs_assert_warn(sdp, (tmp_gh->gh_flags & GL_LOCAL_EXCL) ||
+						     !(gh->gh_flags & GL_LOCAL_EXCL)) ||
+				     gfs_assert_warn(sdp, relaxed_state_ok(gl->gl_state,
+									   gh->gh_state,
+									   gh->gh_flags))))
 					goto fail;
 
 				/* We're good!  Grant the hold. */
@@ -1430,15 +1467,16 @@ add_to_queue(struct gfs_holder *gh)
 			tmp_gh = list_entry(tmp, struct gfs_holder, gh_list);
 			if (tmp_gh->gh_owner == gh->gh_owner) {
 				/* Yes, make sure it is compatible with new */
-				if (gfs_assert_warn(sdp, test_bit(HIF_PROMOTE,
-								  &tmp_gh->gh_iflags)) ||
-				    gfs_assert_warn(sdp, (gh->gh_flags & LM_FLAG_ANY) ||
-						    !(tmp_gh->gh_flags & LM_FLAG_ANY)) ||
-				    gfs_assert_warn(sdp, (tmp_gh->gh_flags & GL_LOCAL_EXCL) ||
-						    !(gh->gh_flags & GL_LOCAL_EXCL)) ||
-				    gfs_assert_warn(sdp, relaxed_state_ok(tmp_gh->gh_state,
-									  gh->gh_state,
-									  gh->gh_flags)))
+				if (!(gh->gh_flags & GL_FLOCK) &&
+				    (gfs_assert_warn(sdp, test_bit(HIF_PROMOTE,
+								   &tmp_gh->gh_iflags)) ||
+				     gfs_assert_warn(sdp, (gh->gh_flags & LM_FLAG_ANY) ||
+						     !(tmp_gh->gh_flags & LM_FLAG_ANY)) ||
+				     gfs_assert_warn(sdp, (tmp_gh->gh_flags & GL_LOCAL_EXCL) ||
+						     !(gh->gh_flags & GL_LOCAL_EXCL)) ||
+				     gfs_assert_warn(sdp, relaxed_state_ok(tmp_gh->gh_state,
+									   gh->gh_state,
+									   gh->gh_flags))))
 					goto fail;
 
 				/* OK, make sure they're marked, so
diff --git a/gfs-kernel/src/gfs/glock.h b/gfs-kernel/src/gfs/glock.h
index a0342b1..9de5e94 100644
--- a/gfs-kernel/src/gfs/glock.h
+++ b/gfs-kernel/src/gfs/glock.h
@@ -21,6 +21,7 @@
 #define GL_SYNC           (0x00000800) /* Sync to disk when no more holders */
 #define GL_NOCANCEL       (0x00001000) /* Don't ever cancel this request */
 #define GL_NOCANCEL_OTHER (0x00004000) /* Don't cancel other locks for this */
+#define GL_FLOCK          (0x00008000) /* This is an flock */
 
 #define GLR_TRYFAILED     (13)
 #define GLR_CANCELED      (14)
@@ -96,6 +97,7 @@ int gfs_glock_nq(struct gfs_holder *gh);
 int gfs_glock_poll(struct gfs_holder *gh);
 int gfs_glock_wait(struct gfs_holder *gh);
 void gfs_glock_dq(struct gfs_holder *gh);
+void gfs_glock_dq_wait(struct gfs_holder *gh);
 
 void gfs_glock_prefetch(struct gfs_glock *gl, unsigned int state, int flags);
 void gfs_glock_force_drop(struct gfs_glock *gl);
diff --git a/gfs-kernel/src/gfs/incore.h b/gfs-kernel/src/gfs/incore.h
index 1e00fc2..921b79d 100644
--- a/gfs-kernel/src/gfs/incore.h
+++ b/gfs-kernel/src/gfs/incore.h
@@ -474,6 +474,8 @@ struct gfs_holder {
                                       *   (demote/greedy) holders */
 #define GLF_GREEDY              (7)  /* This lock is ignoring callbacks
                                       *   (requests from other nodes) for now */
+#define GLF_DEMOTE              (8)  /* Set while an unflock (dq) is waited on for
+				      *   completion; not the same as HIF_DEMOTE */
 
 struct gfs_glock {
 	struct list_head gl_list;    /* Link to hb_list in one of superblock's
diff --git a/gfs-kernel/src/gfs/ops_file.c b/gfs-kernel/src/gfs/ops_file.c
index 6a64958..e675772 100644
--- a/gfs-kernel/src/gfs/ops_file.c
+++ b/gfs-kernel/src/gfs/ops_file.c
@@ -1732,7 +1732,8 @@ do_flock(struct file *file, int cmd, struct file_lock *fl)
 	int error = 0;
 
 	state = (fl->fl_type == F_WRLCK) ? LM_ST_EXCLUSIVE : LM_ST_SHARED;
-	flags = ((IS_SETLKW(cmd)) ? 0 : LM_FLAG_TRY) | GL_EXACT | GL_NOCACHE;
+	flags = ((IS_SETLKW(cmd)) ? 0 : LM_FLAG_TRY) | GL_EXACT | GL_NOCACHE
+		| GL_FLOCK;
 
 	down(&fp->f_fl_lock);
 
@@ -1740,21 +1741,19 @@ do_flock(struct file *file, int cmd, struct file_lock *fl)
 	if (gl) {
 		if (fl_gh->gh_state == state)
 			goto out;
-		gfs_glock_hold(gl);
 		flock_lock_file_wait(file,
-				     &(struct file_lock){.fl_type = F_UNLCK});		
-		gfs_glock_dq_uninit(fl_gh);
+				     &(struct file_lock){.fl_type = F_UNLCK});
+		gfs_glock_dq_wait(fl_gh);
+		gfs_holder_reinit(state, flags, fl_gh);
 	} else {
 		error = gfs_glock_get(ip->i_sbd,
 				      ip->i_num.no_formal_ino, &gfs_flock_glops,
 				      CREATE, &gl);
 		if (error)
 			goto out;
+		gfs_holder_init(gl, state, flags, fl_gh);
+		gfs_glock_put(gl);
 	}
-
-	gfs_holder_init(gl, state, flags, fl_gh);
-	gfs_glock_put(gl);
-
 	error = gfs_glock_nq(fl_gh);
 	if (error) {
 		gfs_holder_uninit(fl_gh);
diff --git a/gfs-kernel/src/gfs/ops_fstype.c b/gfs-kernel/src/gfs/ops_fstype.c
index 2e74d04..8bc76bf 100644
--- a/gfs-kernel/src/gfs/ops_fstype.c
+++ b/gfs-kernel/src/gfs/ops_fstype.c
@@ -437,7 +437,7 @@ static int init_journal(struct gfs_sbd *sdp, int undo)
 	else {
 		/*  Discover this node's journal number (lock module tells us
 		    which one to use), and lock it */
-		error = -EINVAL;
+		error = -EUSERS;
 		if (sdp->sd_lockstruct.ls_jid >= sdp->sd_journals) {
 			printk("GFS: fsid=%s: can't mount journal #%u\n",
 			       sdp->sd_fsname, sdp->sd_lockstruct.ls_jid);
diff --git a/gfs/gfs_jadd/main.c b/gfs/gfs_jadd/main.c
index f295c86..5f0affd 100644
--- a/gfs/gfs_jadd/main.c
+++ b/gfs/gfs_jadd/main.c
@@ -713,33 +713,42 @@ update_fs(void)
  */
 
 static int
-find_fs(char *name)
+find_fs(const char *name)	/* 0: name is a mounted gfs device or mount point; 1: not found; -1: realpath() failed */
 {
 	FILE *fp = fopen("/proc/mounts", "r");
 	char buffer[4096];
 	char fstype[80];
 	int fsdump, fspass;
+	char *realname;	/* resolved form of name returned by realpath() */
 
+	realname = realpath(name, NULL);	/* NULL buffer: realpath() allocates the result, free()d before the returns below */
+	if (!realname) {
+		perror(name);
+		return -1;	/* NOTE(review): fp opened above is not fclose()d on this path */
+	}
 	if (fp == NULL) {
 		perror("open: /proc/mounts");
 		exit(EXIT_FAILURE);
 	}
 	while ((fgets(buffer, 4095, fp)) != NULL) {
 		buffer[4095] = 0;
-		if (strstr(buffer, name) == 0)
+		if (strstr(buffer, realname) == 0)	/* skip lines that do not mention the resolved path at all */
 			continue;
 		if (sscanf(buffer, "%s %s %s %s %d %d", device, fspath, fstype,
 			   fsoptions, &fsdump, &fspass) != 6)
 			continue;
 		if (strcmp(fstype, "gfs") != 0)
 			continue;
-		if ((strcmp(device, name) != 0) && (strcmp(fspath, name) != 0))
+		if ((strcmp(device, realname) != 0) &&	/* need an exact match on the device ... */
+		    (strcmp(fspath, realname) != 0))	/* ... or on the mount point */
 			continue;
 		fclose(fp);
+		free(realname);
 		return 0;
 	}
 	fprintf(stderr, "GFS Filesystem %s not found\n", name);
 	fclose(fp);
+	free(realname);
 	return 1;
 }
 
diff --git a/gfs2/convert/gfs2_convert.c b/gfs2/convert/gfs2_convert.c
index 4034e74..5e3d8f3 100644
--- a/gfs2/convert/gfs2_convert.c
+++ b/gfs2/convert/gfs2_convert.c
@@ -156,35 +156,25 @@ void print_it(const char *label, const char *fmt, const char *fmt2, ...)
 /*                   Fixes all unallocated metadata bitmap states (which are */
 /*                   valid in gfs1 but invalid in gfs2).                     */
 /* ------------------------------------------------------------------------- */
-static void convert_bitmaps(struct gfs2_sbd *sdp, struct rgrp_list *rgd2,
-					 int read_disk)
+static void convert_bitmaps(struct gfs2_sbd *sdp, struct rgrp_list *rg)	/* clears the 0x02 bitmap state in each bitmap block of rg; assumes rg->bh[] was filled in by the caller -- TODO confirm */
 {
 	uint32_t blk;
 	int x, y;
 	struct gfs2_rindex *ri;
 	unsigned char state;
-	struct gfs2_buffer_head *bh;
 
-	ri = &rgd2->ri;
-	if (gfs2_compute_bitstructs(sdp, rgd2)) { /* mallocs bh as array */
-		log_crit("gfs2_convert: Error converting bitmaps.\n");
-		exit(-1);
-	}
+	ri = &rg->ri;
 	for (blk = 0; blk < ri->ri_length; blk++) {
-		bh = bget_generic(&sdp->nvbuf_list, ri->ri_addr + blk,
-				  read_disk, read_disk);
-		if (!rgd2->bh[blk])
-			rgd2->bh[blk] = bh;
-		x = (blk) ? sizeof(struct gfs2_meta_header) : sizeof(struct gfs2_rgrp);
+		x = (blk) ? sizeof(struct gfs2_meta_header) :	/* start scanning after the header: meta header in later blocks, */
+			sizeof(struct gfs2_rgrp);	/* struct gfs2_rgrp in block 0 */
 
 		for (; x < sdp->bsize; x++)
 			for (y = 0; y < GFS2_NBBY; y++) {
-				state = (rgd2->bh[blk]->b_data[x] >>
-						 (GFS2_BIT_SIZE * y)) & 0x03;
+				state = (rg->bh[blk]->b_data[x] >>
+					 (GFS2_BIT_SIZE * y)) & 0x03;	/* entry y of byte x, masked to two bits */
 				if (state == 0x02) /* unallocated metadata state invalid */
-					rgd2->bh[blk]->b_data[x] &= ~(0x02 << (GFS2_BIT_SIZE * y));
+					rg->bh[blk]->b_data[x] &= ~(0x02 << (GFS2_BIT_SIZE * y));	/* drop the 0x02 bit, so state 0x02 becomes 0x00 */
 			}
-		brelse(bh, updated);
 	}
 }/* convert_bitmaps */
 
@@ -196,7 +186,6 @@ static int convert_rgs(struct gfs2_sbd *sbp)
 {
 	struct rgrp_list *rgd;
 	osi_list_t *tmp;
-	struct gfs2_buffer_head *bh;
 	struct gfs1_rgrp *rgd1;
 	int rgs = 0;
 
@@ -216,12 +205,9 @@ static int convert_rgs(struct gfs2_sbd *sbp)
 		sbp->blks_total += rgd->ri.ri_data;
 		sbp->blks_alloced += (rgd->ri.ri_data - rgd->rg.rg_free);
 		sbp->dinodes_alloced += rgd1->rg_useddi;
-		convert_bitmaps(sbp, rgd, TRUE);
+		convert_bitmaps(sbp, rgd);
 		/* Write the updated rgrp to the gfs2 buffer */
-		bh = bget(&sbp->nvbuf_list,
-			  rgd->ri.ri_addr); /* get a gfs2 buffer for the rg */
-		gfs2_rgrp_out(&rgd->rg, rgd->bh[0]->b_data);
-		brelse(bh, updated); /* release the buffer */
+		gfs2_rgrp_out(&rgd->rg, rgd->bh[0]);
 		rgs++;
 		if (rgs % 100 == 0) {
 			printf(".");
@@ -324,19 +310,20 @@ static void fix_metatree(struct gfs2_sbd *sbp, struct gfs2_inode *ip,
 	amount = size;
 
 	while (copied < size) {
-		bh = bhold(ip->i_bh);
-
+		bh = ip->i_bh;
 		/* First, build up the metatree */
 		for (h = 0; h < blk->height; h++) {
-			lookup_block(ip, bh, h, &blk->mp, 1, &new, &block);
-			brelse(bh, updated);
+			lookup_block(ip, ip->i_bh, h, &blk->mp, 1, &new,
+				     &block);
+			if (bh != ip->i_bh)
+				brelse(bh);
 			if (!block)
 				break;
 
-			bh = bread(&sbp->buf_list, block);
+			bh = bread(sbp, block);
 			if (new)
 				memset(bh->b_data, 0, sbp->bsize);
-			gfs2_meta_header_out(&mh, bh->b_data);
+			gfs2_meta_header_out(&mh, bh);
 		}
 
 		hdrsize = sizeof(struct gfs2_meta_header);
@@ -344,10 +331,11 @@ static void fix_metatree(struct gfs2_sbd *sbp, struct gfs2_inode *ip,
 		if (amount > sbp->bsize - hdrsize - ptramt)
 			amount = sbp->bsize - hdrsize - ptramt;
 
-		memcpy(bh->b_data + hdrsize + ptramt,
-		       (char *)srcptr, amount);
+		memcpy(bh->b_data + hdrsize + ptramt, (char *)srcptr, amount);
 		srcptr += amount;
-		brelse(bh, updated);
+		bmodified(bh);
+		if (bh != ip->i_bh)
+			brelse(bh);
 
 		copied += amount;
 
@@ -417,8 +405,7 @@ static void fix_metatree(struct gfs2_sbd *sbp, struct gfs2_inode *ip,
 /*                                                                           */
 /* Adapted from gfs2_fsck metawalk.c's build_and_check_metalist              */
 /* ------------------------------------------------------------------------- */
-static int adjust_indirect_blocks(struct gfs2_sbd *sbp, struct gfs2_buffer_head *dibh,
-			   struct gfs2_inode *ip)
+static int adjust_indirect_blocks(struct gfs2_sbd *sbp, struct gfs2_inode *ip)
 {
 	uint32_t gfs2_hgt;
 	struct gfs2_buffer_head *bh;
@@ -429,6 +416,7 @@ static int adjust_indirect_blocks(struct gfs2_sbd *sbp, struct gfs2_buffer_head
 	int error = 0, di_height;
 	struct blocklist blocks, *blk, *newblk;
 	struct metapath gfs2mp;
+	struct gfs2_buffer_head *dibh = ip->i_bh;
 
 	/* if there are no indirect blocks to check */
 	if (ip->i_di.di_height <= 1)
@@ -504,13 +492,14 @@ static int adjust_indirect_blocks(struct gfs2_sbd *sbp, struct gfs2_buffer_head
 			osi_list_add_prev(&newblk->list, &blocks.list);
 
 			/* read the new metadata block's pointers */
-			bh = bread(&sbp->buf_list, block);
+			bh = bread(sbp, block);
 			memcpy(newblk->ptrbuf, bh->b_data +
 			       sizeof(struct gfs_indirect), bufsize);
 			/* Zero the buffer so we can fill it in later */
 			memset(bh->b_data + sizeof(struct gfs_indirect), 0,
 			       bufsize);
-			brelse(bh, updated);
+			bmodified(bh);
+			brelse(bh);
 			/* Free the metadata block so we can reuse it.
 			   This allows us to convert a "full" file system. */
 			ip->i_di.di_blocks--;
@@ -591,6 +580,282 @@ out:
 	return error;
 }
 
+/* Map a gfs1 jdata metapath onto the gfs2 metapath for the same byte offset
+   and trim *len so that the copy does not extend beyond dinode_size. */
+static void jdata_mp_gfs1_to_gfs2(struct gfs2_sbd *sbp, int gfs1_h, int gfs2_h,
+			   struct metapath *gfs1mp, struct metapath *gfs2mp,
+			   unsigned int *len, uint64_t dinode_size)
+{
+	uint64_t gfs1factor[GFS2_MAX_META_HEIGHT];
+	uint64_t gfs2factor[GFS2_MAX_META_HEIGHT];
+	uint64_t pos = 0;
+	int lvl;
+
+	/* Bytes covered by one index step at each gfs1 height */
+	memset(&gfs1factor, 0, sizeof(gfs1factor));
+	gfs1factor[gfs1_h - 1] = sbp->bsize - sizeof(struct gfs2_meta_header);
+	for (lvl = gfs1_h - 2; lvl >= 0; lvl--)
+		gfs1factor[lvl] = gfs1factor[lvl + 1] * sbp->sd_inptrs;
+
+	/* Same for gfs2, where the last index counts single bytes */
+	memset(&gfs2factor, 0, sizeof(gfs2factor));
+	gfs2factor[gfs2_h] = 1ull;
+	gfs2factor[gfs2_h - 1] = sbp->bsize;
+	for (lvl = gfs2_h - 2; lvl >= 0; lvl--)
+		gfs2factor[lvl] = gfs2factor[lvl + 1] * gfs2_inptrs;
+
+	/* Collapse the gfs1 path into a byte offset */
+	for (lvl = 0; lvl < gfs1_h; lvl++)
+		pos += (gfs1mp->mp_list[lvl] * gfs1factor[lvl]);
+
+	if (dinode_size - pos < *len)
+		*len = dinode_size - pos;
+
+	/* Back to gfs2 indices; the factors are too large for do_div */
+	memset(gfs2mp, 0, sizeof(*gfs2mp));
+	for (lvl = 0; lvl <= gfs2_h; lvl++) {
+		gfs2mp->mp_list[lvl] = pos / gfs2factor[lvl];
+		pos %= gfs2factor[lvl];
+	}
+}
+
+/* fix_jdatatree - write size bytes from srcptr into ip's gfs2 block tree,
+   starting at metapath blk->mp and moving on to the next block whenever
+   the current one fills up.  The tree is walked once per block written. */
+static void fix_jdatatree(struct gfs2_sbd *sbp, struct gfs2_inode *ip,
+		  struct blocklist *blk, char *srcptr, unsigned int size)
+{
+	uint64_t block;
+	struct gfs2_buffer_head *bh;
+	unsigned int amount, ptramt;
+	int h, copied = 0, new = 0;
+	struct gfs2_meta_header mh;
+
+	mh.mh_magic = GFS2_MAGIC;
+	mh.mh_type = GFS2_METATYPE_IN;
+	mh.mh_format = GFS2_FORMAT_IN;
+
+	/* Should never trigger: we only deal with unstuffed inodes */
+	if (!ip->i_di.di_height)
+		unstuff_dinode(ip);
+
+	ptramt = blk->mp.mp_list[blk->height];
+	amount = size;
+
+	while (copied < size) {
+		bh = ip->i_bh;
+		/* First, build up the metatree */
+		for (h = 0; h < blk->height; h++) {
+			/* search this height's own block; drop a stale "new" */
+			new = 0;
+			lookup_block(ip, bh, h, &blk->mp, 1, &new, &block);
+			if (bh != ip->i_bh)
+				brelse(bh);
+			if (!block)
+				break;
+
+			bh = bread(sbp, block);
+			if (new)
+				memset(bh->b_data, 0, sbp->bsize);
+			if (h < (blk->height - 1))
+				gfs2_meta_header_out(&mh, bh);
+		}
+
+		if (amount > sbp->bsize - ptramt)
+			amount = sbp->bsize - ptramt;
+
+		memcpy(bh->b_data + ptramt, (char *)srcptr, amount);
+		srcptr += amount;
+		bmodified(bh);
+		if (bh != ip->i_bh)
+			brelse(bh);
+
+		copied += amount;
+
+		if (ptramt + amount >= sbp->bsize) {
+			/* advance to the next metablock */
+			blk->mp.mp_list[blk->height] += amount;
+			for (h = blk->height; h > 0; h--) {
+				if (blk->mp.mp_list[h] < gfs2_inptrs)
+					break;
+				blk->mp.mp_list[h] = 0;
+				blk->mp.mp_list[h - 1]++;
+			}
+		}
+		amount = size - copied;
+		ptramt = 0;
+	}
+}
+
+/* adjust_jdata_inode - rebuild the block tree of a gfs1 jdata file for gfs2.
+   Copies out and frees every block below the dinode, then rewrites the data
+   at its gfs2 position.  Returns 0 on success, -1 if an allocation fails. */
+static int adjust_jdata_inode(struct gfs2_sbd *sbp, struct gfs2_inode *ip)
+{
+	uint32_t gfs2_hgt;
+	struct gfs2_buffer_head *bh;
+	osi_list_t *tmp, *x;
+	int h, bufsize, ptrnum;
+	uint64_t *ptr1, block, dinode_size;
+	int error = 0, di_height;
+	struct blocklist blocks, *blk, *newblk;
+	struct metapath gfs2mp;
+	struct gfs2_buffer_head *dibh = ip->i_bh;
+
+	/* Don't have to worry about things with stuffed inodes */
+	if (ip->i_di.di_height == 0)
+		return 0;
+
+	osi_list_init(&blocks.list);
+
+	/* Add the dinode block to the blocks list */
+	blk = malloc(sizeof(struct blocklist));
+	if (!blk) {
+		log_crit("Error: Can't allocate memory"
+			 " for indirect block fix.\n");
+		return -1;
+	}
+	memset(blk, 0, sizeof(*blk));
+	/* allocate a buffer to hold the pointers or data */
+	bufsize = sbp->bsize - sizeof(struct gfs2_meta_header);
+	blk->block = dibh->b_blocknr;
+	/* blk->ptrbuf either contains
+	 * a) diptrs (for height=0)
+	 * b) inptrs (for height=1 to di_height - 1)
+	 * c) data for height = di_height
+	 */
+	blk->ptrbuf = malloc(bufsize);
+	if (!blk->ptrbuf) {
+		log_crit("Error: Can't allocate memory"
+			 " for file conversion.\n");
+		free(blk);
+		return -1;
+	}
+	memset(blk->ptrbuf, 0, bufsize);
+	/* Fill in the pointers from the dinode buffer */
+	memcpy(blk->ptrbuf, dibh->b_data + sizeof(struct gfs_dinode),
+	       sbp->bsize - sizeof(struct gfs_dinode));
+	/* Zero out the pointers so we can fill them in later. */
+	memset(dibh->b_data + sizeof(struct gfs_dinode), 0,
+	       sbp->bsize - sizeof(struct gfs_dinode));
+	osi_list_add_prev(&blk->list, &blocks.list);
+
+	/* Now run the metadata chain and build lists of all data/metadata blocks */
+	osi_list_foreach(tmp, &blocks.list) {
+		blk = osi_list_entry(tmp, struct blocklist, list);
+
+		if (blk->height >= ip->i_di.di_height)
+			continue;
+
+		for (ptr1 = (uint64_t *)blk->ptrbuf, ptrnum = 0;
+		     ptrnum < sbp->sd_inptrs; ptr1++, ptrnum++) {
+			if (!*ptr1)
+				continue;
+
+			block = be64_to_cpu(*ptr1);
+
+			newblk = malloc(sizeof(struct blocklist));
+			if (!newblk) {
+				log_crit("Error: Can't allocate memory"
+					 " for indirect block fix.\n");
+				error = -1;
+				goto out;
+			}
+			memset(newblk, 0, sizeof(*newblk));
+			newblk->ptrbuf = malloc(bufsize);
+			if (!newblk->ptrbuf) {
+				log_crit("Error: Can't allocate memory"
+					 " for file conversion.\n");
+				free(newblk);
+				error = -1;
+				goto out;
+			}
+			memset(newblk->ptrbuf, 0, bufsize);
+			newblk->block = block;
+			newblk->height = blk->height + 1;
+			/* Build the metapointer list from our predecessors */
+			for (h = 0; h < blk->height; h++)
+				newblk->mp.mp_list[h] = blk->mp.mp_list[h];
+			newblk->mp.mp_list[h] = ptrnum;
+			/* Queue it to be processed later on in the loop. */
+			osi_list_add_prev(&newblk->list, &blocks.list);
+
+			bh = bread(sbp, block);
+			if (newblk->height == ip->i_di.di_height) {
+				/* read in the jdata block */
+				memcpy(newblk->ptrbuf, bh->b_data +
+				       sizeof(struct gfs2_meta_header), bufsize);
+				/* Zero the buffer so we can fill it in later */
+				memset(bh->b_data + sizeof(struct gfs2_meta_header), 0,
+				       bufsize);
+			} else {
+				/* read the new metadata block's pointers */
+				memcpy(newblk->ptrbuf, bh->b_data +
+				       sizeof(struct gfs_indirect),
+				       sbp->bsize - sizeof(struct gfs_indirect));
+				/* Zero the buffer so we can fill it in later */
+				memset(bh->b_data + sizeof(struct gfs_indirect), 0,
+				       sbp->bsize - sizeof(struct gfs_indirect));
+			}
+			bmodified(bh);
+			brelse(bh);
+			/* Free the block so we can reuse it. This allows us to
+			   convert a "full" file system. */
+			ip->i_di.di_blocks--;
+			gfs2_free_block(sbp, block);
+		}
+	}
+	/* The gfs2 height may be different.  We need to rebuild the
+	   metadata tree to the gfs2 height. */
+	gfs2_hgt = calc_gfs2_tree_height(ip, ip->i_di.di_size);
+	/* Save off the size because we're going to empty the contents
+	   and add the data blocks back in later. */
+	dinode_size = ip->i_di.di_size;
+	ip->i_di.di_size = 0ULL;
+	di_height = ip->i_di.di_height;
+	ip->i_di.di_height = 0;
+
+	/* Now run through the block list a second time.  If the block
+	   is a data block, rewrite the data to the gfs2 offset. */
+	osi_list_foreach_safe(tmp, &blocks.list, x) {
+		unsigned int len;
+
+		blk = osi_list_entry(tmp, struct blocklist, list);
+		/* If it's not a data block at the highest level */
+		if (blk->height != di_height) {
+			osi_list_del(tmp);
+			free(blk->ptrbuf);
+			free(blk);
+			continue;
+		}
+		len = bufsize;
+		jdata_mp_gfs1_to_gfs2(sbp, di_height, gfs2_hgt, &blk->mp, &gfs2mp, &len, dinode_size);
+		memcpy(&blk->mp, &gfs2mp, sizeof(struct metapath));
+		blk->height -= di_height - gfs2_hgt;
+		if (len)
+			fix_jdatatree(sbp, ip, blk, blk->ptrbuf, len);
+		osi_list_del(tmp);
+		free(blk->ptrbuf);
+		free(blk);
+	}
+	ip->i_di.di_size = dinode_size;
+
+	/* Set the new dinode height, which may or may not have changed.  */
+	/* The caller will take it from the ip and write it to the buffer */
+	ip->i_di.di_height = gfs2_hgt;
+	return 0;
+
+out:
+	/* Free whatever is still queued, one entry off the front at a time */
+	while (!osi_list_empty(&blocks.list)) {
+		blk = osi_list_entry(blocks.list.next, struct blocklist, list);
+		osi_list_del(&blk->list);
+		free(blk->ptrbuf);
+		free(blk);
+	}
+	return error;
+}
+
 /* ------------------------------------------------------------------------- */
 /* adjust_inode - change an inode from gfs1 to gfs2                          */
 /*                                                                           */
@@ -662,17 +927,24 @@ static int adjust_inode(struct gfs2_sbd *sbp, struct gfs2_buffer_head *bh)
 	/* ----------------------------------------------------------- */
 	if (inode_was_gfs1) {
 		struct gfs_dinode *gfs1_dinode_struct;
+		int ret = 0;
 
 		gfs1_dinode_struct = (struct gfs_dinode *)&inode->i_di;
 		inode->i_di.di_goal_meta = inode->i_di.di_goal_data;
 		inode->i_di.di_goal_data = 0; /* make sure the upper 32b are 0 */
 		inode->i_di.di_goal_data = gfs1_dinode_struct->di_goal_dblk;
 		inode->i_di.di_generation = 0;
-		if (adjust_indirect_blocks(sbp, bh, inode))
+		if (!(inode->i_di.di_mode & S_IFDIR) &&
+		    inode->i_di.di_flags & GFS2_DIF_JDATA)
+			ret = adjust_jdata_inode(sbp, inode);
+		else
+			ret = adjust_indirect_blocks(sbp, inode);
+		if (ret)
 			return -1;
 	}
 	
-	gfs2_dinode_out(&inode->i_di, bh->b_data);
+	bmodified(inode->i_bh);
+	inode_put(&inode); /* does gfs2_dinode_out if modified */
 	sbp->md.next_inum++; /* update inode count */
 	return 0;
 } /* adjust_inode */
@@ -707,10 +979,6 @@ static int inode_renumber(struct gfs2_sbd *sbp, uint64_t root_inode_addr)
 		rgs_processed++;
 		rgd = osi_list_entry(tmp, struct rgrp_list, list);
 		first = 1;
-		if (gfs2_rgrp_read(sbp, rgd)) {
-			log_crit("Unable to read rgrp.\n");
-			return -1;
-		}
 		while (1) {    /* for all inodes in the resource group */
 			gettimeofday(&tv, NULL);
 			/* Put out a warm, fuzzy message every second so the customer */
@@ -723,7 +991,7 @@ static int inode_renumber(struct gfs2_sbd *sbp, uint64_t root_inode_addr)
 				fflush(stdout);
 			}
 			/* Get the next metadata block.  Break out if we reach the end. */
-            /* We have to check all metadata blocks because the bitmap may  */
+			/* We have to check all metadata blocks because the bitmap may  */
 			/* be "11" (used meta) for both inodes and indirect blocks.     */
 			/* We need to process the inodes and change the indirect blocks */
 			/* to have a bitmap type of "01" (data).                        */
@@ -734,10 +1002,10 @@ static int inode_renumber(struct gfs2_sbd *sbp, uint64_t root_inode_addr)
 				sbp->sd_sb.sb_root_dir.no_addr = block;
 				sbp->sd_sb.sb_root_dir.no_formal_ino = sbp->md.next_inum;
 			}
-			bh = bread(&sbp->buf_list, block);
-			if (!gfs2_check_meta(bh, GFS_METATYPE_DI)) /* if it is an dinode */
+			bh = bread(sbp, block);
+			if (!gfs2_check_meta(bh, GFS_METATYPE_DI)) {/* if it is an dinode */
 				error = adjust_inode(sbp, bh);
-			else { /* It's metadata, but not an inode, so fix the bitmap. */
+			} else { /* It's metadata, but not an inode, so fix the bitmap. */
 				int blk, buf_offset;
 				int bitmap_byte; /* byte within the bitmap to fix */
 				int byte_bit; /* bit within the byte */
@@ -748,7 +1016,7 @@ static int inode_renumber(struct gfs2_sbd *sbp, uint64_t root_inode_addr)
 				byte_bit = (block - rgd->ri.ri_data0) % GFS2_NBBY;
 				/* Now figure out which bitmap block the byte is on */
 				for (blk = 0; blk < rgd->ri.ri_length; blk++) {
-                    /* figure out offset of first bitmap byte for this map: */
+					/* figure out offset of first bitmap byte for this map: */
 					buf_offset = (blk) ? sizeof(struct gfs2_meta_header) :
 						sizeof(struct gfs2_rgrp);
 					/* if it's on this page */
@@ -760,12 +1028,12 @@ static int inode_renumber(struct gfs2_sbd *sbp, uint64_t root_inode_addr)
 						break;
 					}
 					bitmap_byte -= (sbp->bsize - buf_offset);
+					bmodified(rgd->bh[blk]);
 				}
 			}
-			brelse(bh, updated);
+			brelse(bh);
 			first = 0;
 		} /* while 1 */
-		gfs2_rgrp_relse(rgd, updated);
 	} /* for all rgs */
 	log_notice("\r%" PRIu64" inodes from %d rgs converted.",
 		   sbp->md.next_inum, rgs_processed);
@@ -779,14 +1047,12 @@ static int inode_renumber(struct gfs2_sbd *sbp, uint64_t root_inode_addr)
 static int fetch_inum(struct gfs2_sbd *sbp, uint64_t iblock,
 					   struct gfs2_inum *inum)
 {
-	struct gfs2_buffer_head *bh_fix;
 	struct gfs2_inode *fix_inode;
 
-	bh_fix = bread(&sbp->buf_list, iblock);
-	fix_inode = inode_get(sbp, bh_fix);
+	fix_inode = inode_read(sbp, iblock);
 	inum->no_formal_ino = fix_inode->i_di.di_num.no_formal_ino;
 	inum->no_addr = fix_inode->i_di.di_num.no_addr;
-	brelse(bh_fix, updated);
+	inode_put(&fix_inode);
 	return 0;
 }/* fetch_inum */
 
@@ -799,7 +1065,7 @@ static int fetch_inum(struct gfs2_sbd *sbp, uint64_t iblock,
 /* Returns: 0 on success, -1 on failure                                      */
 /* ------------------------------------------------------------------------- */
 static int process_dirent_info(struct gfs2_inode *dip, struct gfs2_sbd *sbp,
-						struct gfs2_buffer_head *bh, int dir_entries)
+			       struct gfs2_buffer_head *bh, int dir_entries)
 {
 	int error;
 	struct gfs2_dirent *dent;
@@ -923,9 +1189,10 @@ static int fix_one_directory_exhash(struct gfs2_sbd *sbp, struct gfs2_inode *dip
 			log_crit("Error reading leaf %" PRIx64 "\n", leaf_block);
 			break;
 		}
-		gfs2_leaf_in(&leaf, (char *)bh_leaf->b_data); /* buffer to structure */
+		gfs2_leaf_in(&leaf, bh_leaf); /* buffer to structure */
 		error = process_dirent_info(dip, sbp, bh_leaf, leaf.lf_entries);
-		brelse(bh_leaf, updated);
+		bmodified(bh_leaf);
+		brelse(bh_leaf);
 	} /* for leaf_num */
 	return 0;
 }/* fix_one_directory_exhash */
@@ -940,7 +1207,6 @@ static int fix_directory_info(struct gfs2_sbd *sbp, osi_list_t *dir_to_fix)
 	struct inode_block *dir_iblk;
 	uint64_t offset, dirblock;
 	struct gfs2_inode *dip;
-	struct gfs2_buffer_head *bh_dir;
 
 	dirs_fixed = 0;
 	dirents_fixed = 0;
@@ -962,24 +1228,24 @@ static int fix_directory_info(struct gfs2_sbd *sbp, osi_list_t *dir_to_fix)
 		dir_iblk = (struct inode_block *)fix;
 		dirblock = dir_iblk->di_addr; /* addr of dir inode */
 		/* read in the directory inode */
-		bh_dir = bread(&sbp->buf_list, dirblock);
-		dip = inode_get(sbp, bh_dir);
+		dip = inode_read(sbp, dirblock);
 		/* fix the directory: either exhash (leaves) or linear (stuffed) */
 		if (dip->i_di.di_flags & GFS2_DIF_EXHASH) {
 			if (fix_one_directory_exhash(sbp, dip)) {
 				log_crit("Error fixing exhash directory.\n");
-				brelse(bh_dir, updated);
+				inode_put(&dip);
 				return -1;
 			}
 		}
 		else {
-			if (process_dirent_info(dip, sbp, bh_dir, dip->i_di.di_entries)) {
+			if (process_dirent_info(dip, sbp, dip->i_bh,
+						dip->i_di.di_entries)) {
 				log_crit("Error fixing linear directory.\n");
-				brelse(bh_dir, updated);
+				inode_put(&dip);
 				return -1;
 			}
 		}
-		brelse(bh_dir, updated);
+		inode_put(&dip);
 	}
 	/* Free the last entry in memory: */
 	if (tmp) {
@@ -1091,24 +1357,22 @@ static int init(struct gfs2_sbd *sbp)
 	sbp->sd_sb.sb_bsize = GFS2_DEFAULT_BSIZE;
 	sbp->bsize = sbp->sd_sb.sb_bsize;
 	osi_list_init(&sbp->rglist);
-	init_buf_list(sbp, &sbp->buf_list, 128 << 20);
-	init_buf_list(sbp, &sbp->nvbuf_list, 0xffffffff);
 	if (compute_constants(sbp)) {
 		log_crit("Error: Bad constants (1)\n");
 		exit(-1);
 	}
 
-	bh = bread(&sbp->buf_list, GFS2_SB_ADDR >> sbp->sd_fsb2bb_shift);
+	bh = bread(sbp, GFS2_SB_ADDR >> sbp->sd_fsb2bb_shift);
 	memcpy(&raw_gfs1_ondisk_sb, (struct gfs1_sb *)bh->b_data,
 		   sizeof(struct gfs1_sb));
-	gfs2_sb_in(&sbp->sd_sb, bh->b_data);
+	gfs2_sb_in(&sbp->sd_sb, bh);
 	sbp->bsize = sbp->sd_sb.sb_bsize;
 	sbp->sd_inptrs = (sbp->bsize - sizeof(struct gfs_indirect)) /
 		sizeof(uint64_t);
 	sbp->sd_diptrs = (sbp->bsize - sizeof(struct gfs_dinode)) /
 		sizeof(uint64_t);
 	sbp->sd_jbsize = sbp->bsize - sizeof(struct gfs2_meta_header);
-	brelse(bh, not_updated);
+	brelse(bh);
 	if (compute_heightsize(sbp, sbp->sd_heightsize, &sbp->sd_max_height,
 				sbp->bsize, sbp->sd_diptrs, sbp->sd_inptrs)) {
 		log_crit("Error: Bad constants (1)\n");
@@ -1152,11 +1416,10 @@ static int init(struct gfs2_sbd *sbp)
 	}
 	/* get gfs1 rindex inode - gfs1's rindex inode ptr became __pad2 */
 	gfs2_inum_in(&inum, (char *)&raw_gfs1_ondisk_sb.sb_rindex_di);
-	bh = bread(&sbp->buf_list, inum.no_addr);
-	sbp->md.riinode = gfs_inode_get(sbp, bh);
+	sbp->md.riinode = gfs_inode_read(sbp, inum.no_addr);
 	/* get gfs1 jindex inode - gfs1's journal index inode ptr became master */
 	gfs2_inum_in(&inum, (char *)&raw_gfs1_ondisk_sb.sb_jindex_di);
-	sbp->md.jiinode = gfs2_load_inode(sbp, inum.no_addr);
+	sbp->md.jiinode = inode_read(sbp, inum.no_addr);
 	/* read in the journal index data */
 	read_gfs1_jiindex(sbp);
 	/* read in the resource group index data: */
@@ -1175,8 +1438,8 @@ static int init(struct gfs2_sbd *sbp)
 	}
 	printf("\n");
 	fflush(stdout);
-	inode_put(sbp->md.riinode, updated);
-	inode_put(sbp->md.jiinode, updated);
+	inode_put(&sbp->md.riinode);
+	inode_put(&sbp->md.jiinode);
 	log_debug("%d rgs found.\n", rgcount);
 	return 0;
 }/* fill_super_block */
@@ -1329,12 +1592,12 @@ static int journ_space_to_rg(struct gfs2_sbd *sdp)
 		uint64_t size;
 
 		jndx = &sd_jindex[j];
-		/* go through all rg index entries, keeping track of the highest */
-		/* that's still in the first subdevice.                          */
-		/* Note: we really should go through all of the rgindex because  */
-		/* we might have had rg's added by gfs_grow, and journals added  */
-		/* by jadd.  gfs_grow adds rgs out of order, so we can't count   */
-		/* on them being in ascending order.                             */
+		/* go through all rg index entries, keeping track of the
+		   highest that's still in the first subdevice.
+		   Note: we really should go through all of the rgindex because
+		   we might have had rg's added by gfs_grow, and journals added
+		   by jadd.  gfs_grow adds rgs out of order, so we can't count
+		   on them being in ascending order. */
 		rgdhigh = NULL;
 		osi_list_foreach(tmp, &sdp->rglist) {
 			rgd = osi_list_entry(tmp, struct rgrp_list, list);
@@ -1370,19 +1633,36 @@ static int journ_space_to_rg(struct gfs2_sbd *sdp)
 
 		rgd->ri.ri_data0 = jndx->ji_addr + rgd->ri.ri_length;
 		rgd->ri.ri_data = size - rgd->ri.ri_length;
-		sdp->blks_total += rgd->ri.ri_data; /* For statfs file update */
 		/* Round down to nearest multiple of GFS2_NBBY */
 		while (rgd->ri.ri_data & 0x03)
 			rgd->ri.ri_data--;
+		sdp->blks_total += rgd->ri.ri_data; /* For statfs file update */
 		rgd->rg.rg_free = rgd->ri.ri_data;
 		rgd->ri.ri_bitbytes = rgd->ri.ri_data / GFS2_NBBY;
-		convert_bitmaps(sdp, rgd, FALSE); /* allocates rgd->bh */
+
+		if(!(rgd->bh = (struct gfs2_buffer_head **)
+		     malloc(rgd->ri.ri_length *
+			    sizeof(struct gfs2_buffer_head *))))
+			return -1;
+		if(!memset(rgd->bh, 0, rgd->ri.ri_length *
+			   sizeof(struct gfs2_buffer_head *))) {
+			free(rgd->bh);
+			return -1;
+		}
+		for (x = 0; x < rgd->ri.ri_length; x++) {
+			rgd->bh[x] = bget(sdp, rgd->ri.ri_addr + x);
+			memset(rgd->bh[x]->b_data, 0, sdp->bsize);
+		}
+		if (gfs2_compute_bitstructs(sdp, rgd)) {
+			log_crit("gfs2_convert: Error converting bitmaps.\n");
+			exit(-1);
+		}
+		convert_bitmaps(sdp, rgd);
 		for (x = 0; x < rgd->ri.ri_length; x++) {
-			rgd->bh[x]->b_count++;
 			if (x)
-				gfs2_meta_header_out(&mh, rgd->bh[x]->b_data);
+				gfs2_meta_header_out(&mh, rgd->bh[x]);
 			else
-				gfs2_rgrp_out(&rgd->rg, rgd->bh[x]->b_data);
+				gfs2_rgrp_out(&rgd->rg, rgd->bh[x]);
 		}
 		/* Add the new gfs2 rg to our list: We'll output the rg index later. */
 		osi_list_add_prev((osi_list_t *)&rgd->list,
@@ -1417,7 +1697,7 @@ static void write_statfs_file(struct gfs2_sbd *sdp)
 	struct gfs2_statfs_change sc;
 	char buf[sizeof(struct gfs2_statfs_change)];
 	int count;
-	
+
 	sc.sc_total = sdp->blks_total;
 	sc.sc_free = sdp->blks_total - sdp->blks_alloced;
 	sc.sc_dinodes = sdp->dinodes_alloced;
@@ -1459,11 +1739,10 @@ static void remove_obsolete_gfs1(struct gfs2_sbd *sbp)
 /* ------------------------------------------------------------------------- */
 static void conv_build_jindex(struct gfs2_sbd *sdp)
 {
-	struct gfs2_inode *jindex;
 	unsigned int j;
 
-	jindex = createi(sdp->master_dir, "jindex", S_IFDIR | 0700,
-			 GFS2_DIF_SYSTEM);
+	sdp->md.jiinode = createi(sdp->master_dir, "jindex", S_IFDIR | 0700,
+				  GFS2_DIF_SYSTEM);
 
 	for (j = 0; j < sdp->md.journals; j++) {
 		char name[256];
@@ -1472,20 +1751,21 @@ static void conv_build_jindex(struct gfs2_sbd *sdp)
 		printf("Writing journal #%d...", j + 1);
 		fflush(stdout);
 		sprintf(name, "journal%u", j);
-		ip = createi(jindex, name, S_IFREG | 0600, GFS2_DIF_SYSTEM);
+		ip = createi(sdp->md.jiinode, name, S_IFREG | 0600,
+			     GFS2_DIF_SYSTEM);
 		write_journal(sdp, ip, j,
 			      sdp->jsize << 20 >> sdp->sd_sb.sb_bsize_shift);
-		inode_put(ip, updated);
+		inode_put(&ip);
 		printf("done.\n");
 		fflush(stdout);
 	}
 
 	if (sdp->debug) {
 		printf("\nJindex:\n");
-		gfs2_dinode_print(&jindex->i_di);
+		gfs2_dinode_print(&sdp->md.jiinode->i_di);
 	}
 
-	inode_put(jindex, updated);
+	inode_put(&sdp->md.jiinode);
 }
 
 /* ------------------------------------------------------------------------- */
@@ -1527,7 +1807,7 @@ int main(int argc, char **argv)
 		if (error)
 			log_crit("%s: Unable to convert resource groups.\n",
 					device);
-		bcommit(&sb2.nvbuf_list); /* write the buffers to disk */
+		fsync(sb2.device_fd); /* write the buffers to disk */
 	}
 	/* ---------------------------------------------- */
 	/* Renumber the inodes consecutively.             */
@@ -1536,7 +1816,7 @@ int main(int argc, char **argv)
 		error = inode_renumber(&sb2, sb2.sd_sb.sb_root_dir.no_addr);
 		if (error)
 			log_crit("\n%s: Error renumbering inodes.\n", device);
-		bcommit(&sb2.buf_list); /* write the buffers to disk */
+		fsync(sb2.device_fd); /* write the buffers to disk */
 	}
 	/* ---------------------------------------------- */
 	/* Fix the directories to match the new numbers.  */
@@ -1557,7 +1837,7 @@ int main(int argc, char **argv)
 		error = journ_space_to_rg(&sb2);
 		if (error)
 			log_crit("%s: Error converting journal space.\n", device);
-		bcommit(&sb2.buf_list); /* write the buffers to disk */
+		fsync(sb2.device_fd); /* write the buffers to disk */
 	}
 	/* ---------------------------------------------- */
 	/* Create our system files and directories.       */
@@ -1588,33 +1868,31 @@ int main(int argc, char **argv)
 		build_quota(&sb2);
 
 		update_inode_file(&sb2);
+		/* Now delete the now-obsolete gfs1 files: */
+		remove_obsolete_gfs1(&sb2);
+
 		write_statfs_file(&sb2);
 
-		inode_put(sb2.master_dir, updated);
-		inode_put(sb2.md.inum, updated);
-		inode_put(sb2.md.statfs, updated);
+		inode_put(&sb2.master_dir);
+		inode_put(&sb2.md.inum);
+		inode_put(&sb2.md.statfs);
 
-		bcommit(&sb2.buf_list); /* write the buffers to disk */
-		bcommit(&sb2.nvbuf_list); /* write the buffers to disk */
+		fsync(sb2.device_fd); /* write the buffers to disk */
 
-		/* Now delete the now-obsolete gfs1 files: */
-		remove_obsolete_gfs1(&sb2);
 		/* Now free all the in memory */
-		gfs2_rgrp_free(&sb2.rglist, updated);
+		gfs2_rgrp_free(&sb2.rglist);
 		log_notice("Committing changes to disk.\n");
 		fflush(stdout);
 		/* Set filesystem type in superblock to gfs2.  We do this at the */
 		/* end because if the tool is interrupted in the middle, we want */
 		/* it to not reject the partially converted fs as already done   */
 		/* when it's run a second time.                                  */
-		bh = bread(&sb2.buf_list, sb2.sb_addr);
+		bh = bread(&sb2, sb2.sb_addr);
 		sb2.sd_sb.sb_fs_format = GFS2_FORMAT_FS;
 		sb2.sd_sb.sb_multihost_format = GFS2_FORMAT_MULTI;
-		gfs2_sb_out(&sb2.sd_sb, bh->b_data);
-		brelse(bh, updated);
+		gfs2_sb_out(&sb2.sd_sb, bh);
+		brelse(bh);
 
-		bsync(&sb2.buf_list); /* write the buffers to disk */
-		bsync(&sb2.nvbuf_list); /* write the buffers to disk */
 		error = fsync(sb2.device_fd);
 		if (error)
 			perror(device);
diff --git a/gfs2/edit/gfs2hex.c b/gfs2/edit/gfs2hex.c
index a4ec02c..00c4481 100644
--- a/gfs2/edit/gfs2hex.c
+++ b/gfs2/edit/gfs2hex.c
@@ -28,7 +28,7 @@
 
 
 struct gfs2_sb sb;
-char *buf;
+struct gfs2_buffer_head *bh;
 struct gfs2_dinode di;
 int line, termlines;
 char edit_fmt[80];
@@ -276,7 +276,7 @@ static int indirect_dirent(struct indirect_info *indir, char *ptr, int d)
 **
 *******************************************************************************
 ******************************************************************************/
-void do_dinode_extended(struct gfs2_dinode *dine, char *dinebuf)
+void do_dinode_extended(struct gfs2_dinode *dine, struct gfs2_buffer_head *lbh)
 {
 	unsigned int x, y, ptroff = 0;
 	uint64_t p, last;
@@ -289,7 +289,7 @@ void do_dinode_extended(struct gfs2_dinode *dine, char *dinebuf)
 		/* Indirect pointers */
 		for (x = sizeof(struct gfs2_dinode); x < sbd.bsize;
 			 x += sizeof(uint64_t)) {
-			p = be64_to_cpu(*(uint64_t *)(dinebuf + x));
+			p = be64_to_cpu(*(uint64_t *)(lbh->b_data + x));
 			if (p) {
 				indirect->ii[indirect_blocks].block = p;
 				indirect->ii[indirect_blocks].mp.mp_list[0] =
@@ -308,8 +308,7 @@ void do_dinode_extended(struct gfs2_dinode *dine, char *dinebuf)
 		indirect->ii[0].block = block;
 		indirect->ii[0].is_dir = TRUE;
 		for (x = sizeof(struct gfs2_dinode); x < sbd.bsize; x += skip) {
-			skip = indirect_dirent(indirect->ii,
-					       dinebuf + x,
+			skip = indirect_dirent(indirect->ii, lbh->b_data + x,
 					       indirect->ii[0].dirents);
 			if (skip <= 0)
 				break;
@@ -320,12 +319,13 @@ void do_dinode_extended(struct gfs2_dinode *dine, char *dinebuf)
 			 dine->di_height == 0) {
 		/* Leaf Pointers: */
 		
-		last = be64_to_cpu(*(uint64_t *)(dinebuf + sizeof(struct gfs2_dinode)));
+		last = be64_to_cpu(*(uint64_t *)(lbh->b_data +
+						 sizeof(struct gfs2_dinode)));
     
 		for (x = sizeof(struct gfs2_dinode), y = 0;
 			 y < (1 << dine->di_depth);
 			 x += sizeof(uint64_t), y++) {
-			p = be64_to_cpu(*(uint64_t *)(dinebuf + x));
+			p = be64_to_cpu(*(uint64_t *)(lbh->b_data + x));
 
 			if (p != last || ((y + 1) * sizeof(uint64_t) == dine->di_size)) {
 				struct gfs2_buffer_head *tmp_bh;
@@ -335,8 +335,8 @@ void do_dinode_extended(struct gfs2_dinode *dine, char *dinebuf)
 
 				if (last >= max_block)
 					break;
-				tmp_bh = bread(&sbd.buf_list, last);
-				gfs2_leaf_in(&leaf, tmp_bh->b_data);
+				tmp_bh = bread(&sbd, last);
+				gfs2_leaf_in(&leaf, tmp_bh);
 				indirect->ii[indirect_blocks].dirents = 0;
 				for (direntcount = 0, bufoffset = sizeof(struct gfs2_leaf);
 					 bufoffset < sbd.bsize;
@@ -347,7 +347,7 @@ void do_dinode_extended(struct gfs2_dinode *dine, char *dinebuf)
 					if (skip <= 0)
 						break;
 				}
-				brelse(tmp_bh, not_updated);
+				brelse(tmp_bh);
 				indirect->ii[indirect_blocks].block = last;
 				indirect_blocks++;
 				last = p;
@@ -461,7 +461,7 @@ void do_leaf_extended(char *dlebuf, struct iinfo *indir)
 *******************************************************************************
 ******************************************************************************/
 
-static void do_eattr_extended(char *deebuf)
+static void do_eattr_extended(struct gfs2_buffer_head *ebh)
 {
 	struct gfs2_ea_header ea;
 	unsigned int x;
@@ -470,11 +470,13 @@ static void do_eattr_extended(char *deebuf)
 	print_gfs2("Eattr Entries:");
 	eol(0);
 
-	for (x = sizeof(struct gfs2_meta_header); x < sbd.bsize; x += ea.ea_rec_len)
+	for (x = sizeof(struct gfs2_meta_header); x < sbd.bsize;
+	     x += ea.ea_rec_len)
 	{
 		eol(0);
-		gfs2_ea_header_in(&ea, deebuf + x);
-		gfs2_ea_header_print(&ea, deebuf + x + sizeof(struct gfs2_ea_header));
+		gfs2_ea_header_in(&ea, ebh->b_data + x);
+		gfs2_ea_header_print(&ea, ebh->b_data + x +
+				     sizeof(struct gfs2_ea_header));
 	}
 }
 
@@ -532,11 +534,11 @@ static void gfs2_sb_print2(struct gfs2_sb *sbp2)
 /**
  * gfs1_rgrp_in - read in a gfs1 rgrp
  */
-static void gfs1_rgrp_in(struct gfs1_rgrp *rgrp, char *rbuf)
+static void gfs1_rgrp_in(struct gfs1_rgrp *rgrp, struct gfs2_buffer_head *rbh)
 {
-        struct gfs1_rgrp *str = (struct gfs1_rgrp *)rbuf;
+        struct gfs1_rgrp *str = (struct gfs1_rgrp *)rbh->b_data;
 
-        gfs2_meta_header_in(&rgrp->rg_header, rbuf);
+        gfs2_meta_header_in(&rgrp->rg_header, rbh);
         rgrp->rg_flags = be32_to_cpu(str->rg_flags);
         rgrp->rg_free = be32_to_cpu(str->rg_free);
         rgrp->rg_useddi = be32_to_cpu(str->rg_useddi);
@@ -592,12 +594,12 @@ int display_gfs2(void)
 
 	uint32_t magic;
 
-	magic = be32_to_cpu(*(uint32_t *)buf);
+	magic = be32_to_cpu(*(uint32_t *)bh->b_data);
 
 	switch (magic)
 	{
 	case GFS2_MAGIC:
-		gfs2_meta_header_in(&mh, buf);
+		gfs2_meta_header_in(&mh, bh);
 		if (mh.mh_type > GFS2_METATYPE_QC)
 			print_gfs2("Unknown metadata type");
 		else
@@ -607,7 +609,7 @@ int display_gfs2(void)
 		switch (mh.mh_type)
 		{
 		case GFS2_METATYPE_SB:
-			gfs2_sb_in(&sbd.sd_sb, buf);
+			gfs2_sb_in(&sbd.sd_sb, bh);
 			gfs2_sb_print2(&sbd.sd_sb);
 			break;
 
@@ -615,10 +617,10 @@ int display_gfs2(void)
 			if (gfs1) {
 				struct gfs1_rgrp rg1;
 
-				gfs1_rgrp_in(&rg1, buf);
+				gfs1_rgrp_in(&rg1, bh);
 				gfs1_rgrp_print(&rg1);
 			} else {
-				gfs2_rgrp_in(&rg, buf);
+				gfs2_rgrp_in(&rg, bh);
 				gfs2_rgrp_print(&rg);
 			}
 			break;
@@ -636,7 +638,7 @@ int display_gfs2(void)
 			break;
 
 		case GFS2_METATYPE_LF:
-			gfs2_leaf_in(&lf, buf);
+			gfs2_leaf_in(&lf, bh);
 			gfs2_leaf_print(&lf);
 			break;
 
@@ -646,21 +648,21 @@ int display_gfs2(void)
 
 		case GFS2_METATYPE_LH:
 			if (gfs1) {
-				gfs_log_header_in(&lh1, buf);
+				gfs_log_header_in(&lh1, bh);
 				gfs_log_header_print(&lh1);
 			} else {
-				gfs2_log_header_in(&lh, buf);
+				gfs2_log_header_in(&lh, bh);
 				gfs2_log_header_print(&lh);
 			}
 			break;
 
 		case GFS2_METATYPE_LD:
-			gfs2_log_descriptor_in(&ld, buf);
+			gfs2_log_descriptor_in(&ld, bh);
 			gfs2_log_descriptor_print(&ld);
 			break;
 
 		case GFS2_METATYPE_EA:
-			do_eattr_extended(buf);
+			do_eattr_extended(bh);
 			break;
 			
 		case GFS2_METATYPE_ED:
@@ -672,7 +674,7 @@ int display_gfs2(void)
 			break;
 
 		case GFS2_METATYPE_QC:
-			gfs2_quota_change_in(&qc, buf);
+			gfs2_quota_change_in(&qc, bh);
 			gfs2_quota_change_print(&qc);
 			break;
 
diff --git a/gfs2/edit/gfs2hex.h b/gfs2/edit/gfs2hex.h
index 5a351d6..d18dd59 100644
--- a/gfs2/edit/gfs2hex.h
+++ b/gfs2/edit/gfs2hex.h
@@ -5,7 +5,7 @@
 
 int display_gfs2(void);
 int edit_gfs2(void);
-void do_dinode_extended(struct gfs2_dinode *di, char *buf);
+void do_dinode_extended(struct gfs2_dinode *di, struct gfs2_buffer_head *lbh);
 void print_gfs2(const char *fmt, ...);
 int do_indirect_extended(char *diebuf, struct iinfo *iinf, int hgt);
 void do_leaf_extended(char *dlebuf, struct iinfo *indir);
diff --git a/gfs2/edit/hexedit.c b/gfs2/edit/hexedit.c
index b6b8fe9..109f9c0 100644
--- a/gfs2/edit/hexedit.c
+++ b/gfs2/edit/hexedit.c
@@ -427,9 +427,10 @@ static void print_usage(void)
 /* returns: metatype if block is a GFS2 structure block type                */
 /*          0 if block is not a GFS2 structure                              */
 /* ------------------------------------------------------------------------ */
-static int get_block_type(const char *lpBuffer)
+static int get_block_type(struct gfs2_buffer_head *lbh)
 {
 	int ret_type = 0;
+	char *lpBuffer = lbh->b_data;
 
 	if (*(lpBuffer+0)==0x01 && *(lpBuffer+1)==0x16 &&
 	    *(lpBuffer+2)==0x19 && *(lpBuffer+3)==0x70 &&
@@ -444,7 +445,7 @@ static int get_block_type(const char *lpBuffer)
 /* returns: metatype if block is a GFS2 structure block type                */
 /*          0 if block is not a GFS2 structure                              */
 /* ------------------------------------------------------------------------ */
-int display_block_type(const char *lpBuffer, int from_restore)
+int display_block_type(int from_restore)
 {
 	int ret_type = 0; /* return type */
 
@@ -481,8 +482,8 @@ int display_block_type(const char *lpBuffer, int from_restore)
 		ret_type = GFS2_METATYPE_RG;
 		struct_len = gfs1 ? sizeof(struct gfs_rgrp) : sizeof(struct gfs2_rgrp);
 	}
-	else if ((ret_type = get_block_type(lpBuffer))) {
-		switch (*(lpBuffer+7)) {
+	else if ((ret_type = get_block_type(bh))) {
+		switch (*(bh->b_data + 7)) {
 		case GFS2_METATYPE_SB:   /* 1 */
 			print_gfs2("(superblock)");
 			if (gfs1)
@@ -564,8 +565,9 @@ int display_block_type(const char *lpBuffer, int from_restore)
 
 		rgd = gfs2_blk2rgrpd(&sbd, block);
 		if (rgd) {
+			gfs2_rgrp_read(&sbd, rgd);
 			type = gfs2_get_bitmap(&sbd, block, rgd);
-			gfs2_rgrp_relse(rgd, not_updated);
+			gfs2_rgrp_relse(rgd);
 		} else
 			type = 4;
 		screen_chunk_size = ((termlines - 4) * 16) >> 8 << 8;
@@ -577,9 +579,9 @@ int display_block_type(const char *lpBuffer, int from_restore)
 			   sbd.bsize / screen_chunk_size + 1 : sbd.bsize /
 			   screen_chunk_size, allocdesc[gfs1][type]);
 		/*eol(9);*/
-		if ((*(lpBuffer+7) == GFS2_METATYPE_IN) ||
-		    (*(lpBuffer+7) == GFS2_METATYPE_DI &&
-		     (*(lpBuffer + 0x8b) || *(lpBuffer + 0x8a)))) {
+		if ((*(bh->b_data + 7) == GFS2_METATYPE_IN) ||
+		    (*(bh->b_data + 7) == GFS2_METATYPE_DI &&
+		     (*(bh->b_data + 0x8b) || *(bh->b_data + 0x8a)))) {
 			int ptroffset = edit_row[dmode] * 16 + edit_col[dmode];
 
 			if (ptroffset >= struct_len || pgnum) {
@@ -638,11 +640,12 @@ int display_block_type(const char *lpBuffer, int from_restore)
 /* ------------------------------------------------------------------------ */
 /* hexdump - hex dump the filesystem block to the screen                    */
 /* ------------------------------------------------------------------------ */
-static int hexdump(uint64_t startaddr, const char *lpBuffer, int len)
+static int hexdump(uint64_t startaddr, int len)
 {
 	const unsigned char *pointer,*ptr2;
 	int i;
 	uint64_t l;
+	const char *lpBuffer = bh->b_data;
 
 	strcpy(edit_fmt,"%02X");
 	pointer = (unsigned char *)lpBuffer + offset;
@@ -783,8 +786,8 @@ static void rgcount(void)
 {
 	printf("%lld RGs in this file system.\n",
 	       (unsigned long long)sbd.md.riinode->i_di.di_size / risize());
-	inode_put(sbd.md.riinode, not_updated);
-	gfs2_rgrp_free(&sbd.rglist, not_updated);
+	inode_put(&sbd.md.riinode);
+	gfs2_rgrp_free(&sbd.rglist);
 	exit(EXIT_SUCCESS);
 }
 
@@ -816,11 +819,11 @@ static uint64_t find_rgrp_block(struct gfs2_inode *dif, int rg)
 /* ------------------------------------------------------------------------ */
 /* gfs_rgrp_in - Read in a resource group header                            */
 /* ------------------------------------------------------------------------ */
-static void gfs_rgrp_in(struct gfs_rgrp *rgrp, char *gbuf)
+static void gfs_rgrp_in(struct gfs_rgrp *rgrp, struct gfs2_buffer_head *rbh)
 {
-	struct gfs_rgrp *str = (struct gfs_rgrp *)gbuf;
+	struct gfs_rgrp *str = (struct gfs_rgrp *)rbh->b_data;
 
-	gfs2_meta_header_in(&rgrp->rg_header, gbuf);
+	gfs2_meta_header_in(&rgrp->rg_header, rbh);
 	rgrp->rg_flags = be32_to_cpu(str->rg_flags);
 	rgrp->rg_free = be32_to_cpu(str->rg_free);
 	rgrp->rg_useddi = be32_to_cpu(str->rg_useddi);
@@ -833,11 +836,11 @@ static void gfs_rgrp_in(struct gfs_rgrp *rgrp, char *gbuf)
 /* ------------------------------------------------------------------------ */
 /* gfs_rgrp_out */
 /* ------------------------------------------------------------------------ */
-static void gfs_rgrp_out(struct gfs_rgrp *rgrp, char *gbuf)
+static void gfs_rgrp_out(struct gfs_rgrp *rgrp, struct gfs2_buffer_head *rbh)
 {
-	struct gfs_rgrp *str = (struct gfs_rgrp *)gbuf;
+	struct gfs_rgrp *str = (struct gfs_rgrp *)rbh->b_data;
 
-	gfs2_meta_header_out(&rgrp->rg_header, gbuf);
+	gfs2_meta_header_out(&rgrp->rg_header, rbh);
 	str->rg_flags = cpu_to_be32(rgrp->rg_flags);
 	str->rg_free = cpu_to_be32(rgrp->rg_free);
 	str->rg_useddi = cpu_to_be32(rgrp->rg_useddi);
@@ -867,7 +870,6 @@ static void gfs_rgrp_print(struct gfs_rgrp *rg)
 /* ------------------------------------------------------------------------ */
 static uint64_t get_rg_addr(int rgnum)
 {
-	struct gfs2_buffer_head *bh;
 	uint64_t rgblk = 0, gblock;
 	struct gfs2_inode *riinode;
 
@@ -875,14 +877,13 @@ static uint64_t get_rg_addr(int rgnum)
 		gblock = sbd1->sb_rindex_di.no_addr;
 	else
 		gblock = masterblock("rindex");
-	bh = bread(&sbd.buf_list, gblock);
-	riinode = inode_get(&sbd, bh);
+	riinode = inode_read(&sbd, gblock);
 	if (rgnum < riinode->i_di.di_size / risize())
 		rgblk = find_rgrp_block(riinode, rgnum);
 	else
 		fprintf(stderr, "Error: File system only has %lld RGs.\n",
 			(unsigned long long)riinode->i_di.di_size / risize());
-	inode_put(riinode, not_updated);
+	inode_put(&riinode);
 	return rgblk;
 }
 
@@ -899,25 +900,25 @@ static void set_rgrp_flags(int rgnum, uint32_t new_flags, int modify, int full)
 		struct gfs2_rgrp rg2;
 		struct gfs_rgrp rg1;
 	} rg;
-	struct gfs2_buffer_head *bh;
+	struct gfs2_buffer_head *rbh;
 	uint64_t rgblk;
 
 	rgblk = get_rg_addr(rgnum);
-	bh = bread(&sbd.buf_list, rgblk);
+	rbh = bread(&sbd, rgblk);
 	if (gfs1)
-		gfs_rgrp_in(&rg.rg1, bh->b_data);
+		gfs_rgrp_in(&rg.rg1, rbh);
 	else
-		gfs2_rgrp_in(&rg.rg2, bh->b_data);
+		gfs2_rgrp_in(&rg.rg2, rbh);
 	if (modify) {
 		printf("RG #%d (block %llu / 0x%llx) rg_flags changed from 0x%08x to 0x%08x\n",
 		       rgnum, (unsigned long long)rgblk,
 		       (unsigned long long)rgblk, rg.rg2.rg_flags, new_flags);
 		rg.rg2.rg_flags = new_flags;
 		if (gfs1)
-			gfs_rgrp_out(&rg.rg1, bh->b_data);
+			gfs_rgrp_out(&rg.rg1, rbh);
 		else
-			gfs2_rgrp_out(&rg.rg2, bh->b_data);
-		brelse(bh, updated);
+			gfs2_rgrp_out(&rg.rg2, rbh);
+		brelse(rbh);
 	} else {
 		if (full) {
 			print_gfs2("RG #%d", rgnum);
@@ -932,10 +933,10 @@ static void set_rgrp_flags(int rgnum, uint32_t new_flags, int modify, int full)
 			printf("RG #%d (block %llu / 0x%llx) rg_flags = 0x%08x\n",
 			       rgnum, (unsigned long long)rgblk,
 			       (unsigned long long)rgblk, rg.rg2.rg_flags);
-		brelse(bh, not_updated);
+		brelse(rbh);
 	}
 	if (modify)
-		bsync(&sbd.buf_list);
+		fsync(sbd.device_fd);
 }
 
 /* ------------------------------------------------------------------------ */
@@ -987,17 +988,17 @@ static int parse_rindex(struct gfs2_inode *dip, int print_rindex)
 			else {
 				struct gfs2_buffer_head *tmp_bh;
 
-				tmp_bh = bread(&sbd.nvbuf_list, ri.ri_addr);
+				tmp_bh = bread(&sbd, ri.ri_addr);
 				if (gfs1) {
 					struct gfs_rgrp rg1;
-					gfs_rgrp_in(&rg1, tmp_bh->b_data);
+					gfs_rgrp_in(&rg1, tmp_bh);
 					gfs_rgrp_print(&rg1);
 				} else {
 					struct gfs2_rgrp rg;
-					gfs2_rgrp_in(&rg, tmp_bh->b_data);
+					gfs2_rgrp_in(&rg, tmp_bh);
 					gfs2_rgrp_print(&rg);
 				}
-				brelse(tmp_bh, not_updated);
+				brelse(tmp_bh);
 			}
 			last_entry_onscreen[dmode] = print_entry_ndx;
 		}
@@ -1593,17 +1594,15 @@ int block_is_per_node(void)
 int block_is_in_per_node(void)
 {
 	int d;
-	struct gfs2_dinode per_node_di;
-	struct gfs2_buffer_head *per_node_bh;
+	struct gfs2_inode *per_node_di;
 
 	if (gfs1)
 		return FALSE;
 
-	per_node_bh = bread(&sbd.buf_list, masterblock("per_node"));
-	gfs2_dinode_in(&per_node_di, per_node_bh->b_data);
+	per_node_di = inode_read(&sbd, masterblock("per_node"));
 
-	do_dinode_extended(&per_node_di, per_node_bh->b_data);
-	brelse(per_node_bh, not_updated);
+	do_dinode_extended(&per_node_di->i_di, per_node_di->i_bh);
+	inode_put(&per_node_di);
 
 	for (d = 0; d < indirect->ii[0].dirents; d++) {
 		if (block == indirect->ii[0].dirent[d].block)
@@ -1638,10 +1637,10 @@ static int display_extended(void)
 
 	/* Display any indirect pointers that we have. */
 	if (block_is_rindex()) {
-		tmp_bh = bread(&sbd.buf_list, block);
+		tmp_bh = bread(&sbd, block);
 		tmp_inode = inode_get(&sbd, tmp_bh);
 		parse_rindex(tmp_inode, TRUE);
-		brelse(tmp_bh, not_updated);
+		brelse(tmp_bh);
 	}
 	else if (has_indirect_blocks() && !indirect_blocks &&
 		 !display_leaf(indirect))
@@ -1650,37 +1649,37 @@ static int display_extended(void)
 		return -1;
 	else if (block_is_rglist()) {
 		if (gfs1)
-			tmp_bh = bread(&sbd.buf_list,
+			tmp_bh = bread(&sbd,
 				       sbd1->sb_rindex_di.no_addr);
 		else
-			tmp_bh = bread(&sbd.buf_list, masterblock("rindex"));
+			tmp_bh = bread(&sbd, masterblock("rindex"));
 		tmp_inode = inode_get(&sbd, tmp_bh);
 		parse_rindex(tmp_inode, FALSE);
-		brelse(tmp_bh, not_updated);
+		brelse(tmp_bh);
 	}
 	else if (block_is_jindex()) {
-		tmp_bh = bread(&sbd.buf_list, block);
+		tmp_bh = bread(&sbd, block);
 		tmp_inode = inode_get(&sbd, tmp_bh);
 		print_jindex(tmp_inode);
-		brelse(tmp_bh, not_updated);
+		brelse(tmp_bh);
 	}
 	else if (block_is_inum_file()) {
-		tmp_bh = bread(&sbd.buf_list, block);
+		tmp_bh = bread(&sbd, block);
 		tmp_inode = inode_get(&sbd, tmp_bh);
 		print_inum(tmp_inode);
-		brelse(tmp_bh, not_updated);
+		brelse(tmp_bh);
 	}
 	else if (block_is_statfs_file()) {
-		tmp_bh = bread(&sbd.buf_list, block);
+		tmp_bh = bread(&sbd, block);
 		tmp_inode = inode_get(&sbd, tmp_bh);
 		print_statfs(tmp_inode);
-		brelse(tmp_bh, not_updated);
+		brelse(tmp_bh);
 	}
 	else if (block_is_quota_file()) {
-		tmp_bh = bread(&sbd.buf_list, block);
+		tmp_bh = bread(&sbd, block);
 		tmp_inode = inode_get(&sbd, tmp_bh);
 		print_quota(tmp_inode);
-		brelse(tmp_bh, not_updated);
+		brelse(tmp_bh);
 	}
 	return 0;
 }
@@ -1694,30 +1693,23 @@ static void read_superblock(int fd)
 
 	sbd1 = (struct gfs_sb *)&sbd.sd_sb;
 	ioctl(fd, BLKFLSBUF, 0);
-	lseek(fd, 0x10 * 4096, SEEK_SET);
-	if (read(fd, buf, sbd.bsize) != sbd.bsize) {
-		fprintf(stderr, "bad read: %s from %s:%d: superblock\n",
-			strerror(errno), __FUNCTION__, __LINE__);
-		exit(-1);
-	}
 	memset(&sbd, 0, sizeof(struct gfs2_sbd));
-	sbd.device_fd = fd;
 	sbd.bsize = GFS2_DEFAULT_BSIZE;
+	sbd.device_fd = fd;
+	bh = bread(&sbd, 0x10);
 	sbd.jsize = GFS2_DEFAULT_JSIZE;
 	sbd.rgsize = GFS2_DEFAULT_RGSIZE;
 	sbd.utsize = GFS2_DEFAULT_UTSIZE;
 	sbd.qcsize = GFS2_DEFAULT_QCSIZE;
 	sbd.time = time(NULL);
 	osi_list_init(&sbd.rglist);
-	init_buf_list(&sbd, &sbd.buf_list, 128 << 20);
-	init_buf_list(&sbd, &sbd.nvbuf_list, 0xffffffff);
-	gfs2_sb_in(&sbd.sd_sb, buf); /* parse it out into the sb structure */
+	gfs2_sb_in(&sbd.sd_sb, bh); /* parse it out into the sb structure */
 	/* Check to see if this is really gfs1 */
 	if (sbd1->sb_fs_format == GFS_FORMAT_FS &&
 		sbd1->sb_header.mh_type == GFS_METATYPE_SB &&
 		sbd1->sb_header.mh_format == GFS_FORMAT_SB &&
 		sbd1->sb_multihost_format == GFS_FORMAT_MULTI) {
-		struct gfs_sb *sbbuf = (struct gfs_sb *)buf;
+		struct gfs_sb *sbbuf = (struct gfs_sb *)bh->b_data;
 
 		gfs1 = TRUE;
 		sbd1->sb_flags = be32_to_cpu(sbbuf->sb_flags);
@@ -1747,20 +1739,19 @@ static void read_superblock(int fd)
 			sizeof(uint64_t);
 		sbd.sd_diptrs = (sbd.bsize - sizeof(struct gfs_dinode)) /
 			sizeof(uint64_t);
-		sbd.md.riinode = gfs2_load_inode(&sbd,
-						 sbd1->sb_rindex_di.no_addr);
+		sbd.md.riinode = inode_read(&sbd, sbd1->sb_rindex_di.no_addr);
 		sbd.fssize = sbd.device.length;
-		gfs1_ri_update(&sbd, 0, &count, 1);
+		gfs1_rindex_read(&sbd, 0, &count);
 	} else {
 		sbd.sd_inptrs = (sbd.bsize - sizeof(struct gfs2_meta_header)) /
 			sizeof(uint64_t);
 		sbd.sd_diptrs = (sbd.bsize - sizeof(struct gfs2_dinode)) /
 			sizeof(uint64_t);
-		sbd.master_dir = gfs2_load_inode(&sbd,
+		sbd.master_dir = inode_read(&sbd,
 					    sbd.sd_sb.sb_master_dir.no_addr);
 		gfs2_lookupi(sbd.master_dir, "rindex", 6, &sbd.md.riinode);
 		sbd.fssize = sbd.device.length;
-		ri_update(&sbd, 0, &count);
+		rindex_read(&sbd, 0, &count);
 	}
 
 }
@@ -1773,7 +1764,7 @@ static void read_master_dir(void)
 	ioctl(sbd.device_fd, BLKFLSBUF, 0);
 	lseek(sbd.device_fd, sbd.sd_sb.sb_master_dir.no_addr * sbd.bsize,
 	      SEEK_SET);
-	if (read(sbd.device_fd, buf, sbd.bsize) != sbd.bsize) {
+	if (read(sbd.device_fd, bh->b_data, sbd.bsize) != sbd.bsize) {
 		fprintf(stderr, "read error: %s from %s:%d: "
 			"master dir block %lld (0x%llx)\n",
 			strerror(errno), __FUNCTION__,
@@ -1782,8 +1773,8 @@ static void read_master_dir(void)
 			(unsigned long long)sbd.sd_sb.sb_master_dir.no_addr);
 		exit(-1);
 	}
-	gfs2_dinode_in(&di, buf); /* parse disk inode into structure */
-	do_dinode_extended(&di, buf); /* get extended data, if any */
+	gfs2_dinode_in(&di, bh); /* parse disk inode into structure */
+	do_dinode_extended(&di, bh); /* get extended data, if any */
 	memcpy(&masterdir, &indirect[0], sizeof(struct indirect_info));
 }
 
@@ -1808,8 +1799,7 @@ int display(int identify_only)
 	if (block_in_mem != blk) { /* If we changed blocks from the last read */
 		dev_offset = blk * sbd.bsize;
 		ioctl(sbd.device_fd, BLKFLSBUF, 0);
-		lseek(sbd.device_fd, dev_offset, SEEK_SET);
-		if (read(sbd.device_fd, buf, sbd.bsize) != sbd.bsize) {
+		if (!(bh = bread(&sbd, blk))) {
 			fprintf(stderr, "read error: %s from %s:%d: "
 				"offset %lld (0x%llx)\n",
 				strerror(errno), __FUNCTION__, __LINE__,
@@ -1820,13 +1810,13 @@ int display(int identify_only)
 		block_in_mem = blk; /* remember which block is in memory */
 	}
 	line = 1;
-	gfs2_struct_type = display_block_type(buf, FALSE);
+	gfs2_struct_type = display_block_type(FALSE);
 	if (identify_only)
 		return 0;
 	indirect_blocks = 0;
 	lines_per_row[dmode] = 1;
 	if (gfs2_struct_type == GFS2_METATYPE_SB || blk == 0x10 * (4096 / sbd.bsize)) {
-		gfs2_sb_in(&sbd.sd_sb, buf); /* parse it out into the sb structure */
+		gfs2_sb_in(&sbd.sd_sb, bh); /* parse it out into the sb structure */
 		memset(indirect, 0, sizeof(indirect));
 		indirect->ii[0].block = sbd.sd_sb.sb_master_dir.no_addr;
 		indirect->ii[0].is_dir = TRUE;
@@ -1849,8 +1839,8 @@ int display(int identify_only)
 		indirect->ii[0].dirent[1].dirent.de_type = DT_DIR;
 	}
 	else if (gfs2_struct_type == GFS2_METATYPE_DI) {
-		gfs2_dinode_in(&di, buf); /* parse disk inode into structure */
-		do_dinode_extended(&di, buf); /* get extended data, if any */
+		gfs2_dinode_in(&di, bh); /* parse disk inode into structure */
+		do_dinode_extended(&di, bh); /* get extended data, if any */
 	}
 	else if (gfs2_struct_type == GFS2_METATYPE_IN) { /* indirect block list */
 		int i, hgt = get_height();
@@ -1861,10 +1851,11 @@ int display(int identify_only)
 				       &blockstack[blockhist - 1].mp,
 				       sizeof(struct metapath));
 		}
-		indirect_blocks = do_indirect_extended(buf, indirect, hgt);
+		indirect_blocks = do_indirect_extended(bh->b_data, indirect,
+						       hgt);
 	}
 	else if (gfs2_struct_type == GFS2_METATYPE_LF) { /* directory leaf */
-		do_leaf_extended(buf, indirect);
+		do_leaf_extended(bh->b_data, indirect);
 	}
 	last_entry_onscreen[dmode] = 0;
 	if (dmode == EXTENDED_MODE && !block_has_extended_info())
@@ -1879,8 +1870,7 @@ int display(int identify_only)
 		move(line, 0);
 	}
 	if (dmode == HEX_MODE)          /* if hex display mode           */
-		hexdump(dev_offset, buf,
-			(gfs2_struct_type == GFS2_METATYPE_DI)?
+		hexdump(dev_offset, (gfs2_struct_type == GFS2_METATYPE_DI)?
 			struct_len + di.di_size:sbd.bsize);
 	else if (dmode == GFS2_MODE)    /* if structure display          */
 		display_gfs2();            /* display the gfs2 structure    */
@@ -1962,13 +1952,13 @@ static uint64_t find_journal_block(const char *journal, uint64_t *j_size)
 	else
 		jindex_block = masterblock("jindex");
 	/* read in the block */
-	jindex_bh = bread(&sbd.buf_list, jindex_block);
+	jindex_bh = bread(&sbd, jindex_block);
 	/* get the dinode data from it. */
-	gfs2_dinode_in(&di, jindex_bh->b_data); /* parse disk inode to struct*/
+	gfs2_dinode_in(&di, jindex_bh); /* parse disk inode to struct*/
 
 	if (!gfs1)
-		do_dinode_extended(&di, jindex_bh->b_data); /* parse dir. */
-	brelse(jindex_bh, not_updated);
+		do_dinode_extended(&di, jindex_bh); /* parse dir. */
+	brelse(jindex_bh);
 
 	if (gfs1) {
 		struct gfs2_inode *jiinode;
@@ -1987,11 +1977,11 @@ static uint64_t find_journal_block(const char *journal, uint64_t *j_size)
 		struct gfs2_dinode jdi;
 
 		jblock = indirect->ii[0].dirent[journal_num + 2].block;
-		j_bh = bread(&sbd.buf_list, jblock);
+		j_bh = bread(&sbd, jblock);
 		j_inode = inode_get(&sbd, j_bh);
-		gfs2_dinode_in(&jdi, j_bh->b_data);/* parse dinode to struct */
+		gfs2_dinode_in(&jdi, j_bh);/* parse dinode to struct */
 		*j_size = jdi.di_size;
-		brelse(j_bh, not_updated);
+		brelse(j_bh);
 	}
 	return jblock;
 }
@@ -2006,21 +1996,21 @@ static uint64_t find_metablockoftype_slow(uint64_t startblk, int metatype, int p
 {
 	uint64_t blk, last_fs_block;
 	int found = 0;
-	struct gfs2_buffer_head *bh;
+	struct gfs2_buffer_head *lbh;
 
 	last_fs_block = lseek(sbd.device_fd, 0, SEEK_END) / sbd.bsize;
 	for (blk = startblk + 1; blk < last_fs_block; blk++) {
-		bh = bread(&sbd.buf_list, blk);
+		lbh = bread(&sbd, blk);
 		/* Can't use get_block_type here (returns false "none") */
-		if (bh->b_data[0] == 0x01 && bh->b_data[1] == 0x16 &&
-		    bh->b_data[2] == 0x19 && bh->b_data[3] == 0x70 &&
-		    bh->b_data[4] == 0x00 && bh->b_data[5] == 0x00 &&
-		    bh->b_data[6] == 0x00 && bh->b_data[7] == metatype) {
+		if (lbh->b_data[0] == 0x01 && lbh->b_data[1] == 0x16 &&
+		    lbh->b_data[2] == 0x19 && lbh->b_data[3] == 0x70 &&
+		    lbh->b_data[4] == 0x00 && lbh->b_data[5] == 0x00 &&
+		    lbh->b_data[6] == 0x00 && lbh->b_data[7] == metatype) {
 			found = 1;
-			brelse(bh, not_updated);
+			brelse(lbh);
 			break;
 		}
-		brelse(bh, not_updated);
+		brelse(lbh);
 	}
 	if (!found)
 		blk = 0;
@@ -2030,7 +2020,7 @@ static uint64_t find_metablockoftype_slow(uint64_t startblk, int metatype, int p
 		else
 			printf("%llu\n", (unsigned long long)blk);
 	}
-	gfs2_rgrp_free(&sbd.rglist, not_updated);
+	gfs2_rgrp_free(&sbd.rglist);
 	if (print)
 		exit(0);
 	return blk;
@@ -2069,7 +2059,7 @@ static uint64_t find_metablockoftype_rg(uint64_t startblk, int metatype, int pri
 	if (!rgd) {
 		if (print)
 			printf("0\n");
-		gfs2_rgrp_free(&sbd.rglist, not_updated);
+		gfs2_rgrp_free(&sbd.rglist);
 		if (print)
 			exit(-1);
 	}
@@ -2095,7 +2085,7 @@ static uint64_t find_metablockoftype_rg(uint64_t startblk, int metatype, int pri
 		else
 			printf("%llu\n", (unsigned long long)blk);
 	}
-	gfs2_rgrp_free(&sbd.rglist, not_updated);
+	gfs2_rgrp_free(&sbd.rglist);
 	if (print)
 		exit(0);
 	return blk;
@@ -2129,7 +2119,7 @@ static uint64_t find_metablockoftype(const char *strtype, int print)
 			"specified: must be one of:\n");
 		fprintf(stderr, "sb rg rb di in lf jd lh ld"
 			" ea ed lb 13 qc\n");
-		gfs2_rgrp_free(&sbd.rglist, not_updated);
+		gfs2_rgrp_free(&sbd.rglist);
 		exit(-1);
 	}
 	return blk;
@@ -2301,10 +2291,10 @@ static void hex_edit(int *exitch)
 				else if (estring[i+1] >= 'A' &&
 					 estring[i+1] <= 'F')
 					ch += (estring[i+1] - 'A' + 0x0a);
-				buf[offset + hexoffset] = ch;
+				bh->b_data[offset + hexoffset] = ch;
 			}
 			lseek(sbd.device_fd, dev_offset, SEEK_SET);
-			if (write(sbd.device_fd, buf, sbd.bsize) !=
+			if (write(sbd.device_fd, bh->b_data, sbd.bsize) !=
 			    sbd.bsize) {
 				fprintf(stderr, "write error: %s from %s:%d: "
 					"offset %lld (0x%llx)\n",
@@ -2391,7 +2381,8 @@ static void jump(void)
 		
 		if (edit_row[dmode] >= 0) {
 			col2 = edit_col[dmode] & 0x08;/* thus 0-7->0, 8-15->8 */
-			b = (uint64_t *)&buf[edit_row[dmode]*16 + offset + col2];
+			b = (uint64_t *)&bh->b_data[edit_row[dmode]*16 +
+						    offset + col2];
 			temp_blk=be64_to_cpu(*b);
 		}
 	}
@@ -2430,15 +2421,15 @@ static void print_block_type(uint64_t tblock, int type, const char *additional)
 static void find_print_block_type(void)
 {
 	uint64_t tblock;
-	struct gfs2_buffer_head *bh;
+	struct gfs2_buffer_head *lbh;
 	int type;
 
 	tblock = blockstack[blockhist % BLOCK_STACK_SIZE].block;
-	bh = bread(&sbd.buf_list, tblock);
-	type = get_block_type(bh->b_data);
+	lbh = bread(&sbd, tblock);
+	type = get_block_type(lbh);
 	print_block_type(tblock, type, "");
-	brelse(bh, NOT_UPDATED);
-	gfs2_rgrp_free(&sbd.rglist, not_updated);
+	brelse(lbh);
+	gfs2_rgrp_free(&sbd.rglist);
 	exit(0);
 }
 
@@ -2481,7 +2472,7 @@ static void find_print_block_rg(int bitmap)
 			printf("-1 (block invalid or part of an rgrp).\n");
 		}
 	}
-	gfs2_rgrp_free(&sbd.rglist, not_updated);
+	gfs2_rgrp_free(&sbd.rglist);
 	exit(0);
 }
 
@@ -2502,7 +2493,7 @@ static void find_change_block_alloc(int *newval)
 		       *newval);
 		for (i = GFS2_BLKST_FREE; i <= GFS2_BLKST_DINODE; i++)
 			printf("%d - %s\n", i, allocdesc[gfs1][i]);
-		gfs2_rgrp_free(&sbd.rglist, not_updated);
+		gfs2_rgrp_free(&sbd.rglist);
 		exit(-1);
 	}
 	ablock = blockstack[blockhist % BLOCK_STACK_SIZE].block;
@@ -2517,19 +2508,20 @@ static void find_change_block_alloc(int *newval)
 		} else {
 			rgd = gfs2_blk2rgrpd(&sbd, ablock);
 			if (rgd) {
+				gfs2_rgrp_read(&sbd, rgd);
 				type = gfs2_get_bitmap(&sbd, ablock, rgd);
+				gfs2_rgrp_relse(rgd);
 				printf("%d (%s)\n", type, allocdesc[gfs1][type]);
-				gfs2_rgrp_relse(rgd, not_updated);
 			} else {
-				gfs2_rgrp_free(&sbd.rglist, not_updated);
+				gfs2_rgrp_free(&sbd.rglist);
 				printf("-1 (block invalid or part of an rgrp).\n");
 				exit(-1);
 			}
 		}
 	}
-	gfs2_rgrp_free(&sbd.rglist, (newval) ? updated : not_updated);
+	gfs2_rgrp_free(&sbd.rglist);
 	if (newval)
-		bcommit(&sbd.nvbuf_list);
+		fsync(sbd.device_fd);
 	exit(0);
 }
 
@@ -2539,13 +2531,13 @@ static void find_change_block_alloc(int *newval)
 static void process_field(const char *field, uint64_t *newval, int print_field)
 {
 	uint64_t fblock;
-	struct gfs2_buffer_head *bh;
+	struct gfs2_buffer_head *rbh;
 	int type;
 	struct gfs2_rgrp rg;
 
 	fblock = blockstack[blockhist % BLOCK_STACK_SIZE].block;
-	bh = bread(&sbd.buf_list, block);
-	type = get_block_type(bh->b_data);
+	rbh = bread(&sbd, fblock);
+	type = get_block_type(rbh);
 	switch (type) {
 	case GFS2_METATYPE_SB:
 		if (print_field)
@@ -2553,10 +2545,10 @@ static void process_field(const char *field, uint64_t *newval, int print_field)
 					 " which is not implemented");
 		break;
 	case GFS2_METATYPE_RG:
-		gfs2_rgrp_in(&rg, bh->b_data);
+		gfs2_rgrp_in(&rg, rbh);
 		if (newval) {
 			gfs2_rgrp_assignval(&rg, field, *newval);
-			gfs2_rgrp_out(&rg, bh->b_data);
+			gfs2_rgrp_out(&rg, rbh);
 			if (print_field)
 				gfs2_rgrp_printval(&rg, field);
 		} else {
@@ -2570,10 +2562,10 @@ static void process_field(const char *field, uint64_t *newval, int print_field)
 					 " which is not implemented");
 		break;
 	case GFS2_METATYPE_DI:
-		gfs2_dinode_in(&di, bh->b_data);
+		gfs2_dinode_in(&di, rbh);
 		if (newval) {
 			gfs2_dinode_assignval(&di, field, *newval);
-			gfs2_dinode_out(&di, bh->b_data);
+			gfs2_dinode_out(&di, rbh);
 			if (print_field)
 				gfs2_dinode_printval(&di, field);
 		} else {
@@ -2596,8 +2588,8 @@ static void process_field(const char *field, uint64_t *newval, int print_field)
 					 " which is not implemented");
 		break;
 	}
-	brelse(bh, newval ? UPDATED : NOT_UPDATED);
-	bcommit(&sbd.buf_list);
+	brelse(rbh);
+	fsync(sbd.device_fd);
 }
 
 /* ------------------------------------------------------------------------ */
@@ -2868,11 +2860,12 @@ static void interactive_mode(void)
 /* ------------------------------------------------------------------------ */
 /* gfs_log_header_in - read in a gfs1-style log header                      */
 /* ------------------------------------------------------------------------ */
-void gfs_log_header_in(struct gfs_log_header *head, char *inbuf)
+void gfs_log_header_in(struct gfs_log_header *head,
+		       struct gfs2_buffer_head *lbh)
 {
-	struct gfs_log_header *str = (struct gfs_log_header *) inbuf;
+	struct gfs_log_header *str = (struct gfs_log_header *)lbh->b_data;
 
-	gfs2_meta_header_in(&head->lh_header, inbuf);
+	gfs2_meta_header_in(&head->lh_header, lbh);
 
 	head->lh_flags = be32_to_cpu(str->lh_flags);
 	head->lh_pad = be32_to_cpu(str->lh_pad);
@@ -2941,7 +2934,7 @@ static int fsck_readi(struct gfs2_inode *ip, void *rbuf, uint64_t roffset,
 	       unsigned int size, uint64_t *abs_block)
 {
 	struct gfs2_sbd *sdp = ip->i_sbd;
-	struct gfs2_buffer_head *bh;
+	struct gfs2_buffer_head *lbh;
 	uint64_t lblock, dblock;
 	unsigned int o;
 	uint32_t extlen = 0;
@@ -2976,18 +2969,18 @@ static int fsck_readi(struct gfs2_inode *ip, void *rbuf, uint64_t roffset,
 			amount = sdp->bsize - o;
 		if (!extlen)
 			block_map(ip, lblock, &not_new, &dblock, &extlen,
-				  FALSE, not_updated);
+				  FALSE);
 		if (dblock) {
-			bh = bread(&sdp->buf_list, dblock);
+			lbh = bread(sdp, dblock);
 			if (*abs_block == 0)
-				*abs_block = bh->b_blocknr;
+				*abs_block = lbh->b_blocknr;
 			dblock++;
 			extlen--;
 		} else
-			bh = NULL;
-		if (bh) {
-			memcpy(rbuf, bh->b_data + o, amount);
-			brelse(bh, not_updated);
+			lbh = NULL;
+		if (lbh) {
+			memcpy(rbuf, lbh->b_data + o, amount);
+			brelse(lbh);
 		} else {
 			memset(rbuf, 0, amount);
 		}
@@ -3018,13 +3011,13 @@ static void check_journal_wrap(uint64_t seq, uint64_t *highest_seq)
 /* ------------------------------------------------------------------------ */
 static void dump_journal(const char *journal)
 {
-	struct gfs2_buffer_head *j_bh = NULL;
+	struct gfs2_buffer_head *j_bh = NULL, dummy_bh;
 	uint64_t jblock, j_size, jb, abs_block;
 	int error, start_line, journal_num;
-	char jbuf[sbd.bsize];
 	struct gfs2_inode *j_inode = NULL;
 	int ld_blocks = 0;
 	uint64_t highest_seq = 0;
+	char *jbuf = NULL;
 
 	start_line = line;
 	lines_per_row[dmode] = 1;
@@ -3036,24 +3029,26 @@ static void dump_journal(const char *journal)
 	if (!jblock)
 		return;
 	if (!gfs1) {
-		j_bh = bread(&sbd.buf_list, jblock);
+		j_bh = bread(&sbd, jblock);
 		j_inode = inode_get(&sbd, j_bh);
+		jbuf = malloc(sbd.bsize);
 	}
 
 	for (jb = 0; jb < j_size; jb += (gfs1 ? 1:sbd.bsize)) {
 		if (gfs1) {
 			if (j_bh)
-				brelse(j_bh, not_updated);
-			j_bh = bread(&sbd.buf_list, jblock + jb);
+				brelse(j_bh);
+			j_bh = bread(&sbd, jblock + jb);
 			abs_block = jblock + jb;
-			memcpy(jbuf, j_bh->b_data, sbd.bsize);
+			dummy_bh.b_data = j_bh->b_data;
 		} else {
-			error = fsck_readi(j_inode, (void *)&jbuf, jb,
+			error = fsck_readi(j_inode, (void *)jbuf, jb,
 					   sbd.bsize, &abs_block);
 			if (!error) /* end of file */
 				break;
+			dummy_bh.b_data = jbuf;
 		}
-		if (get_block_type(jbuf) == GFS2_METATYPE_LD) {
+		if (get_block_type(&dummy_bh) == GFS2_METATYPE_LD) {
 			uint64_t *b;
 			struct gfs2_log_descriptor ld;
 			int ltndx;
@@ -3076,7 +3071,7 @@ static void dump_journal(const char *journal)
 
 			print_gfs2("0x%llx (j+%4llx): Log descriptor, ",
 				   abs_block, jb / (gfs1 ? 1 : sbd.bsize));
-			gfs2_log_descriptor_in(&ld, jbuf);
+			gfs2_log_descriptor_in(&ld, &dummy_bh);
 			print_gfs2("type %d ", ld.ld_type);
 
 			for (ltndx = 0;; ltndx++) {
@@ -3090,20 +3085,21 @@ static void dump_journal(const char *journal)
 			eol(0);
 			print_gfs2("                    ");
 			if (gfs1)
-				b = (uint64_t *)(jbuf +
+				b = (uint64_t *)(dummy_bh.b_data +
 					sizeof(struct gfs_log_descriptor));
 			else
-				b = (uint64_t *)(jbuf +
+				b = (uint64_t *)(dummy_bh.b_data +
 					sizeof(struct gfs2_log_descriptor));
 			ld_blocks = ld.ld_data1;
-			ld_blocks -= print_ld_blocks(b, (jbuf + sbd.bsize),
+			ld_blocks -= print_ld_blocks(b, (dummy_bh.b_data +
+							 sbd.bsize),
 						     start_line);
-		} else if (get_block_type(jbuf) == GFS2_METATYPE_LH) {
+		} else if (get_block_type(&dummy_bh) == GFS2_METATYPE_LH) {
 			struct gfs2_log_header lh;
 			struct gfs_log_header lh1;
 
 			if (gfs1) {
-				gfs_log_header_in(&lh1, jbuf);
+				gfs_log_header_in(&lh1, &dummy_bh);
 				check_journal_wrap(lh1.lh_sequence,
 						   &highest_seq);
 				print_gfs2("0x%llx (j+%4llx): Log header: "
@@ -3114,7 +3110,7 @@ static void dump_journal(const char *journal)
 					   lh1.lh_first, lh1.lh_tail,
 					   lh1.lh_last_dump);
 			} else {
-				gfs2_log_header_in(&lh, jbuf);
+				gfs2_log_header_in(&lh, &dummy_bh);
 				check_journal_wrap(lh.lh_sequence,
 						   &highest_seq);
 				print_gfs2("0x%llx (j+%4llx): Log header: Seq"
@@ -3129,13 +3125,14 @@ static void dump_journal(const char *journal)
 				   " continuation block", abs_block, jb);
 			eol(0);
 			print_gfs2("                    ");
-			ld_blocks -= print_ld_blocks((uint64_t *)jbuf,
-						     (jbuf + sbd.bsize),
-						     start_line);
+			ld_blocks -= print_ld_blocks((uint64_t *)dummy_bh.b_data,
+						     (dummy_bh.b_data +
+						      sbd.bsize), start_line);
 		}
 	}
-	brelse(j_bh, not_updated);
+	brelse(j_bh);
 	blockhist = -1; /* So we don't print anything else */
+	free(jbuf);
 }
 
 /* ------------------------------------------------------------------------ */
@@ -3332,7 +3329,7 @@ static void process_parameters(int argc, char *argv[], int pass)
 				printf("Error: field not specified.\n");
 				printf("Format is: %s -p <block> field "
 				       "<field> [newvalue]\n", argv[0]);
-				gfs2_rgrp_free(&sbd.rglist, not_updated);
+				gfs2_rgrp_free(&sbd.rglist);
 				exit(EXIT_FAILURE);
 			}
 			if (isdigit(argv[i + 1][0])) {
@@ -3342,11 +3339,11 @@ static void process_parameters(int argc, char *argv[], int pass)
 				else
 					newval = (uint64_t)atoll(argv[i + 1]);
 				process_field(argv[i], &newval, 1);
-				gfs2_rgrp_free(&sbd.rglist, not_updated);
+				gfs2_rgrp_free(&sbd.rglist);
 				exit(0);
 			} else {
 				process_field(argv[i], NULL, 1);
-				gfs2_rgrp_free(&sbd.rglist, not_updated);
+				gfs2_rgrp_free(&sbd.rglist);
 				exit(0);
 			}
 		} else if (!strcmp(argv[i], "blocktype")) {
@@ -3378,7 +3375,7 @@ static void process_parameters(int argc, char *argv[], int pass)
 				printf("Error: rg # not specified.\n");
 				printf("Format is: %s rgflags rgnum"
 				       "[newvalue]\n", argv[0]);
-				gfs2_rgrp_free(&sbd.rglist, not_updated);
+				gfs2_rgrp_free(&sbd.rglist);
 				exit(EXIT_FAILURE);
 			}
 			if (argv[i][0]=='0' && argv[i][1]=='x')
@@ -3395,7 +3392,7 @@ static void process_parameters(int argc, char *argv[], int pass)
 					new_flags = atoi(argv[i]);
 			}
 			set_rgrp_flags(rg, new_flags, set, FALSE);
-			gfs2_rgrp_free(&sbd.rglist, not_updated);
+			gfs2_rgrp_free(&sbd.rglist);
 			exit(EXIT_SUCCESS);
 		} else if (!strcmp(argv[i], "rg")) {
 			int rg;
@@ -3404,7 +3401,7 @@ static void process_parameters(int argc, char *argv[], int pass)
 			if (i >= argc - 1) {
 				printf("Error: rg # not specified.\n");
 				printf("Format is: %s rg rgnum\n", argv[0]);
-				gfs2_rgrp_free(&sbd.rglist, not_updated);
+				gfs2_rgrp_free(&sbd.rglist);
 				exit(EXIT_FAILURE);
 			}
 			rg = atoi(argv[i]);
@@ -3413,7 +3410,7 @@ static void process_parameters(int argc, char *argv[], int pass)
 				push_block(temp_blk);
 			} else {
 				set_rgrp_flags(rg, 0, FALSE, TRUE);
-				gfs2_rgrp_free(&sbd.rglist, not_updated);
+				gfs2_rgrp_free(&sbd.rglist);
 				exit(EXIT_SUCCESS);
 			}
 		}
@@ -3462,7 +3459,6 @@ int main(int argc, char *argv[])
 	memset(last_entry_onscreen, 0, sizeof(last_entry_onscreen));
 	dmode = INIT_MODE;
 	sbd.bsize = 4096;
-	type_alloc(buf, char, sbd.bsize); /* allocate/malloc a new 4K buffer */
 	block = starting_blk = 0x10;
 	for (i = 0; i < BLOCK_STACK_SIZE; i++) {
 		blockstack[i].dmode = HEX_MODE;
@@ -3519,10 +3515,8 @@ int main(int argc, char *argv[])
 		}
 	}
 	close(fd);
-	if (buf)
-		free(buf);
 	if (indirect)
 		free(indirect);
-	gfs2_rgrp_free(&sbd.rglist, not_updated);
+	gfs2_rgrp_free(&sbd.rglist);
  	exit(EXIT_SUCCESS);
 }
diff --git a/gfs2/edit/hexedit.h b/gfs2/edit/hexedit.h
index 88964a2..549c3af 100644
--- a/gfs2/edit/hexedit.h
+++ b/gfs2/edit/hexedit.h
@@ -51,7 +51,7 @@ extern char estring[1024]; /* edit string */
 extern char efield[64];
 extern uint64_t dev_offset;
 extern uint64_t max_block;
-extern char *buf;
+extern struct gfs2_buffer_head *bh;
 extern int termlines;
 extern int termcols;
 extern int insert;
@@ -154,16 +154,17 @@ extern int block_is_statfs_file(void);
 extern int block_is_quota_file(void);
 extern int block_is_per_node(void);
 extern int block_is_in_per_node(void);
-extern int display_block_type(const char *lpBuffer, int from_restore);
+extern int display_block_type(int from_restore);
 extern void gfs_jindex_in(struct gfs_jindex *jindex, char *buf);
-extern void gfs_log_header_in(struct gfs_log_header *head, char *buf);
+extern void gfs_log_header_in(struct gfs_log_header *head,
+			      struct gfs2_buffer_head *bh);
 extern void gfs_log_header_print(struct gfs_log_header *lh);
-extern void gfs_dinode_in(struct gfs_dinode *di, char *buf);
-extern int display(int identify_only);
-extern uint64_t check_keywords(const char *kword);
+extern void gfs_dinode_in(struct gfs_dinode *di, struct gfs2_buffer_head *bh);
 extern void savemeta(char *out_fn, int saveoption);
 extern void restoremeta(const char *in_fn, const char *out_device,
 			uint64_t printblocksonly);
+extern int display(int identify_only);
+extern uint64_t check_keywords(const char *kword);
 extern uint64_t masterblock(const char *fn);
 
 struct gfs2_dirents {
diff --git a/gfs2/edit/savemeta.c b/gfs2/edit/savemeta.c
index 85fcde7..f6d136f 100644
--- a/gfs2/edit/savemeta.c
+++ b/gfs2/edit/savemeta.c
@@ -33,8 +33,8 @@ struct saved_metablock {
 };
 
 struct saved_metablock *savedata;
+struct gfs2_buffer_head *savebh;
 uint64_t last_fs_block, last_reported_block, blks_saved, total_out, pct;
-struct gfs2_block_list *blocklist = NULL;
 uint64_t journal_blocks[MAX_JOURNALS_SAVED];
 uint64_t gfs1_journal_size = 0; /* in blocks */
 int journals_found = 0;
@@ -50,15 +50,15 @@ extern void read_superblock(void);
  * returns: 0 if successful
  *          -1 if this isn't gfs metadata.
  */
-static int get_gfs_struct_info(char *gbuf, int *block_type, int *gstruct_len)
+static int get_gfs_struct_info(struct gfs2_buffer_head *lbh, int *block_type,
+			       int *gstruct_len)
 {
-	struct gfs2_meta_header mh, mhbuf;
+	struct gfs2_meta_header mh;
 
 	*block_type = 0;
 	*gstruct_len = sbd.bsize;
 
-	memcpy(&mhbuf, gbuf, sizeof(mhbuf));
-	gfs2_meta_header_in(&mh, (void *)&mhbuf);
+	gfs2_meta_header_in(&mh, lbh);
 	if (mh.mh_magic != GFS2_MAGIC)
 		return -1;
 
@@ -133,7 +133,7 @@ static void warm_fuzzy_stuff(uint64_t wfsblock, int force, int save)
 		if (last_fs_block) {
 			printf("\r");
 			if (save) {
-				percent = (block * 100) / last_fs_block;
+				percent = (wfsblock * 100) / last_fs_block;
 				printf("%" PRIu64 " metadata blocks (%"
 				       PRIu64 "%%) processed, ", wfsblock,
 				       percent);
@@ -187,28 +187,15 @@ static int save_block(int fd, int out_fd, uint64_t blk)
 		return 0;
 	}
 	memset(savedata, 0, sizeof(struct saved_metablock));
-	if (lseek(fd, blk * sbd.bsize, SEEK_SET) != blk * sbd.bsize) {
-		fprintf(stderr, "bad seek: %s from %s:%d: "
-			"block %lld (0x%llx)\n", strerror(errno),
-			__FUNCTION__, __LINE__,
-			(unsigned long long)blk, (unsigned long long)blk);
-		exit(-1);
-	}
-	/* read in the block */
-	if (read(fd, savedata->buf, sbd.bsize) != sbd.bsize) {
-		fprintf(stderr, "bad read: %s from %s:%d: "
-			"block %lld (0x%llx)\n", strerror(errno),
-			__FUNCTION__, __LINE__,
-			(unsigned long long)blk, (unsigned long long)blk);
-		exit(-1);
-	}
+	savebh = bread(&sbd, blk);
+	memcpy(&savedata->buf, savebh->b_data, sbd.bsize);
 
 	/* If this isn't metadata and isn't a system file, we don't want it.
 	   Note that we're checking "block" here rather than blk.  That's
 	   because we want to know if the source inode's "block" is a system
 	   inode, not the block within the inode "blk". They may or may not
 	   be the same thing. */
-	if (get_gfs_struct_info(savedata->buf, &blktype, &blklen) &&
+	if (get_gfs_struct_info(savebh, &blktype, &blklen) &&
 	    !block_is_systemfile())
 		return 0; /* Not metadata, and not system file, so skip it */
 	trailing0 = 0;
@@ -305,15 +292,14 @@ static void save_indirect_blocks(int out_fd, osi_list_t *cur_list,
 		old_block = indir_block;
 		blktype = save_block(sbd.device_fd, out_fd, indir_block);
 		if (blktype == GFS2_METATYPE_EA) {
-			nbh = bread(&sbd.buf_list, indir_block);
+			nbh = bread(&sbd, indir_block);
 			save_ea_block(out_fd, nbh);
-			brelse(nbh, not_updated);
+			brelse(nbh);
 		}
 		if (height != hgt) { /* If not at max height */
-			nbh = bread(&sbd.buf_list, indir_block);
-			osi_list_add_prev(&nbh->b_altlist,
-					  cur_list);
-			brelse(nbh, not_updated);
+			nbh = bread(&sbd, indir_block);
+			osi_list_add_prev(&nbh->b_altlist, cur_list);
+			brelse(nbh);
 		}
 	} /* for all data on the indirect block */
 }
@@ -344,7 +330,7 @@ static void save_inode_data(int out_fd)
 
 	for (i = 0; i < GFS2_MAX_META_HEIGHT; i++)
 		osi_list_init(&metalist[i]);
-	metabh = bread(&sbd.buf_list, block);
+	metabh = bread(&sbd, block);
 	if (gfs1)
 		inode = inode_get(&sbd, metabh);
 	else
@@ -393,16 +379,17 @@ static void save_inode_data(int out_fd)
 	}
 	if (inode->i_di.di_eattr) { /* if this inode has extended attributes */
 		struct gfs2_meta_header mh;
+		struct gfs2_buffer_head *lbh;
 
-		metabh = bread(&sbd.buf_list, inode->i_di.di_eattr);
+		lbh = bread(&sbd, inode->i_di.di_eattr);
 		save_block(sbd.device_fd, out_fd, inode->i_di.di_eattr);
-		gfs2_meta_header_in(&mh, metabh->b_data);
+		gfs2_meta_header_in(&mh, lbh);
 		if (mh.mh_magic == GFS2_MAGIC &&
 		    mh.mh_type == GFS2_METATYPE_EA)
-			save_ea_block(out_fd, metabh);
+			save_ea_block(out_fd, lbh);
 		else if (mh.mh_magic == GFS2_MAGIC &&
 			 mh.mh_type == GFS2_METATYPE_IN)
-			save_indirect_blocks(out_fd, cur_list, metabh, 2, 2);
+			save_indirect_blocks(out_fd, cur_list, lbh, 2, 2);
 		else {
 			if (mh.mh_magic == GFS2_MAGIC) /* if it's metadata */
 				save_block(sbd.device_fd, out_fd,
@@ -416,15 +403,15 @@ static void save_inode_data(int out_fd)
 				(unsigned long long)block,
 				(unsigned long long)block);
 		}
-		brelse(metabh, not_updated);
+		brelse(lbh);
 	}
-	inode_put(inode, not_updated);
+	inode_put(&inode);
+	brelse(metabh);
 }
 
 static void get_journal_inode_blocks(void)
 {
 	int journal;
-	struct gfs2_buffer_head *bh;
 
 	journals_found = 0;
 	memset(journal_blocks, 0, sizeof(journal_blocks));
@@ -440,19 +427,18 @@ static void get_journal_inode_blocks(void)
 	for (journal = 0; ; journal++) { /* while journals exist */
 		uint64_t jblock;
 		int amt;
-		struct gfs2_dinode jdi;
 		struct gfs2_inode *j_inode = NULL;
 
 		if (gfs1) {
 			struct gfs_jindex ji;
 			char jbuf[sizeof(struct gfs_jindex)];
 
-			bh = bread(&sbd.buf_list, sbd1->sb_jindex_di.no_addr);
-			j_inode = gfs_inode_get(&sbd, bh);
+			j_inode = gfs_inode_read(&sbd,
+						 sbd1->sb_jindex_di.no_addr);
 			amt = gfs2_readi(j_inode, (void *)&jbuf,
 					 journal * sizeof(struct gfs_jindex),
 					 sizeof(struct gfs_jindex));
-			brelse(bh, not_updated);
+			inode_put(&j_inode);
 			if (!amt)
 				break;
 			gfs_jindex_in(&ji, jbuf);
@@ -462,21 +448,19 @@ static void get_journal_inode_blocks(void)
 			if (journal > indirect->ii[0].dirents - 3)
 				break;
 			jblock = indirect->ii[0].dirent[journal + 2].block;
-			bh = bread(&sbd.buf_list, jblock);
-			j_inode = inode_get(&sbd, bh);
-			gfs2_dinode_in(&jdi, bh->b_data);
-			inode_put(j_inode, not_updated);
 		}
 		journal_blocks[journals_found++] = jblock;
 	}
 }
 
-static int next_rg_freemeta(struct rgrp_list *rgd, uint64_t *nrfblock, int first)
+static int next_rg_freemeta(struct gfs2_sbd *sdp, struct rgrp_list *rgd,
+			    uint64_t *nrfblock, int first)
 {
 	struct gfs2_bitmap *bits = NULL;
 	uint32_t length = rgd->ri.ri_length;
 	uint32_t blk = (first)? 0: (uint32_t)((*nrfblock+1)-rgd->ri.ri_data0);
 	int i;
+	struct gfs2_buffer_head *lbh;
 
 	if(!first && (*nrfblock < rgd->ri.ri_data0)) {
 		log_err("next_rg_freemeta:  Start block is outside rgrp "
@@ -491,9 +475,11 @@ static int next_rg_freemeta(struct rgrp_list *rgd, uint64_t *nrfblock, int first
 	}
 	for(; i < length; i++){
 		bits = &rgd->bits[i];
-		blk = gfs2_bitfit((unsigned char *)rgd->bh[i]->b_data +
+		lbh = bread(sdp, rgd->ri.ri_addr + i);
+		blk = gfs2_bitfit((unsigned char *)lbh->b_data +
 				  bits->bi_offset, bits->bi_len, blk,
 				  GFS2_BLKST_UNLINKED);
+		brelse(lbh);
 		if(blk != BFITNOENT){
 			*nrfblock = blk + (bits->bi_start * GFS2_NBBY) +
 				rgd->ri.ri_data0;
@@ -511,10 +497,9 @@ void savemeta(char *out_fn, int saveoption)
 	int out_fd;
 	int slow;
 	osi_list_t *tmp;
-	uint64_t memreq;
 	int rgcount;
 	uint64_t jindex_block;
-	struct gfs2_buffer_head *bh;
+	struct gfs2_buffer_head *lbh;
 
 	slow = (saveoption == 1);
 	sbd.md.journals = 1;
@@ -551,8 +536,6 @@ void savemeta(char *out_fn, int saveoption)
 			exit(-1);
 		}
 		osi_list_init(&sbd.rglist);
-		init_buf_list(&sbd, &sbd.buf_list, 128 << 20);
-		init_buf_list(&sbd, &sbd.nvbuf_list, 0xffffffff);
 		if (!gfs1)
 			sbd.sd_sb.sb_bsize = GFS2_DEFAULT_BSIZE;
 		if (compute_constants(&sbd)) {
@@ -585,24 +568,23 @@ void savemeta(char *out_fn, int saveoption)
 	       last_fs_block, sbd.bsize);
 	if (!slow) {
 		if (gfs1) {
-			sbd.md.riinode =
-				gfs2_load_inode(&sbd,
+			sbd.md.riinode = inode_read(&sbd,
 						sbd1->sb_rindex_di.no_addr);
 			jindex_block = sbd1->sb_jindex_di.no_addr;
 		} else {
 			sbd.master_dir =
-				gfs2_load_inode(&sbd,
-						sbd.sd_sb.sb_master_dir.no_addr);
+				inode_read(&sbd,
+					sbd.sd_sb.sb_master_dir.no_addr);
 
 			slow = gfs2_lookupi(sbd.master_dir, "rindex", 6, 
 					    &sbd.md.riinode);
 			jindex_block = masterblock("jindex");
 		}
-		bh = bread(&sbd.buf_list, jindex_block);
-		gfs2_dinode_in(&di, bh->b_data);
+		lbh = bread(&sbd, jindex_block);
+		gfs2_dinode_in(&di, lbh);
 		if (!gfs1)
-			do_dinode_extended(&di, bh->b_data);
-		brelse(bh, not_updated);
+			do_dinode_extended(&di, lbh);
+		brelse(lbh);
 	}
 	if (!slow) {
 		printf("Reading resource groups...");
@@ -614,12 +596,6 @@ void savemeta(char *out_fn, int saveoption)
 		printf("Done.\n\n");
 		fflush(stdout);
 	}
-	if (!slow) {
-		blocklist = gfs2_block_list_create(&sbd, last_fs_block + 1,
-						   &memreq);
-		if (!blocklist)
-			slow = TRUE;
-	}
 	get_journal_inode_blocks();
 	if (!slow) {
 		/* Save off the superblock */
@@ -646,7 +622,7 @@ void savemeta(char *out_fn, int saveoption)
 		for (tmp = sbd.rglist.next; tmp != &sbd.rglist;
 		     tmp = tmp->next){
 			struct rgrp_list *rgd;
-			int i, first;
+			int first;
 
 			rgd = osi_list_entry(tmp, struct rgrp_list, list);
 			slow = gfs2_rgrp_read(&sbd, rgd);
@@ -656,12 +632,6 @@ void savemeta(char *out_fn, int saveoption)
 				  (unsigned long long)rgd->ri.ri_addr,
 				  (unsigned long long)rgd->ri.ri_addr,
 				  rgd->ri.ri_length);
-			for (i = 0; i < rgd->ri.ri_length; i++) {
-				if(gfs2_block_set(&sbd, blocklist,
-						  rgd->ri.ri_addr + i,
-						  gfs2_meta_other))
-					break;
-			}
 			first = 1;
 			/* Save off the rg and bitmaps */
 			for (block = rgd->ri.ri_addr;
@@ -673,7 +643,7 @@ void savemeta(char *out_fn, int saveoption)
 			if (saveoption != 2) {
 				int blktype;
 
-				while (!gfs2_next_rg_meta(rgd, &block, first)) {
+				while (!gfs2_next_rg_meta(rgd, &block, first)){
 					warm_fuzzy_stuff(block, FALSE, TRUE);
 					blktype = save_block(sbd.device_fd,
 							     out_fd, block);
@@ -684,13 +654,14 @@ void savemeta(char *out_fn, int saveoption)
 				/* Save off the free/unlinked meta blocks too.
 				   If we don't, we may run into metadata
 				   allocation issues. */
-				while (!next_rg_freemeta(rgd, &block, first)) {
+				while (!next_rg_freemeta(&sbd, rgd, &block,
+							 first)) {
 					blktype = save_block(sbd.device_fd,
 							     out_fd, block);
 					first = 0;
 				}
 			}
-			gfs2_rgrp_relse(rgd, not_updated);
+			gfs2_rgrp_relse(rgd);
 		}
 	}
 	if (slow) {
@@ -699,8 +670,6 @@ void savemeta(char *out_fn, int saveoption)
 		}
 	}
 	/* Clean up */
-	if (blocklist)
-		gfs2_block_list_destroy(&sbd, blocklist);
 	/* There may be a gap between end of file system and end of device */
 	/* so we tell the user that we've processed everything. */
 	block = last_fs_block;
@@ -766,6 +735,8 @@ static int restore_data(int fd, int in_fd, int printblocksonly)
 	blks_saved = total_out = 0;
 	last_fs_block = 0;
 	while (TRUE) {
+		struct gfs2_buffer_head dummy_bh;
+
 		memset(savedata, 0, sizeof(struct saved_metablock));
 		rs = read(in_fd, &buf64, sizeof(uint64_t));
 		if (!rs)
@@ -807,8 +778,9 @@ static int restore_data(int fd, int in_fd, int printblocksonly)
 		if (first) {
 			struct gfs2_sb bufsb;
 
+			dummy_bh.b_data = (char *)&bufsb;
 			memcpy(&bufsb, savedata->buf, sizeof(bufsb));
-			gfs2_sb_in(&sbd.sd_sb, (void *)&bufsb);
+			gfs2_sb_in(&sbd.sd_sb, &dummy_bh);
 			sbd1 = (struct gfs_sb *)&sbd.sd_sb;
 			if (sbd1->sb_fs_format == GFS_FORMAT_FS &&
 			    sbd1->sb_header.mh_type ==
@@ -836,19 +808,20 @@ static int restore_data(int fd, int in_fd, int printblocksonly)
 			}
 			first = 0;
 		}
+		bh = &dummy_bh;
+		bh->b_data = savedata->buf;
 		if (printblocksonly) {
 			block = savedata->blk;
 			if (block > highest_valid_block)
 				highest_valid_block = block;
 			if (printblocksonly > 1 && printblocksonly == block) {
-				memcpy(buf, savedata->buf, sbd.bsize);
 				block_in_mem = block;
 				display(0);
 				return 0;
 			} else if (printblocksonly == 1) {
 				print_gfs2("%d (l=0x%x): ", blks_saved,
 					   savedata->siglen);
-				display_block_type(savedata->buf, TRUE);
+				display_block_type(TRUE);
 			}
 		} else {
 			warm_fuzzy_stuff(savedata->blk, FALSE, FALSE);
@@ -862,17 +835,16 @@ static int restore_data(int fd, int in_fd, int printblocksonly)
 				fprintf(stderr, "bad seek: %s from %s:"
 					"%d: block %lld (0x%llx)\n",
 					strerror(errno), __FUNCTION__,
-					__LINE__, (unsigned long long)
-					savedata->blk,
-					(unsigned long long)
-					savedata->blk);
+					__LINE__,
+					(unsigned long long)savedata->blk,
+					(unsigned long long)savedata->blk);
 				exit(-1);
 			}
 			if (write(fd, savedata->buf, sbd.bsize) != sbd.bsize) {
 				fprintf(stderr, "write error: %s from "
 					"%s:%d: block %lld (0x%llx)\n",
-					strerror(errno),
-					__FUNCTION__, __LINE__,
+					strerror(errno), __FUNCTION__,
+					__LINE__,
 					(unsigned long long)savedata->blk,
 					(unsigned long long)savedata->blk);
 				exit(-1);
diff --git a/gfs2/fsck/eattr.c b/gfs2/fsck/eattr.c
index 6489e6d..5d3e7cd 100644
--- a/gfs2/fsck/eattr.c
+++ b/gfs2/fsck/eattr.c
@@ -5,44 +5,9 @@
 
 #include "libgfs2.h"
 #include "fsck.h"
+#include "metawalk.h"
 #include "eattr.h"
 
-static int clear_blk_nodup(struct gfs2_sbd *sbp, uint64_t block)
-{
-	struct gfs2_block_query q;
-
-	if(gfs2_block_check(sbp, bl, block, &q)) {
-		stack;
-		return -1;
-	}
-
-	if(q.dup_block) {
-		log_debug( _("Not clearing block with marked as a duplicate\n"));
-		return 1;
-	}
-
-	gfs2_block_set(sbp, bl, block, gfs2_block_free);
-
-	return 0;
-
-}
-
-int clear_eattr_indir(struct gfs2_inode *ip, uint64_t block,
-		      uint64_t parent, struct gfs2_buffer_head **bh,
-		      enum update_flags *want_updated, void *private)
-{
-	*want_updated = not_updated;
-	return clear_blk_nodup(ip->i_sbd, block);
-}
-
-int clear_eattr_leaf(struct gfs2_inode *ip, uint64_t block,
-		     uint64_t parent, struct gfs2_buffer_head **bh,
-		     enum update_flags *want_updated, void *private)
-{
-	*want_updated = not_updated;
-	return clear_blk_nodup(ip->i_sbd, block);
-}
-
 int clear_eattr_entry (struct gfs2_inode *ip,
 		       struct gfs2_buffer_head *leaf_bh,
 		       struct gfs2_ea_header *ea_hdr,
@@ -50,17 +15,12 @@ int clear_eattr_entry (struct gfs2_inode *ip,
 		       void *private)
 {
 	struct gfs2_sbd *sdp = ip->i_sbd;
-	char ea_name[256];
 
 	if(!ea_hdr->ea_name_len){
 		/* Skip this entry for now */
 		return 1;
 	}
 
-	memset(ea_name, 0, sizeof(ea_name));
-	strncpy(ea_name, (char *)ea_hdr + sizeof(struct gfs2_ea_header),
-		ea_hdr->ea_name_len);
-
 	if(!GFS2_EATYPE_VALID(ea_hdr->ea_type) &&
 	   ((ea_hdr_prev) || (!ea_hdr_prev && ea_hdr->ea_type))){
 		/* Skip invalid entry */
@@ -79,8 +39,7 @@ int clear_eattr_entry (struct gfs2_inode *ip,
 		} else {
 			log_debug( _("  Pointers Required: %d\n"
 				  "  Pointers Reported: %d\n"),
-				  max_ptrs,
-				  ea_hdr->ea_num_ptrs);
+				  max_ptrs, ea_hdr->ea_num_ptrs);
 		}
 
 
@@ -91,14 +50,11 @@ int clear_eattr_entry (struct gfs2_inode *ip,
 int clear_eattr_extentry(struct gfs2_inode *ip, uint64_t *ea_data_ptr,
 			 struct gfs2_buffer_head *leaf_bh,
 			 struct gfs2_ea_header *ea_hdr,
-			 struct gfs2_ea_header *ea_hdr_prev,
-			 enum update_flags *want_updated, void *private)
+			 struct gfs2_ea_header *ea_hdr_prev, void *private)
 {
 	uint64_t block = be64_to_cpu(*ea_data_ptr);
 
-	*want_updated = not_updated;
-	return clear_blk_nodup(ip->i_sbd, block);
-
+	return delete_eattr_leaf(ip, block, 0, &leaf_bh, private);
 }
 
 
diff --git a/gfs2/fsck/eattr.h b/gfs2/fsck/eattr.h
index b93b50b..f4264b0 100644
--- a/gfs2/fsck/eattr.h
+++ b/gfs2/fsck/eattr.h
@@ -2,11 +2,9 @@
 #define _EATTR_H
 
 int clear_eattr_indir(struct gfs2_inode *ip, uint64_t block, uint64_t parent,
-		      struct gfs2_buffer_head **bh,
-		      enum update_flags *want_updated, void *private);
+		      struct gfs2_buffer_head **bh, void *private);
 int clear_eattr_leaf(struct gfs2_inode *ip, uint64_t block, uint64_t parent,
-		     struct gfs2_buffer_head **bh,
-		     enum update_flags *want_updated, void *private);
+		     struct gfs2_buffer_head **bh, void *private);
 int clear_eattr_entry (struct gfs2_inode *ip,
 					   struct gfs2_buffer_head *leaf_bh,
 					   struct gfs2_ea_header *ea_hdr,
@@ -15,7 +13,6 @@ int clear_eattr_entry (struct gfs2_inode *ip,
 int clear_eattr_extentry(struct gfs2_inode *ip, uint64_t *ea_data_ptr,
 			 struct gfs2_buffer_head *leaf_bh,
 			 struct gfs2_ea_header *ea_hdr,
-			 struct gfs2_ea_header *ea_hdr_prev,
-			 enum update_flags *want_updated, void *private);
+			 struct gfs2_ea_header *ea_hdr_prev, void *private);
 
 #endif /* _EATTR_H */
diff --git a/gfs2/fsck/fs_recovery.c b/gfs2/fsck/fs_recovery.c
index 16480be..05f41e4 100644
--- a/gfs2/fsck/fs_recovery.c
+++ b/gfs2/fsck/fs_recovery.c
@@ -98,7 +98,6 @@ static int buf_lo_scan_elements(struct gfs2_inode *ip, unsigned int start,
 	struct gfs2_buffer_head *bh_log, *bh_ip;
 	uint64_t blkno;
 	int error = 0;
-	enum update_flags if_modified;
 
 	if (pass != 1 || be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_METADATA)
 		return 0;
@@ -119,20 +118,23 @@ static int buf_lo_scan_elements(struct gfs2_inode *ip, unsigned int start,
 		if (error)
 			return error;
 
-		bh_ip = bget(&sdp->buf_list, blkno);
+		log_info( _("Journal replay writing metadata block #"
+			    "%lld (0x%llx) for journal+0x%x\n"),
+			  (unsigned long long)blkno, (unsigned long long)blkno,
+			  start);
+		bh_ip = bget(sdp, blkno);
 		memcpy(bh_ip->b_data, bh_log->b_data, sdp->bsize);
 
 		check_magic = ((struct gfs2_meta_header *)
 			       (bh_ip->b_data))->mh_magic;
 		check_magic = be32_to_cpu(check_magic);
-		if (check_magic != GFS2_MAGIC) {
-			if_modified = not_updated;
+		if (check_magic != GFS2_MAGIC)
 			error = -EIO;
-		} else
-			if_modified = updated;
+		else
+			bmodified(bh_ip);
 
-		brelse(bh_log, not_updated);
-		brelse(bh_ip, if_modified);
+		brelse(bh_log);
+		brelse(bh_ip);
 		if (error)
 			break;
 
@@ -170,6 +172,11 @@ static int revoke_lo_scan_elements(struct gfs2_inode *ip, unsigned int start,
 		}
 		while (offset + sizeof(uint64_t) <= sdp->sd_sb.sb_bsize) {
 			blkno = be64_to_cpu(*(__be64 *)(bh->b_data + offset));
+			log_info( _("Journal replay processing revoke for "
+				    "block #%lld (0x%llx) for journal+0x%x\n"),
+				  (unsigned long long)blkno,
+				  (unsigned long long)blkno,
+				  start);
 			error = gfs2_revoke_add(sdp, blkno, start);
 			if (error < 0)
 				return error;
@@ -181,7 +188,8 @@ static int revoke_lo_scan_elements(struct gfs2_inode *ip, unsigned int start,
 			offset += sizeof(uint64_t);
 		}
 
-		brelse(bh, updated);
+		bmodified(bh);
+		brelse(bh);
 		offset = sizeof(struct gfs2_meta_header);
 		first = 0;
 	}
@@ -218,7 +226,11 @@ static int databuf_lo_scan_elements(struct gfs2_inode *ip, unsigned int start,
 		if (error)
 			return error;
 
-		bh_ip = bget(&sdp->buf_list, blkno);
+		log_info( _("Journal replay writing data block #%lld (0x%llx)"
+			    " for journal+0x%x\n"),
+			  (unsigned long long)blkno, (unsigned long long)blkno,
+			  start);
+		bh_ip = bget(sdp, blkno);
 		memcpy(bh_ip->b_data, bh_log->b_data, sdp->bsize);
 
 		/* Unescape */
@@ -227,8 +239,9 @@ static int databuf_lo_scan_elements(struct gfs2_inode *ip, unsigned int start,
 			*eptr = cpu_to_be32(GFS2_MAGIC);
 		}
 
-		brelse(bh_log, not_updated);
-		brelse(bh_ip, updated);
+		brelse(bh_log);
+		bmodified(bh_ip);
+		brelse(bh_ip);
 
 		sd_replayed_jblocks++;
 	}
@@ -269,7 +282,8 @@ static int foreach_descriptor(struct gfs2_inode *ip, unsigned int start,
 			       (bh->b_data))->mh_magic;
 		check_magic = be32_to_cpu(check_magic);
 		if (check_magic != GFS2_MAGIC) {
-			brelse(bh, updated);
+			bmodified(bh);
+			brelse(bh);
 			return -EIO;
 		}
 		ld = (struct gfs2_log_descriptor *)bh->b_data;
@@ -281,38 +295,45 @@ static int foreach_descriptor(struct gfs2_inode *ip, unsigned int start,
 			error = get_log_header(ip, start, &lh);
 			if (!error) {
 				gfs2_replay_incr_blk(ip, &start);
-				brelse(bh, updated);
+				bmodified(bh);
+				brelse(bh);
 				continue;
 			}
 			if (error == 1)
 				error = -EIO;
-			brelse(bh, updated);
+			bmodified(bh);
+			brelse(bh);
 			return error;
 		} else if (gfs2_check_meta(bh, GFS2_METATYPE_LD)) {
-			brelse(bh, updated);
+			bmodified(bh);
+			brelse(bh);
 			return -EIO;
 		}
 		ptr = (__be64 *)(bh->b_data + offset);
 		error = databuf_lo_scan_elements(ip, start, ld, ptr, pass);
 		if (error) {
-			brelse(bh, updated);
+			bmodified(bh);
+			brelse(bh);
 			return error;
 		}
 		error = buf_lo_scan_elements(ip, start, ld, ptr, pass);
 		if (error) {
-			brelse(bh, updated);
+			bmodified(bh);
+			brelse(bh);
 			return error;
 		}
 		error = revoke_lo_scan_elements(ip, start, ld, ptr, pass);
 		if (error) {
-			brelse(bh, updated);
+			bmodified(bh);
+			brelse(bh);
 			return error;
 		}
 
 		while (length--)
 			gfs2_replay_incr_blk(ip, &start);
 
-		brelse(bh, updated);
+		bmodified(bh);
+		brelse(bh);
 	}
 
 	return 0;
@@ -363,10 +384,10 @@ static int fix_journal_seq_no(struct gfs2_inode *ip)
 		lh.lh_sequence = highest_seq;
 		prev_seq = lh.lh_sequence;
 		log_warn( _("Renumbering it as 0x%llx\n"), lh.lh_sequence);
-		block_map(ip, blk, &new, &dblock, &extlen, FALSE, not_updated);
-		bh = bread(&ip->i_sbd->buf_list, dblock);
-		gfs2_log_header_out(&lh, bh->b_data);
-		brelse(bh, updated);
+		block_map(ip, blk, &new, &dblock, &extlen, FALSE);
+		bh = bread(ip->i_sbd, dblock);
+		gfs2_log_header_out(&lh, bh);
+		brelse(bh);
 	}
 	return 0;
 }
@@ -438,9 +459,9 @@ static int gfs2_recover_journal(struct gfs2_inode *ip, int j, int preen,
 				 "without -a or -p.\n"));
 			goto out;
 		}
-		if (!query(&opts, _("\nJournal #%d (\"journal%d\") is "
-				    "corrupt.  Okay to repair it? (y/n)"),
-			   j+1, j)) {
+		if (!query( _("\nJournal #%d (\"journal%d\") is "
+			      "corrupt.  Okay to repair it? (y/n)"),
+			    j+1, j)) {
 			log_err( _("jid=%u: The journal was not repaired.\n"),
 				 j);
 			goto out;
@@ -483,42 +504,47 @@ static int gfs2_recover_journal(struct gfs2_inode *ip, int j, int preen,
 		error = FSCK_ERROR;
 		goto out;
 	}
-	if (query(&opts, _("\nJournal #%d (\"journal%d\") is dirty.  Okay to "
-			   "replay it? (y/n)"), j+1, j)) {
-		log_info( _("jid=%u: Replaying journal...\n"), j);
-
-		sd_found_jblocks = sd_replayed_jblocks = 0;
-		sd_found_metablocks = sd_replayed_metablocks = 0;
-		sd_found_revokes = 0;
-		sd_replay_tail = head.lh_tail;
-		for (pass = 0; pass < 2; pass++) {
-			error = foreach_descriptor(ip, head.lh_tail,
-						   head.lh_blkno, pass);
-			if (error)
-				goto out;
-		}
-		log_info( _("jid=%u: Found %u revoke tags\n"), j,
-			 sd_found_revokes);
-		gfs2_revoke_clean(sdp);
-		error = clean_journal(ip, &head);
+	if (!query( _("\nJournal #%d (\"journal%d\") is dirty.  Okay to "
+		      "replay it? (y/n)"), j+1, j))
+		goto reinit;
+
+	log_info( _("jid=%u: Replaying journal...\n"), j);
+
+	sd_found_jblocks = sd_replayed_jblocks = 0;
+	sd_found_metablocks = sd_replayed_metablocks = 0;
+	sd_found_revokes = 0;
+	sd_replay_tail = head.lh_tail;
+	for (pass = 0; pass < 2; pass++) {
+		error = foreach_descriptor(ip, head.lh_tail,
+					   head.lh_blkno, pass);
 		if (error)
 			goto out;
-		log_err( _("jid=%u: Replayed %u of %u journaled data blocks\n"),
-			j, sd_replayed_jblocks, sd_found_jblocks);
-		log_err( _("jid=%u: Replayed %u of %u metadata blocks\n"),
-			j, sd_replayed_metablocks, sd_found_metablocks);
-	} else {
-		if (query(&opts, _("Do you want to clear the dirty journal instead? (y/n)"))) {
-			write_journal(sdp, sdp->md.journal[j], j,
-				      sdp->md.journal[j]->i_di.di_size /
-				      sdp->sd_sb.sb_bsize);
-			
-		} else
-			log_err( _("jid=%u: Dirty journal not replayed or cleared.\n"), j);
 	}
+	log_info( _("jid=%u: Found %u revoke tags\n"), j, sd_found_revokes);
+	gfs2_revoke_clean(sdp);
+	error = clean_journal(ip, &head);
+	if (error)
+		goto out;
+	log_err( _("jid=%u: Replayed %u of %u journaled data blocks\n"),
+		 j, sd_replayed_jblocks, sd_found_jblocks);
+	log_err( _("jid=%u: Replayed %u of %u metadata blocks\n"),
+		 j, sd_replayed_metablocks, sd_found_metablocks);
 
+	/* Check for errors and give them the option to reinitialize the
+	   journal. */
 out:
-	log_info( _("jid=%u: %s\n"), j, (error) ? _("Failed") : _("Done"));
+	if (!error) {
+		log_info( _("jid=%u: Done\n"), j);
+		return 0;
+	}
+	log_info( _("jid=%u: Failed\n"), j);
+reinit:
+	if (query( _("Do you want to clear the journal instead? (y/n)")))
+		error = write_journal(sdp, sdp->md.journal[j], j,
+				      sdp->md.journal[j]->i_di.di_size /
+				      sdp->sd_sb.sb_bsize);
+	else
+		log_err( _("jid=%u: journal not cleared.\n"), j);
 	return error;
 }
 
@@ -544,8 +570,7 @@ int replay_journals(struct gfs2_sbd *sdp, int preen, int force_check,
 	*clean_journals = 0;
 
 	/* Get master dinode */
-	sdp->master_dir = gfs2_load_inode(sdp,
-					  sdp->sd_sb.sb_master_dir.no_addr);
+	sdp->master_dir = inode_read(sdp, sdp->sd_sb.sb_master_dir.no_addr);
 	gfs2_lookupi(sdp->master_dir, "jindex", 6, &sdp->md.jiinode);
 
 	/* read in the journal index data */
@@ -569,13 +594,11 @@ int replay_journals(struct gfs2_sbd *sdp, int preen, int force_check,
 			}
 			*clean_journals += clean;
 		}
-		inode_put(sdp->md.journal[i],
-			  (opts.no ? not_updated : updated));
+		inode_put(&sdp->md.journal[i]);
 	}
-	inode_put(sdp->master_dir, not_updated);
-	inode_put(sdp->md.jiinode, not_updated);
+	inode_put(&sdp->master_dir);
+	inode_put(&sdp->md.jiinode);
 	/* Sync the buffers to disk so we get a fresh start. */
-	bsync(&sdp->buf_list);
-	bsync(&sdp->nvbuf_list);
+	fsync(sdp->device_fd);
 	return error;
 }
diff --git a/gfs2/fsck/fsck.h b/gfs2/fsck/fsck.h
index 1b479e9..022b61e 100644
--- a/gfs2/fsck/fsck.h
+++ b/gfs2/fsck/fsck.h
@@ -2,12 +2,13 @@
 #define _FSCK_H
 
 #include "libgfs2.h"
+#include "osi_tree.h"
 
 #define FSCK_HASH_SHIFT         (13)
 #define FSCK_HASH_SIZE          (1 << FSCK_HASH_SHIFT)
 #define FSCK_HASH_MASK          (FSCK_HASH_SIZE - 1)
 
-#define query(opts, fmt, args...) gfs2_query(&fsck_abort, opts, fmt, ##args)
+#define query(fmt, args...) fsck_query(fmt, ##args)
 
 /*
  * Exit codes used by fsck-type programs
@@ -22,9 +23,11 @@
 #define FSCK_CANCELED    32     /* Aborted with a signal or ^C */
 #define FSCK_LIBRARY     128    /* Shared library error */
 
+#define BAD_POINTER_TOLERANCE 10 /* How many bad pointers is too many? */
+
 struct inode_info
 {
-        osi_list_t list;
+        struct osi_node node;
         uint64_t   inode;
         uint16_t   link_count;   /* the number of links the inode
                                   * thinks it has */
@@ -33,7 +36,7 @@ struct inode_info
 
 struct dir_info
 {
-        osi_list_t list;
+        struct osi_node node;
         uint64_t dinode;
         uint64_t treewalk_parent;
         uint64_t dotdot_parent;
@@ -44,10 +47,35 @@ struct dir_info
 struct dir_status {
 	uint8_t dotdir:1;
 	uint8_t dotdotdir:1;
-	struct gfs2_block_query q;
+	uint8_t q;
 	uint32_t entry_count;
 };
 
+struct duptree {
+	struct osi_node node;
+	int first_ref_found; /* Has the original reference been found? */
+	int refs;
+	uint64_t block;
+	osi_list_t ref_inode_list; /* list of inodes referencing a dup block */
+	osi_list_t ref_invinode_list; /* list of invalid inodes referencing */
+};
+
+enum dup_ref_type {
+	ref_as_data = 0,
+	ref_as_meta = 1,
+	ref_as_ea   = 2,
+	ref_types   = 3
+};
+
+struct inode_with_dups {
+	osi_list_t list;
+	uint64_t block_no;
+	int dup_count;
+	int reftypecount[ref_types];
+	uint64_t parent;
+	char *name;
+};
+
 enum rgindex_trust_level { /* how far can we trust our RG index? */
 	blind_faith = 0, /* We'd like to trust the rgindex. We always used to
 			    before bz 179069. This should cover most cases. */
@@ -59,37 +87,46 @@ enum rgindex_trust_level { /* how far can we trust our RG index? */
 			  gfs2_grow or something.  Count the RGs by hand. */
 };
 
-struct gfs2_inode *get_system_inode(struct gfs2_sbd *sbp, uint64_t block);
-struct gfs2_inode *fsck_load_inode(struct gfs2_sbd *sbp, uint64_t block);
-struct gfs2_inode *fsck_inode_get(struct gfs2_sbd *sdp,
+extern struct gfs2_inode *fsck_load_inode(struct gfs2_sbd *sbp, uint64_t block);
+extern struct gfs2_inode *fsck_inode_get(struct gfs2_sbd *sdp,
 				  struct gfs2_buffer_head *bh);
-void fsck_inode_put(struct gfs2_inode *ip, enum update_flags update);
-
-int initialize(struct gfs2_sbd *sbp, int force_check, int preen,
-	       int *all_clean);
-void destroy(struct gfs2_sbd *sbp);
-int pass1(struct gfs2_sbd *sbp);
-int pass1b(struct gfs2_sbd *sbp);
-int pass1c(struct gfs2_sbd *sbp);
-int pass2(struct gfs2_sbd *sbp);
-int pass3(struct gfs2_sbd *sbp);
-int pass4(struct gfs2_sbd *sbp);
-int pass5(struct gfs2_sbd *sbp);
-int rg_repair(struct gfs2_sbd *sdp, int trust_lvl, int *rg_count);
+extern void fsck_inode_put(struct gfs2_inode **ip);
+
+extern int initialize(struct gfs2_sbd *sbp, int force_check, int preen,
+		      int *all_clean);
+extern void destroy(struct gfs2_sbd *sbp);
+extern int pass1(struct gfs2_sbd *sbp);
+extern int pass1b(struct gfs2_sbd *sbp);
+extern int pass1c(struct gfs2_sbd *sbp);
+extern int pass2(struct gfs2_sbd *sbp);
+extern int pass3(struct gfs2_sbd *sbp);
+extern int pass4(struct gfs2_sbd *sbp);
+extern int pass5(struct gfs2_sbd *sbp);
+extern int rg_repair(struct gfs2_sbd *sdp, int trust_lvl, int *rg_count);
+extern void gfs2_dup_free(void);
+extern int fsck_query(const char *format, ...)
+	__attribute__((format(printf,1,2)));
+extern struct dir_info *dirtree_find(uint64_t block);
+extern void dup_delete(struct duptree *b);
+extern void dirtree_delete(struct dir_info *b);
 
 /* FIXME: Hack to get this going for pass2 - this should be pulled out
  * of pass1 and put somewhere else... */
-int add_to_dir_list(struct gfs2_sbd *sbp, uint64_t block);
+struct dir_info *dirtree_insert(uint64_t dblock);
 
 extern struct gfs2_options opts;
 extern struct gfs2_inode *lf_dip; /* Lost and found directory inode */
-extern osi_list_t dir_hash[FSCK_HASH_SIZE];
-extern osi_list_t inode_hash[FSCK_HASH_SIZE];
-extern struct gfs2_block_list *bl;
+extern struct gfs2_bmap *bl;
 extern uint64_t last_fs_block, last_reported_block;
+extern int64_t last_reported_fblock;
 extern int skip_this_pass, fsck_abort;
 extern int errors_found, errors_corrected;
 extern uint64_t last_data_block;
 extern uint64_t first_data_block;
-
+extern struct osi_root dup_blocks;
+extern struct osi_root dirtree;
+extern struct osi_root inodetree;
+extern int dups_found; /* How many duplicate references have we found? */
+extern int dups_found_first; /* How many duplicates have we found the original
+				reference for? */
 #endif /* _FSCK_H */
diff --git a/gfs2/fsck/initialize.c b/gfs2/fsck/initialize.c
index f9f82fa..2910326 100644
--- a/gfs2/fsck/initialize.c
+++ b/gfs2/fsck/initialize.c
@@ -16,6 +16,8 @@
 #include "fsck.h"
 #include "util.h"
 #include "fs_recovery.h"
+#include "metawalk.h"
+#include "inode_hash.h"
 
 #define CLEAR_POINTER(x) \
 	if(x) { \
@@ -23,6 +25,8 @@
 		x = NULL; \
 	}
 
+static int was_mounted_ro = 0;
+
 /**
  * block_mounters
  *
@@ -55,6 +59,38 @@ static int block_mounters(struct gfs2_sbd *sbp, int block_em)
 	return 0;
 }
 
+/* Empty the dup_blocks tree by deleting its first node until none is left. */
+void gfs2_dup_free(void)
+{
+	struct osi_node *first;
+
+	/* struct duptree embeds its osi_node as the first member, so the
+	   node pointer doubles as a pointer to the containing duptree. */
+	while ((first = osi_first(&dup_blocks)) != NULL)
+		dup_delete((struct duptree *)first);
+}
+
+/* Empty the dirtree by deleting its first node until none is left. */
+static void gfs2_dirtree_free(void)
+{
+	struct osi_node *first;
+
+	/* struct dir_info embeds its osi_node as the first member, so the
+	   node pointer doubles as a pointer to the containing dir_info. */
+	while ((first = osi_first(&dirtree)) != NULL)
+		dirtree_delete((struct dir_info *)first);
+}
+
+/* Empty the inodetree by deleting its first node until none is left. */
+static void gfs2_inodetree_free(void)
+{
+	struct osi_node *first;
+
+	/* struct inode_info embeds its osi_node as the first member, so the
+	   node pointer doubles as a pointer to the containing inode_info. */
+	while ((first = osi_first(&inodetree)) != NULL)
+		inodetree_delete((struct inode_info *)first);
+}
 
 /*
  * empty_super_block - free all structures in the super block
@@ -67,38 +103,14 @@ static int block_mounters(struct gfs2_sbd *sbp, int block_em)
  */
 static void empty_super_block(struct gfs2_sbd *sdp)
 {
-	uint32_t i;
-
 	log_info( _("Freeing buffers.\n"));
-	while(!osi_list_empty(&sdp->rglist)){
-		struct rgrp_list *rgd;
-
-		rgd = osi_list_entry(sdp->rglist.next, struct rgrp_list, list);
-		log_debug( _("Deleting rgd for 0x%llx:  rgd=0x%p bits=0x%p\n"),
-			  (unsigned long long)rgd->ri.ri_addr, rgd, rgd->bits);
-		osi_list_del(&rgd->list);
-		if(rgd->bits)
-			free(rgd->bits);
-		free(rgd);
-	}
-
-	for(i = 0; i < FSCK_HASH_SIZE; i++) {
-		while(!osi_list_empty(&inode_hash[i])) {
-			struct inode_info *ii;
-			ii = osi_list_entry(inode_hash[i].next, struct inode_info, list);
-			osi_list_del(&ii->list);
-			free(ii);
-		}
-		while(!osi_list_empty(&dir_hash[i])) {
-			struct dir_info *di;
-			di = osi_list_entry(dir_hash[i].next, struct dir_info, list);
-			osi_list_del(&di->list);
-			free(di);
-		}
-	}
+	gfs2_rgrp_free(&sdp->rglist);
 
 	if (bl)
-		gfs2_block_list_destroy(sdp, bl);
+		gfs2_bmap_destroy(sdp, bl);
+	gfs2_inodetree_free();
+	gfs2_dirtree_free();
+	gfs2_dup_free();
 }
 
 
@@ -168,6 +180,132 @@ static int set_block_ranges(struct gfs2_sbd *sdp)
 }
 
 /**
+ * check_rgrp_integrity - count the free blocks in a rgrp's bitmaps, offer to
+ * reclaim unlinked blocks, and compare the total with rgd->rg.rg_free */
+static void check_rgrp_integrity(struct gfs2_sbd *sdp, struct rgrp_list *rgd,
+				 int *fixit, int *this_rg_fixed,
+				 int *this_rg_bad)
+{
+	uint32_t rg_free, rg_reclaimed; /* free blocks seen / blocks reclaimed */
+	int rgb, x, y, off, bytes_to_check, total_bytes_to_check;
+	unsigned int state;
+
+	rg_free = rg_reclaimed = 0;
+	total_bytes_to_check = rgd->ri.ri_bitbytes;
+	*this_rg_fixed = *this_rg_bad = 0; /* per-rgrp results for the caller */
+
+	for (rgb = 0; rgb < rgd->ri.ri_length; rgb++){
+		/* Skip the header at the start of each block's buffer */
+		off = (rgb) ? sizeof(struct gfs2_meta_header) :
+			sizeof(struct gfs2_rgrp);
+		if (total_bytes_to_check <= sdp->bsize - off)
+			bytes_to_check = total_bytes_to_check;
+		else
+			bytes_to_check = sdp->bsize - off;
+		total_bytes_to_check -= bytes_to_check;
+		for (x = 0; x < bytes_to_check; x++) {
+			unsigned char *byte;
+
+			byte = (unsigned char *)&rgd->bh[rgb]->b_data[off + x];
+			if (*byte == 0x55) /* fast path: adds no free blocks */
+				continue;
+			if (*byte == 0x00) { /* fast path: GFS2_NBBY free blocks */
+				rg_free += GFS2_NBBY;
+				continue;
+			}
+			for (y = 0; y < GFS2_NBBY; y++) {
+				state = (*byte >>
+					 (GFS2_BIT_SIZE * y)) & GFS2_BIT_MASK;
+				if (state == GFS2_BLKST_USED)
+					continue;
+				if (state == GFS2_BLKST_DINODE)
+					continue;
+				if (state == GFS2_BLKST_FREE) {
+					rg_free++;
+					continue;
+				}
+				/* Not used, dinode or free: GFS2_BLKST_UNLINKED */
+				*this_rg_bad = 1;
+				if (!(*fixit)) { /* not approved yet: ask */
+					if (query(_("Okay to reclaim unlinked "
+						    "inodes? (y/n)")))
+						*fixit = 1;
+				}
+				if (!(*fixit))
+					continue;
+				*byte &= ~(GFS2_BIT_MASK <<
+					   (GFS2_BIT_SIZE * y)); /* zero the state */
+				bmodified(rgd->bh[rgb]);
+				rg_reclaimed++;
+				rg_free++;
+				*this_rg_fixed = 1;
+			}
+		}
+	}
+	if (rgd->rg.rg_free != rg_free) {
+		*this_rg_bad = 1;
+		log_err( _("Error: resource group %lld (0x%llx): "
+			   "free space (%d) does not match bitmap (%d)\n"),
+			 (unsigned long long)rgd->ri.ri_addr,
+			 (unsigned long long)rgd->ri.ri_addr,
+			 rgd->rg.rg_free, rg_free);
+		if (rg_reclaimed)
+			log_err( _("(%d blocks were reclaimed)\n"),
+				 rg_reclaimed);
+		if (query( _("Fix the rgrp free blocks count? (y/n)"))) {
+			rgd->rg.rg_free = rg_free;
+			gfs2_rgrp_out(&rgd->rg, rgd->bh[0]);
+			*this_rg_fixed = 1;
+			log_err( _("The rgrp was fixed.\n"));
+		} else
+			log_err( _("The rgrp was not fixed.\n"));
+	}
+	/*
+	 * Nothing is logged, and the header is left untouched, when rg_free
+	 * in the rgrp header already equals the count taken from the bitmap.
+	 * Blocks reclaimed above are included in that count, so a reclaim
+	 * can by itself turn a previously matching header count into a
+	 * mismatch, in which case the user is asked the second question
+	 * before rg_free is updated.
+	 */
+}
+
+/**
+ * check_rgrps_integrity - run check_rgrp_integrity() on every rgrp in rglist
+ *
+ * Returns: 1 if any rgrp was found bad (fixed or not), else 0; 0 on fsck_abort
+ */
+static int check_rgrps_integrity(struct gfs2_sbd *sdp)
+{
+	int rgs_good = 0, rgs_bad = 0, rgs_fixed = 0;
+	int was_bad = 0, was_fixed = 0, error = 0;
+	osi_list_t *tmp;
+	struct rgrp_list *rgd;
+	int reclaim_unlinked = 0; /* set once the user agrees to reclaim */
+
+	log_info( _("Checking the integrity of all resource groups.\n"));
+	for (tmp = sdp->rglist.next; tmp != &sdp->rglist; tmp = tmp->next) {
+		if (fsck_abort) /* abandons the walk without a summary */
+			return 0;
+		rgd = osi_list_entry(tmp, struct rgrp_list, list);
+		check_rgrp_integrity(sdp, rgd, &reclaim_unlinked,
+				     &was_fixed, &was_bad);
+		if (was_fixed)
+			rgs_fixed++;
+		if (was_bad) {
+			error = 1;
+			rgs_bad++;
+		} else
+			rgs_good++;
+	}
+	if (rgs_bad)
+		log_err( _("RGs: Consistent: %d   Inconsistent: %d   Fixed: %d"
+			   "   Total: %d\n"),
+			 rgs_good, rgs_bad, rgs_fixed, rgs_good + rgs_bad);
+	return error;
+}
+
+/**
  * init_system_inodes
  *
  * Returns: 0 on success, -1 on failure
@@ -188,10 +326,9 @@ static int init_system_inodes(struct gfs2_sbd *sdp)
 	log_info( _("Initializing special inodes...\n"));
 
 	/* Get master dinode */
-	sdp->master_dir = gfs2_load_inode(sdp,
-					  sdp->sd_sb.sb_master_dir.no_addr);
+	sdp->master_dir = inode_read(sdp, sdp->sd_sb.sb_master_dir.no_addr);
 	/* Get root dinode */
-	sdp->md.rooti = gfs2_load_inode(sdp, sdp->sd_sb.sb_root_dir.no_addr);
+	sdp->md.rooti = inode_read(sdp, sdp->sd_sb.sb_root_dir.no_addr);
 
 	/* Look for "inum" entry in master dinode */
 	gfs2_lookupi(sdp->master_dir, "inum", 4, &sdp->md.inum);
@@ -246,6 +383,8 @@ static int init_system_inodes(struct gfs2_sbd *sdp)
 	}
 	log_info( _("%u resource groups found.\n"), rgcount);
 
+	check_rgrps_integrity(sdp);
+
 	/*******************************************************************
 	 *******  Now, set boundary fields in the super block  *************
 	 *******************************************************************/
@@ -255,7 +394,7 @@ static int init_system_inodes(struct gfs2_sbd *sdp)
 		goto fail;
 	}
 
-	bl = gfs2_block_list_create(sdp, last_fs_block+1, &addl_mem_needed);
+	bl = gfs2_bmap_create(sdp, last_fs_block+1, &addl_mem_needed);
 	if (!bl) {
 		log_crit( _("This system doesn't have enough memory + swap space to fsck this file system.\n"));
 		log_crit( _("Additional memory needed is approximately: %lluMB\n"),
@@ -278,8 +417,6 @@ static int init_system_inodes(struct gfs2_sbd *sdp)
  */
 static int fill_super_block(struct gfs2_sbd *sdp)
 {
-	uint32_t i;
-
 	sync();
 
 	/********************************************************************
@@ -287,12 +424,6 @@ static int fill_super_block(struct gfs2_sbd *sdp)
 	 ********************************************************************/
 	log_info( _("Initializing lists...\n"));
 	osi_list_init(&sdp->rglist);
-	init_buf_list(sdp, &sdp->buf_list, 128 << 20);
-	init_buf_list(sdp, &sdp->nvbuf_list, 0xffffffff);
-	for(i = 0; i < FSCK_HASH_SIZE; i++) {
-		osi_list_init(&dir_hash[i]);
-		osi_list_init(&inode_hash[i]);
-	}
 
 	/********************************************************************
 	 ************  next, read in on-disk SB and set constants  **********
@@ -324,27 +455,55 @@ static int fill_super_block(struct gfs2_sbd *sdp)
 int initialize(struct gfs2_sbd *sbp, int force_check, int preen,
 	       int *all_clean)
 {
-	int clean_journals = 0;
+	int clean_journals = 0, open_flag;
 
 	*all_clean = 0;
 
-	if(opts.no) {
-		if ((sbp->device_fd = open(opts.device, O_RDONLY)) < 0) {
-			log_crit( _("Unable to open device: %s\n"), opts.device);
-			return FSCK_USAGE;
-		}
-	} else {
-		/* read in sb from disk */
-		if ((sbp->device_fd = open(opts.device, O_RDWR | O_EXCL)) < 0){
-			if (errno == EBUSY)
-				log_crit( _("Device %s is busy.\n"),
-					 opts.device);
-			else
-				log_crit( _("Unable to open device: %s\n"),
-					  opts.device);
+	if(opts.no)
+		open_flag = O_RDONLY;
+	else
+		open_flag = O_RDWR | O_EXCL;
+
+	sbp->device_fd = open(opts.device, open_flag);
+	if (sbp->device_fd < 0) {
+		int is_mounted, ro;
+
+		if (open_flag == O_RDONLY || errno != EBUSY) {
+			log_crit( _("Unable to open device: %s\n"),
+				  opts.device);
 			return FSCK_USAGE;
 		}
+		/* We can't open it EXCL.  It may be already open rw (in which
+		   case we want to deny them access) or it may be mounted as
+		   the root file system at boot time (in which case we need to
+		   allow it.)  We use is_pathname_mounted here even though
+		   we're specifying a device name, not a path name.  The
+		   function checks for device as well. */
+		strncpy(sbp->device_name, opts.device, sizeof(sbp->device_name) - 1);
+		sbp->device_name[sizeof(sbp->device_name) - 1] = '\0'; /* always terminate */
+		sbp->path_name = sbp->device_name; /* This gets overwritten */
+		is_mounted = is_pathname_mounted(sbp, &ro);
+		/* If the device is busy, but not because it's mounted, fail.
+		   This protects against cases where the file system is LVM
+		   and perhaps mounted on a different node. */
+		if (!is_mounted)
+			goto mount_fail;
+		/* If the device is mounted, but not mounted RO, fail.  This
+		   protects them against cases where the file system is
+		   mounted RW, but still allows us to check our own root
+		   file system. */
+		if (!ro)
+			goto mount_fail;
+		/* The device is mounted RO, so it's likely our own root
+		   file system.  We can only do so much to protect the users
+		   from themselves.  Try opening without O_EXCL. */
+		if ((sbp->device_fd = open(opts.device, O_RDWR)) < 0)
+			goto mount_fail;
+
+		was_mounted_ro = 1;
 	}
+
+	/* read in sb from disk */
 	if (fill_super_block(sbp)) {
 		stack;
 		return FSCK_ERROR;
@@ -380,6 +539,10 @@ int initialize(struct gfs2_sbd *sbp, int force_check, int preen,
 		return FSCK_ERROR;
 
 	return FSCK_OK;
+
+mount_fail:
+	log_crit( _("Device %s is busy.\n"), opts.device);
+	return FSCK_USAGE;
 }
 
 static void destroy_sbp(struct gfs2_sbd *sbp)
@@ -394,6 +557,15 @@ static void destroy_sbp(struct gfs2_sbd *sbp)
 	}
 	empty_super_block(sbp);
 	close(sbp->device_fd);
+	if (was_mounted_ro && errors_corrected) {
+		sbp->device_fd = open("/proc/sys/vm/drop_caches", O_WRONLY);
+		if (sbp->device_fd >= 0) {
+			write(sbp->device_fd, "2", 1);
+			close(sbp->device_fd);
+		} else
+			log_err( _("fsck.gfs2: Non-fatal error dropping "
+				   "caches.\n"));
+	}
 }
 
 void destroy(struct gfs2_sbd *sbp)
diff --git a/gfs2/fsck/inode_hash.c b/gfs2/fsck/inode_hash.c
index 45efe09..2ba1d05 100644
--- a/gfs2/fsck/inode_hash.c
+++ b/gfs2/fsck/inode_hash.c
@@ -1,77 +1,68 @@
 #include <stdint.h>
 #include <unistd.h>
+#include <libintl.h>
 
 #include "libgfs2.h"
 #include "osi_list.h"
 #include "hash.h"
 #include "inode_hash.h"
 #include "fsck.h"
+#define _(String) gettext(String)
 
-static uint32_t gfs2_inode_hash(uint64_t block_no)
+struct inode_info *inodetree_find(uint64_t block)
 {
-	unsigned int h;
+	struct osi_node *node = inodetree.osi_node;
 
-	h = fsck_hash(&block_no, sizeof (uint64_t));
-	h &= FSCK_HASH_MASK;
+	while (node) {
+		struct inode_info *data = (struct inode_info *)node;
 
-	return h;
-}
-
-struct inode_info *inode_hash_search(osi_list_t *buckets, uint64_t key)
-{
-	struct inode_info *ii;
-	osi_list_t *tmp;
-	osi_list_t *bucket = &buckets[gfs2_inode_hash(key)];
-
-	osi_list_foreach(tmp, bucket) {
-		ii = osi_list_entry(tmp, struct inode_info, list);
-		if(ii->inode == key) {
-			return ii;
-		}
+		if (block < data->inode)
+			node = node->osi_left;
+		else if (block > data->inode)
+			node = node->osi_right;
+		else
+			return data;
 	}
 	return NULL;
 }
 
-int inode_hash_insert(osi_list_t *buckets, uint64_t key, struct inode_info *ii)
+struct inode_info *inodetree_insert(uint64_t dblock)
 {
-	osi_list_t *tmp;
-	osi_list_t *bucket = &buckets[gfs2_inode_hash(key)];
-	struct inode_info *itmp = NULL;
+	struct osi_node **newn = &inodetree.osi_node, *parent = NULL;
+	struct inode_info *data;
+
+	/* Figure out where to put new node */
+	while (*newn) {
+		struct inode_info *cur = (struct inode_info *)*newn;
 
-	if(osi_list_empty(bucket)) {
-		osi_list_add(&ii->list, bucket);
-		return 0;
+		parent = *newn;
+		if (dblock < cur->inode)
+			newn = &((*newn)->osi_left);
+		else if (dblock > cur->inode)
+			newn = &((*newn)->osi_right);
+		else
+			return cur;
 	}
 
-	osi_list_foreach(tmp, bucket) {
-		itmp = osi_list_entry(tmp, struct inode_info, list);
-		if(itmp->inode < key) {
-			continue;
-		} else {
-			osi_list_add_prev(&ii->list, tmp);
-			return 0;
-		}
+	data = malloc(sizeof(struct inode_info));
+	if (!data) {
+		log_crit( _("Unable to allocate inode_info structure\n"));
+		return NULL;
 	}
-	osi_list_add_prev(&ii->list, bucket);
-	return 0;
-}
+	/* Zero the new node.  memset() has no failure mode: it always
+	   returns its first argument, which is non-NULL at this point, so
+	   there is nothing to check and no error path that could leak it. */
+	memset(data, 0, sizeof(struct inode_info));
+	/* Add new node and rebalance tree. */
+	data->inode = dblock;
+	osi_link_node(&data->node, parent, newn);
+	osi_insert_color(&data->node, &inodetree);
 
+	return data;
+}
 
-int inode_hash_remove(osi_list_t *buckets, uint64_t key)
+void inodetree_delete(struct inode_info *b)
 {
-	osi_list_t *tmp;
-	osi_list_t *bucket = &buckets[gfs2_inode_hash(key)];
-	struct inode_info *itmp = NULL;
-
-	if(osi_list_empty(bucket)) {
-		return -1;
-	}
-	osi_list_foreach(tmp, bucket) {
-		itmp = osi_list_entry(tmp, struct inode_info, list);
-		if(itmp->inode == key) {
-			osi_list_del(tmp);
-			return 0;
-		}
-	}
-	return -1;
+	osi_erase(&b->node, &inodetree);
+	free(b);
 }
diff --git a/gfs2/fsck/inode_hash.h b/gfs2/fsck/inode_hash.h
index 7e41180..e18022d 100644
--- a/gfs2/fsck/inode_hash.h
+++ b/gfs2/fsck/inode_hash.h
@@ -1,9 +1,10 @@
 #ifndef _INODE_HASH_H
 #define _INODE_HASH_H
 
-struct inode_info *inode_hash_search(osi_list_t *buckets, uint64_t block_no);
-int inode_hash_insert(osi_list_t *buckets, uint64_t key,
-					  struct inode_info *ii);
-int inode_hash_remove(osi_list_t *buckets, uint64_t key);
+struct inode_info;
+
+extern struct inode_info *inodetree_find(uint64_t block);
+extern struct inode_info *inodetree_insert(uint64_t dblock);
+extern void inodetree_delete(struct inode_info *b);
 
 #endif /* _INODE_HASH_H */
diff --git a/gfs2/fsck/link.c b/gfs2/fsck/link.c
index 9666f63..0314074 100644
--- a/gfs2/fsck/link.c
+++ b/gfs2/fsck/link.c
@@ -11,84 +11,72 @@
 #include "inode_hash.h"
 #include "link.h"
 
-int set_link_count(struct gfs2_sbd *sbp, uint64_t inode_no, uint32_t count)
+int set_link_count(uint64_t inode_no, uint32_t count)
 {
-	struct inode_info *ii = NULL;
-	log_debug( _("Setting link count to %u for %" PRIu64 " (0x%" PRIx64 ")\n"),
-			  count, inode_no, inode_no);
-	/* If the list has entries, look for one that matches
-	 * inode_no */
-	ii = inode_hash_search(inode_hash, inode_no);
-	if(ii) {
-		if(ii->link_count) {
-			log_err( _("Link count already set for inode #%" PRIu64 " (0x%"
-					PRIx64 ")!\n"), inode_no, inode_no);
-			stack;
-			return -1;
-		}
-		else
-			ii->link_count = count;
-	}
-	else {
-		/* If not match was found, add a new entry and set it's
-		 * link count to count*/
-		if(!(ii = (struct inode_info *) malloc(sizeof(*ii)))) {
-			log_err( _("Unable to allocate inode_info structure\n"));
-			stack;
-			return -1;
-		}
-		memset(ii, 0, sizeof(*ii));
-		ii->inode = inode_no;
+	struct inode_info *ii;
+	/*log_debug( _("Setting link count to %u for %" PRIu64
+	  " (0x%" PRIx64 ")\n"), count, inode_no, inode_no);*/
+	/* Look up inode_no in the inode tree; insert a new node if absent */
+	ii = inodetree_find(inode_no);
+	if (!ii)
+		ii = inodetree_insert(inode_no);
+	if (ii)
 		ii->link_count = count;
-		inode_hash_insert(inode_hash, inode_no, ii);
-	}
+	else
+		return -1;
 	return 0;
 }
 
-int increment_link(struct gfs2_sbd *sbp, uint64_t inode_no)
+int increment_link(uint64_t inode_no, uint64_t referenced_from,
+		   const char *why)
 {
 	struct inode_info *ii = NULL;
 
-	ii = inode_hash_search(inode_hash, inode_no);
+	ii = inodetree_find(inode_no);
 	/* If the list has entries, look for one that matches
 	 * inode_no */
-	if(ii) {
+	if (ii) {
 		ii->counted_links++;
-		log_debug( _("Incremented counted links to %u for %"PRIu64" (0x%"
-				  PRIx64 ")\n"), ii->counted_links, inode_no, inode_no);
+		log_debug( _("Directory %lld (0x%llx) incremented counted "
+			     "links to %u for %"PRIu64" (0x%" PRIx64 ") "
+			     "via %s\n"),
+			   (unsigned long long)referenced_from,
+			   (unsigned long long)referenced_from,
+			   ii->counted_links, inode_no, inode_no, why);
 		return 0;
 	}
-	log_debug( _("No match found when incrementing link for %" PRIu64
-			  " (0x%" PRIx64 ")!\n"), inode_no, inode_no);
+	log_debug( _("Ref: %lld (0x%llx) No match found when incrementing "
+		     "link for %" PRIu64 " (0x%" PRIx64 ")!\n"),
+		   (unsigned long long)referenced_from,
+		   (unsigned long long)referenced_from, inode_no, inode_no);
 	/* If no match was found, add a new entry and set its
 	 * counted links to 1 */
-	if(!(ii = (struct inode_info *) malloc(sizeof(*ii)))) {
-		log_err( _("Unable to allocate inode_info structure\n"));
-		stack;
-		return -1;
-	}
-	if(!memset(ii, 0, sizeof(*ii))) {
-		log_err( _("Unable to zero inode_info structure\n"));
-		stack;
+	ii = inodetree_insert(inode_no);
+	if (ii)
+		ii->counted_links = 1;
+	else
 		return -1;
-	}
-	ii->inode = inode_no;
-	ii->counted_links = 1;
-	inode_hash_insert(inode_hash, inode_no, ii);
-
 	return 0;
 }
 
-int decrement_link(struct gfs2_sbd *sbp, uint64_t inode_no)
+int decrement_link(uint64_t inode_no, uint64_t referenced_from,
+		   const char *why)
 {
 	struct inode_info *ii = NULL;
 
-	ii = inode_hash_search(inode_hash, inode_no);
+	ii = inodetree_find(inode_no);
 	/* If the list has entries, look for one that matches
 	 * inode_no */
-	log_err( _("Decrementing %"PRIu64" (0x%" PRIx64 ")\n"), inode_no, inode_no);
+	log_err( _("Decrementing %"PRIu64" (0x%" PRIx64 ") to %d\n"),
+		 inode_no, inode_no, ii->counted_links);
 	if(ii) {
 		ii->counted_links--;
+		log_debug( _("Directory %lld (0x%llx) decremented counted "
+			     "links to %u for %"PRIu64" (0x%" PRIx64 ") "
+			     "via %s\n"),
+			   (unsigned long long)referenced_from,
+			   (unsigned long long)referenced_from,
+			   ii->counted_links, inode_no, inode_no, why);
 		return 0;
 	}
 	log_debug( _("No match found when decrementing link for %" PRIu64
diff --git a/gfs2/fsck/link.h b/gfs2/fsck/link.h
index becede1..f890575 100644
--- a/gfs2/fsck/link.h
+++ b/gfs2/fsck/link.h
@@ -1,8 +1,10 @@
 #ifndef _LINK_H
 #define _LINK_H
 
-int set_link_count(struct gfs2_sbd *sbp, uint64_t inode_no, uint32_t count);
-int increment_link(struct gfs2_sbd *sbp, uint64_t inode_no);
-int decrement_link(struct gfs2_sbd *sbp, uint64_t inode_no);
+int set_link_count(uint64_t inode_no, uint32_t count);
+int increment_link(uint64_t inode_no, uint64_t referenced_from,
+		   const char *why);
+int decrement_link(uint64_t inode_no, uint64_t referenced_from,
+		   const char *why);
 
 #endif /* _LINK_H */
diff --git a/gfs2/fsck/lost_n_found.c b/gfs2/fsck/lost_n_found.c
index 4170380..4a3a926 100644
--- a/gfs2/fsck/lost_n_found.c
+++ b/gfs2/fsck/lost_n_found.c
@@ -12,6 +12,8 @@
 #include "libgfs2.h"
 #include "lost_n_found.h"
 #include "link.h"
+#include "metawalk.h"
+#include "util.h"
 
 /* add_inode_to_lf - Add dir entry to lost+found for the inode
  * @ip: inode to add to lost + found
@@ -24,21 +26,27 @@
  */
 int add_inode_to_lf(struct gfs2_inode *ip){
 	char tmp_name[256];
-	char *filename;
-	int filename_len;
 	__be32 inode_type;
+	uint64_t lf_blocks;
+	struct gfs2_sbd *sdp = ip->i_sbd;
+	struct dir_info *di;
 
 	if(!lf_dip) {
-		struct gfs2_block_query q = {0};
+		uint8_t q;
 
-		log_info( _("Locating/Creating lost and found directory\n"));
+		log_info( _("Locating/Creating lost+found directory\n"));
 
-        lf_dip = createi(ip->i_sbd->md.rooti, "lost+found", S_IFDIR | 0700, 0);
-	if(gfs2_block_check(ip->i_sbd, bl, lf_dip->i_di.di_num.no_addr, &q)) {
-			stack;
-			return -1;
-		}
-		if(q.block_type != gfs2_inode_dir) {
+		lf_dip = createi(sdp->md.rooti, "lost+found",
+				 S_IFDIR | 0700, 0);
+		/* createi will have incremented the di_nlink link count for
+		   the root directory.  We must store the new nlink value in
+		   the inode tree to keep them in sync so that pass4 can
+		   detect and fix any discrepancies. */
+		set_link_count(sdp->sd_sb.sb_root_dir.no_addr,
+			       sdp->md.rooti->i_di.di_nlink);
+
+		q = block_type(lf_dip->i_di.di_num.no_addr);
+		if(q != gfs2_inode_dir) {
 			/* This is a new lost+found directory, so set its
 			 * block type and increment link counts for
 			 * the directories */
@@ -47,42 +55,75 @@ int add_inode_to_lf(struct gfs2_inode *ip){
 			 * directory or just found an old one, and we
 			 * used that instead of the block_type to run
 			 * this */
-			gfs2_block_set(ip->i_sbd, bl,
-				       lf_dip->i_di.di_num.no_addr, gfs2_inode_dir);
-			increment_link(ip->i_sbd,
-						   ip->i_sbd->md.rooti->i_di.di_num.no_addr);
-			increment_link(ip->i_sbd, lf_dip->i_di.di_num.no_addr);
-			increment_link(ip->i_sbd, lf_dip->i_di.di_num.no_addr);
+			fsck_blockmap_set(ip, lf_dip->i_di.di_num.no_addr,
+					  _("lost+found dinode"),
+					  gfs2_inode_dir);
+			/* root inode links to lost+found */
+			increment_link(sdp->md.rooti->i_di.di_num.no_addr,
+				       lf_dip->i_di.di_num.no_addr, _("root"));
+			/* lost+found link for '.' from itself */
+			increment_link(lf_dip->i_di.di_num.no_addr,
+				       lf_dip->i_di.di_num.no_addr, "\".\"");
+			/* lost+found link for '..' back to root */
+			increment_link(lf_dip->i_di.di_num.no_addr,
+				       sdp->md.rooti->i_di.di_num.no_addr,
+				       "\"..\"");
+		}
+		log_info( _("lost+found directory is dinode %lld (0x%llx)\n"),
+			  (unsigned long long)lf_dip->i_di.di_num.no_addr,
+			  (unsigned long long)lf_dip->i_di.di_num.no_addr);
+		di = dirtree_find(lf_dip->i_di.di_num.no_addr);
+		if (di) {
+			log_info( _("Marking lost+found inode connected\n"));
+			di->checked = 1;
+			di = NULL;
 		}
 	}
 	if(ip->i_di.di_num.no_addr == lf_dip->i_di.di_num.no_addr) {
 		log_err( _("Trying to add lost+found to itself...skipping"));
 		return 0;
 	}
+	lf_blocks = lf_dip->i_di.di_blocks;
+
 	switch(ip->i_di.di_mode & S_IFMT){
 	case S_IFDIR:
-		log_info( _("Adding .. entry pointing to lost+found for %llu\n"),
-				 (unsigned long long)ip->i_di.di_num.no_addr);
-		sprintf(tmp_name, "..");
-		filename_len = strlen(tmp_name);  /* no trailing NULL */
-		if(!(filename = malloc((sizeof(char) * filename_len) + 1))) {
-			log_err( _("Unable to allocate name\n"));
-			stack;
-			return -1;
-		}
-		if(!memset(filename, 0, (sizeof(char) * filename_len) + 1)) {
-			log_err( _("Unable to zero name\n"));
-			stack;
-			return -1;
-		}
-		memcpy(filename, tmp_name, filename_len);
+		log_info( _("Adding .. entry pointing to lost+found for "
+			    "directory %llu (0x%llx)\n"),
+			  (unsigned long long)ip->i_di.di_num.no_addr,
+			  (unsigned long long)ip->i_di.di_num.no_addr);
+
+		/* If there's a pre-existing .. directory entry, we have to
+		   back out the links. */
+		di = dirtree_find(ip->i_di.di_num.no_addr);
+		if (di && gfs2_check_range(sdp, di->dotdot_parent) == 0) {
+			struct gfs2_inode *dip;
 
-		if(gfs2_dirent_del(ip, NULL, filename, filename_len))
-			log_warn( _("add_inode_to_lf:  "
-					 "Unable to remove \"..\" directory entry.\n"));
+			log_debug(_("Directory %lld (0x%llx) already had a "
+				    "\"..\" link to %lld (0x%llx).\n"),
+				  (unsigned long long)ip->i_di.di_num.no_addr,
+				  (unsigned long long)ip->i_di.di_num.no_addr,
+				  (unsigned long long)di->dotdot_parent,
+				  (unsigned long long)di->dotdot_parent);
+			decrement_link(di->dotdot_parent,
+				       ip->i_di.di_num.no_addr,
+				       _(".. unlinked, moving to lost+found"));
+			dip = fsck_load_inode(sdp, di->dotdot_parent);
+			dip->i_di.di_nlink--;
+			log_debug(_("Decrementing its links to %d\n"),
+				  dip->i_di.di_nlink);
+			bmodified(dip->i_bh);
+			fsck_inode_put(&dip);
+			di = NULL;
+		} else
+			log_debug(_("Couldn't find a valid \"..\" entry "
+				    "for orphan directory %lld (0x%llx)\n"),
+				  (unsigned long long)ip->i_di.di_num.no_addr,
+				  (unsigned long long)ip->i_di.di_num.no_addr);
+		if(gfs2_dirent_del(ip, "..", 2))
+			log_warn( _("add_inode_to_lf:  Unable to remove "
+				    "\"..\" directory entry.\n"));
 
-		dir_add(ip, filename, filename_len, &(lf_dip->i_di.di_num), DT_DIR);
-		free(filename);
+		dir_add(ip, "..", 2, &(lf_dip->i_di.di_num), DT_DIR);
 		sprintf(tmp_name, "lost_dir_%llu",
 			(unsigned long long)ip->i_di.di_num.no_addr);
 		inode_type = DT_DIR;
@@ -123,26 +164,26 @@ int add_inode_to_lf(struct gfs2_inode *ip){
 		inode_type = DT_REG;
 		break;
 	}
-	filename_len = strlen(tmp_name);  /* no trailing NULL */
-	if(!(filename = malloc(sizeof(char) * filename_len))) {
-		log_err( _("Unable to allocate name\n"));
-			stack;
-			return -1;
-		}
-	if(!memset(filename, 0, sizeof(char) * filename_len)) {
-		log_err( _("Unable to zero name\n"));
-		stack;
-		return -1;
-	}
-	memcpy(filename, tmp_name, filename_len);
 
-	dir_add(lf_dip, filename, filename_len, &(ip->i_di.di_num), inode_type);
-  	increment_link(ip->i_sbd, ip->i_di.di_num.no_addr);
+	dir_add(lf_dip, tmp_name, strlen(tmp_name), &(ip->i_di.di_num),
+		inode_type);
+	/* If the lf directory had new blocks added we have to mark them
+	   properly in the bitmap so they're not freed. */
+	if (lf_dip->i_di.di_blocks != lf_blocks)
+		reprocess_inode(lf_dip, "lost+found");
+
+	/* This inode is linked from lost+found */
+	increment_link(ip->i_di.di_num.no_addr, lf_dip->i_di.di_num.no_addr,
+		       _("from lost+found"));
+	/* A directory's ".." back-links lost+found; log the dir as referrer */
 	if(S_ISDIR(ip->i_di.di_mode))
-		increment_link(ip->i_sbd, lf_dip->i_di.di_num.no_addr);
+		increment_link(lf_dip->i_di.di_num.no_addr,
+			       ip->i_di.di_num.no_addr, _("to lost+found"));
 
-	free(filename);
-	log_notice( _("Added inode #%llu to lost+found dir\n"),
-		   (unsigned long long)ip->i_di.di_num.no_addr);
+	log_notice( _("Added inode #%llu (0x%llx) to lost+found dir\n"),
+		    (unsigned long long)ip->i_di.di_num.no_addr,
+		    (unsigned long long)ip->i_di.di_num.no_addr);
+	gfs2_dinode_out(&lf_dip->i_di, lf_dip->i_bh);
+	bwrite(lf_dip->i_bh);
 	return 0;
 }
diff --git a/gfs2/fsck/main.c b/gfs2/fsck/main.c
index 68e8428..6bddeea 100644
--- a/gfs2/fsck/main.c
+++ b/gfs2/fsck/main.c
@@ -15,19 +15,24 @@
 #include "libgfs2.h"
 #include "fsck.h"
 #include "osi_list.h"
+#include "metawalk.h"
+#include "util.h"
 
 struct gfs2_options opts = {0};
-struct gfs2_inode *lf_dip; /* Lost and found directory inode */
-osi_list_t dir_hash[FSCK_HASH_SIZE];
-osi_list_t inode_hash[FSCK_HASH_SIZE];
-struct gfs2_block_list *bl = NULL;
+struct gfs2_inode *lf_dip = NULL; /* Lost and found directory inode */
+struct gfs2_bmap *bl = NULL;
 uint64_t last_fs_block, last_reported_block = -1;
+int64_t last_reported_fblock = -1000000;
 int skip_this_pass = FALSE, fsck_abort = FALSE;
 int errors_found = 0, errors_corrected = 0;
 const char *pass = "";
 uint64_t last_data_block;
 uint64_t first_data_block;
 int preen = 0, force_check = 0;
+struct osi_root dup_blocks = (struct osi_root) { NULL, };
+struct osi_root dirtree = (struct osi_root) { NULL, };
+struct osi_root inodetree = (struct osi_root) { NULL, };
+int dups_found = 0, dups_found_first = 0;
 
 /* This function is for libgfs2's sake.                                      */
 void print_it(const char *label, const char *fmt, const char *fmt2, ...)
@@ -159,25 +164,20 @@ static int check_system_inode(struct gfs2_inode *sysinode, const char *filename,
 		
 		/* FIXME: check this block's validity */
 
-		if(gfs2_block_check(sysinode->i_sbd, bl, iblock, &ds.q)) {
-			log_crit( _("Can't get %s inode block %" PRIu64 " (0x%"
-				 PRIx64 ") from block list\n"), filename,
-				 iblock, iblock);
-			return -1;
-		}
+		ds.q = block_type(iblock);
 		/* If the inode exists but the block is marked      */
 		/* free, we might be recovering from a corrupt      */
 		/* bitmap.  In that case, don't rebuild the inode.  */
 		/* Just reuse the inode and fix the bitmap.         */
-		if (ds.q.block_type == gfs2_block_free) {
-			log_info( _("The inode exists but the block is not marked 'in use'; fixing it.\n"));
-			gfs2_block_set(sysinode->i_sbd, bl,
-				       sysinode->i_di.di_num.no_addr,
-				       mark);
-			ds.q.block_type = mark;
+		if (ds.q == gfs2_block_free) {
+			log_info( _("The inode exists but the block is not "
+				    "marked 'in use'; fixing it.\n"));
+			fsck_blockmap_set(sysinode,
+					  sysinode->i_di.di_num.no_addr,
+					  filename, mark);
+			ds.q = mark;
 			if (mark == gfs2_inode_dir)
-				add_to_dir_list(sysinode->i_sbd,
-						sysinode->i_di.di_num.no_addr);
+				dirtree_insert(sysinode->i_di.di_num.no_addr);
 		}
 	}
 	else
@@ -186,20 +186,16 @@ static int check_system_inode(struct gfs2_inode *sysinode, const char *filename,
 	 * create a new inode and get it all setup - of course,
 	 * everything will be in lost+found then, but we *need* our
 	 * system inodes before we can do any of that. */
-	if(!sysinode || ds.q.block_type != mark) {
+	if(!sysinode || ds.q != mark) {
 		log_err( _("Invalid or missing %s system inode.\n"), filename);
-		errors_found++;
-		if (query(&opts, _("Create new %s system inode? (y/n) "),
-			  filename)) {
-			errors_corrected++;
+		if (query(_("Create new %s system inode? (y/n) "), filename)) {
 			builder(sysinode->i_sbd);
-			gfs2_block_set(sysinode->i_sbd, bl,
-				       sysinode->i_di.di_num.no_addr,
-				       mark);
-			ds.q.block_type = mark;
+			fsck_blockmap_set(sysinode,
+					  sysinode->i_di.di_num.no_addr,
+					  filename, mark);
+			ds.q = mark;
 			if (mark == gfs2_inode_dir)
-				add_to_dir_list(sysinode->i_sbd,
-						sysinode->i_di.di_num.no_addr);
+				dirtree_insert(sysinode->i_di.di_num.no_addr);
 		}
 		else {
 			log_err( _("Cannot continue without valid %s inode\n"),
@@ -297,23 +293,28 @@ static void check_statfs(struct gfs2_sbd *sdp)
 	log_err( _("The statfs file is wrong:\n\n"));
 	log_err( _("Current statfs values:\n"));
 	log_err( _("blocks:  %lld (0x%llx)\n"),
-		sc.sc_total, sc.sc_total);
+		 (unsigned long long)sc.sc_total,
+		 (unsigned long long)sc.sc_total);
 	log_err( _("free:    %lld (0x%llx)\n"),
-		sc.sc_free, sc.sc_free);
+		 (unsigned long long)sc.sc_free,
+		 (unsigned long long)sc.sc_free);
 	log_err( _("dinodes: %lld (0x%llx)\n\n"),
-		sc.sc_dinodes, sc.sc_dinodes);
+		 (unsigned long long)sc.sc_dinodes,
+		 (unsigned long long)sc.sc_dinodes);
 
 	log_err( _("Calculated statfs values:\n"));
 	log_err( _("blocks:  %lld (0x%llx)\n"),
-		sdp->blks_total, sdp->blks_total);
+		 (unsigned long long)sdp->blks_total,
+		 (unsigned long long)sdp->blks_total);
 	log_err( _("free:    %lld (0x%llx)\n"),
-		sdp->blks_total - sdp->blks_alloced,
-		sdp->blks_total - sdp->blks_alloced);
+		 (unsigned long long)(sdp->blks_total - sdp->blks_alloced),
+		 (unsigned long long)(sdp->blks_total - sdp->blks_alloced));
 	log_err( _("dinodes: %lld (0x%llx)\n"),
-		sdp->dinodes_alloced, sdp->dinodes_alloced);
+		 (unsigned long long)sdp->dinodes_alloced,
+		 (unsigned long long)sdp->dinodes_alloced);
 
 	errors_found++;
-	if (!query(&opts, _("Okay to fix the master statfs file? (y/n)"))) {
+	if (!query( _("Okay to fix the master statfs file? (y/n)"))) {
 		log_err( _("The statfs file was not fixed.\n"));
 		return;
 	}
@@ -328,7 +329,6 @@ int main(int argc, char **argv)
 	struct gfs2_sbd sb;
 	struct gfs2_sbd *sbp = &sb;
 	int j;
-	enum update_flags update_sys_files;
 	int error = 0;
 	int all_clean = 0;
 
@@ -447,29 +447,26 @@ int main(int argc, char **argv)
 	} else {
 		error = FSCK_CANCELED;
 	}
-	update_sys_files = (opts.no ? not_updated : updated);
 
 	check_statfs(sbp);
 
 	/* Free up our system inodes */
-	inode_put(sbp->md.inum, update_sys_files);
-	inode_put(sbp->md.statfs, update_sys_files);
+	inode_put(&sbp->md.inum);
+	inode_put(&sbp->md.statfs);
 	for (j = 0; j < sbp->md.journals; j++)
-		inode_put(sbp->md.journal[j], update_sys_files);
-	inode_put(sbp->md.jiinode, update_sys_files);
-	inode_put(sbp->md.riinode, update_sys_files);
-	inode_put(sbp->md.qinode, update_sys_files);
-	inode_put(sbp->md.pinode, update_sys_files);
-	inode_put(sbp->md.rooti, update_sys_files);
-	inode_put(sbp->master_dir, update_sys_files);
+		inode_put(&sbp->md.journal[j]);
+	inode_put(&sbp->md.jiinode);
+	inode_put(&sbp->md.riinode);
+	inode_put(&sbp->md.qinode);
+	inode_put(&sbp->md.pinode);
+	inode_put(&sbp->md.rooti);
+	inode_put(&sbp->master_dir);
 	if (lf_dip)
-		inode_put(lf_dip, update_sys_files);
+		inode_put(&lf_dip);
 
 	if (!opts.no && errors_corrected)
 		log_notice( _("Writing changes to disk\n"));
-
-	bsync(&sbp->buf_list);
-	bsync(&sbp->nvbuf_list);
+	fsync(sbp->device_fd);
 	destroy(sbp);
 	log_notice( _("gfs2_fsck complete    \n"));
 
diff --git a/gfs2/fsck/metawalk.c b/gfs2/fsck/metawalk.c
index b60b8ca..0f18733 100644
--- a/gfs2/fsck/metawalk.c
+++ b/gfs2/fsck/metawalk.c
@@ -6,54 +6,190 @@
 #include <sys/stat.h>
 #include <unistd.h>
 #include <libintl.h>
+#include <ctype.h>
 #define _(String) gettext(String)
 
 #include "libgfs2.h"
+#include "osi_tree.h"
 #include "fsck.h"
 #include "util.h"
 #include "metawalk.h"
 #include "hash.h"
+#include "inode_hash.h"
+
+#define COMFORTABLE_BLKS 5242880 /* 20GB in 4K blocks */
+
+/* There are two bitmaps: (1) The "blockmap" that fsck uses to keep track of
+   what block type has been discovered, and (2) The rgrp bitmap.  Function
+   gfs2_blockmap_set is used to set the former and gfs2_set_bitmap
+   is used to set the latter.  The two must be kept in sync, otherwise
+   you'll get bitmap mismatches.  This function checks the status of the
+   bitmap whenever the blockmap changes, and fixes it accordingly. */
+int check_n_fix_bitmap(struct gfs2_sbd *sdp, uint64_t blk,
+		       enum gfs2_mark_block new_blockmap_state)
+{
+	int old_bitmap_state, new_bitmap_state;
+	struct rgrp_list *rgd;
+
+	rgd = gfs2_blk2rgrpd(sdp, blk);
 
-struct gfs2_inode *get_system_inode(struct gfs2_sbd *sbp, uint64_t block)
+	old_bitmap_state = gfs2_get_bitmap(sdp, blk, rgd);
+	if (old_bitmap_state < 0) {
+		log_err( _("Block %lld (0x%llx) is not represented in the "
+			   "system bitmap; part of an rgrp or superblock.\n"),
+			 (unsigned long long)blk, (unsigned long long)blk);
+		return -1;
+	}
+	new_bitmap_state = blockmap_to_bitmap(new_blockmap_state);
+	if (old_bitmap_state != new_bitmap_state) {
+		const char *allocdesc[] = {"free space", "data", "unlinked",
+					   "inode", "reserved"};
+
+		log_err( _("Block %llu (0x%llx) seems to be %s, but is "
+			   "marked as %s in the bitmap.\n"),
+			 (unsigned long long)blk, (unsigned long long)blk,
+			 allocdesc[new_bitmap_state],
+			 allocdesc[old_bitmap_state]);
+		if(query( _("Okay to fix the bitmap? (y/n)"))) {
+			/* If the new bitmap state is free (and therefore the
+			   old state was not) we have to add to the free
+			   space in the rgrp. If the old bitmap state was
+			   free (and therefore it no longer is) we have to
+			   subtract from the free space.  If the type changed
+			   from dinode to data or data to dinode, no change in
+			   free space. */
+			gfs2_set_bitmap(sdp, blk, new_bitmap_state);
+			if (new_bitmap_state == GFS2_BLKST_FREE) {
+				/* If we're freeing a dinode, get rid of
+				   the hash table entries for it. */
+				if (old_bitmap_state == GFS2_BLKST_DINODE) {
+					struct dir_info *dt;
+					struct inode_info *ii;
+
+					dt = dirtree_find(blk);
+					if (dt)
+						dirtree_delete(dt);
+					ii = inodetree_find(blk);
+					if (ii)
+						inodetree_delete(ii);
+				}
+				rgd->rg.rg_free++;
+				gfs2_rgrp_out(&rgd->rg, rgd->bh[0]);
+			} else if (old_bitmap_state == GFS2_BLKST_FREE) {
+				rgd->rg.rg_free--;
+				gfs2_rgrp_out(&rgd->rg, rgd->bh[0]);
+			}
+			log_err( _("The bitmap was fixed.\n"));
+		} else {
+			log_err( _("The bitmap inconsistency was ignored.\n"));
+		}
+	}
+	return 0;
+}
+
+/*
+ * _fsck_blockmap_set - Mark a block in the 4-bit blockmap and the 2-bit
+ *                      bitmap, and adjust free space accordingly.
+ */
+int _fsck_blockmap_set(struct gfs2_inode *ip, uint64_t bblock,
+		       const char *btype, enum gfs2_mark_block mark,
+		       const char *caller, int fline)
 {
-	int j;
-
-	if (block == sbp->md.inum->i_di.di_num.no_addr)
-		return sbp->md.inum;
-	if (block == sbp->md.statfs->i_di.di_num.no_addr)
-		return sbp->md.statfs;
-	if (block == sbp->md.jiinode->i_di.di_num.no_addr)
-		return sbp->md.jiinode;
-	if (block == sbp->md.riinode->i_di.di_num.no_addr)
-		return sbp->md.riinode;
-	if (block == sbp->md.qinode->i_di.di_num.no_addr)
-		return sbp->md.qinode;
-	if (block == sbp->md.pinode->i_di.di_num.no_addr)
-		return sbp->md.pinode;
-	if (block == sbp->md.rooti->i_di.di_num.no_addr)
-		return sbp->md.rooti;
-	if (block == sbp->master_dir->i_di.di_num.no_addr)
-		return sbp->master_dir;
-	if (lf_dip && block == lf_dip->i_di.di_num.no_addr)
-		return lf_dip;
-	for (j = 0; j < sbp->md.journals; j++)
-		if (block == sbp->md.journal[j]->i_di.di_num.no_addr)
-			return sbp->md.journal[j];
+	int error;
+
+	if (print_level >= MSG_DEBUG) {
+		const char *p;
+
+		p = strrchr(caller, '/');
+		if (p)
+			p++;
+		else
+			p = caller;
+		/* I'm circumventing the log levels here on purpose to make the
+		   output easier to debug. */
+		if (ip->i_di.di_num.no_addr == bblock) {
+			print_fsck_log(MSG_DEBUG, p, fline,
+				       _("%s inode found at block %lld "
+					 "(0x%llx): marking as '%s'\n"),
+				       btype, (unsigned long long)
+				       ip->i_di.di_num.no_addr,
+				       (unsigned long long)
+				       ip->i_di.di_num.no_addr,
+				       block_type_string(mark));
+		} else if (mark == gfs2_bad_block || mark == gfs2_meta_inval) {
+			print_fsck_log(MSG_DEBUG, p, fline,
+				       _("inode %lld (0x%llx) references "
+					 "%s block %lld (0x%llx): "
+					 "marking as '%s'\n"),
+				       (unsigned long long)
+				       ip->i_di.di_num.no_addr,
+				       (unsigned long long)
+				       ip->i_di.di_num.no_addr,
+				       btype, (unsigned long long)bblock,
+				       (unsigned long long)bblock,
+				       block_type_string(mark));
+		} else {
+			print_fsck_log(MSG_DEBUG, p, fline,
+				       _("inode %lld (0x%llx) references "
+					 "%s block %lld (0x%llx): "
+					 "marking as '%s'\n"),
+				       (unsigned long long)
+				       ip->i_di.di_num.no_addr,
+				       (unsigned long long)
+				       ip->i_di.di_num.no_addr, btype,
+				       (unsigned long long)bblock,
+				       (unsigned long long)bblock,
+				       block_type_string(mark));
+		}
+	}
+
+	/* First, check the rgrp bitmap against what we think it should be.
+	   If that fails, it's an invalid block--part of an rgrp. */
+	error = check_n_fix_bitmap(ip->i_sbd, bblock, mark);
+	if (error) {
+		log_err( _("This block is not represented in the bitmap.\n"));
+		return error;
+	}
+
+	error = gfs2_blockmap_set(bl, bblock, mark);
+	return error;
+}
+
+struct duptree *dupfind(uint64_t block)
+{
+	struct osi_node *node = dup_blocks.osi_node;
+
+	while (node) {
+		struct duptree *data = (struct duptree *)node;
+
+		if (block < data->block)
+			node = node->osi_left;
+		else if (block > data->block)
+			node = node->osi_right;
+		else
+			return data;
+	}
 	return NULL;
 }
 
+static struct gfs2_inode *fsck_system_inode(struct gfs2_sbd *sdp,
+					    uint64_t block)
+{
+	if (lf_dip && lf_dip->i_di.di_num.no_addr == block)
+		return lf_dip;
+	return is_system_inode(sdp, block);
+}
+
 /* fsck_load_inode - same as gfs2_load_inode() in libgfs2 but system inodes
    get special treatment. */
 struct gfs2_inode *fsck_load_inode(struct gfs2_sbd *sbp, uint64_t block)
 {
 	struct gfs2_inode *ip = NULL;
 
-	ip = get_system_inode(sbp, block);
-	if (ip) {
-		bhold(ip->i_bh);
+	ip = fsck_system_inode(sbp, block);
+	if (ip)
 		return ip;
-	}
-	return gfs2_load_inode(sbp, block);
+	return inode_read(sbp, block);
 }
 
 /* fsck_inode_get - same as inode_get() in libgfs2 but system inodes
@@ -61,39 +197,25 @@ struct gfs2_inode *fsck_load_inode(struct gfs2_sbd *sbp, uint64_t block)
 struct gfs2_inode *fsck_inode_get(struct gfs2_sbd *sdp,
 				  struct gfs2_buffer_head *bh)
 {
-	struct gfs2_inode *ip, *sysip;
-
-	ip = calloc(1, sizeof(struct gfs2_inode));
-	if (ip == NULL) {
-		fprintf(stderr, _("Out of memory in %s\n"), __FUNCTION__);
-		exit(-1);
-	}
-	gfs2_dinode_in(&ip->i_di, bh->b_data);
-	ip->i_bh = bh;
-	ip->i_sbd = sdp;
+	struct gfs2_inode *sysip;
 
-	sysip = get_system_inode(sdp, ip->i_di.di_num.no_addr);
-	if (sysip) {
-		free(ip);
+	sysip = fsck_system_inode(sdp, bh->b_blocknr);
+	if (sysip)
 		return sysip;
-	}
-	return ip;
+
+	return inode_get(sdp, bh);
 }
 
 /* fsck_inode_put - same as inode_put() in libgfs2 but system inodes
    get special treatment. */
-void fsck_inode_put(struct gfs2_inode *ip, enum update_flags update)
+void fsck_inode_put(struct gfs2_inode **ip_in)
 {
+	struct gfs2_inode *ip = *ip_in;
 	struct gfs2_inode *sysip;
 
-	sysip = get_system_inode(ip->i_sbd, ip->i_di.di_num.no_addr);
-	if (sysip) {
-		if (update)
-			gfs2_dinode_out(&ip->i_di, ip->i_bh->b_data);
-		brelse(ip->i_bh, update);
-	} else {
-		inode_put(ip, update);
-	}
+	sysip = fsck_system_inode(ip->i_sbd, ip->i_di.di_num.no_addr);
+	if (!sysip)
+		inode_put(ip_in);
 }
 
 /**
@@ -127,6 +249,7 @@ static int dirent_repair(struct gfs2_inode *ip, struct gfs2_buffer_head *bh,
 		/* first, figure out a probable name length */
 		p = (char *)dent + sizeof(struct gfs2_dirent);
 		while (*p &&         /* while there's a non-zero char and */
+		       isprint(*p) && /* a printable character and */
 		       p < bh_end) { /* not past end of buffer */
 			calc_de_name_len++;
 			p++;
@@ -162,6 +285,7 @@ static void dirblk_truncate(struct gfs2_inode *ip, struct gfs2_dirent *fixb,
 	old_rec_len = de.de_rec_len;
 	de.de_rec_len = bh_end - (char *)fixb;
 	gfs2_dirent_out(&de, (char *)fixb);
+	bmodified(bh);
 }
 
 /*
@@ -170,7 +294,6 @@ static void dirblk_truncate(struct gfs2_inode *ip, struct gfs2_dirent *fixb,
  * @ip - dinode associated with this leaf block
  * bh - buffer for the leaf block
  * type - type of block this is (linear or exhash)
- * @update - set to 1 if the block was updated
  * @count - set to the count entries
  * @pass - structure pointing to pass-specific functions
  *
@@ -178,8 +301,7 @@ static void dirblk_truncate(struct gfs2_inode *ip, struct gfs2_dirent *fixb,
  *         -1 - error occurred
  */
 static int check_entries(struct gfs2_inode *ip, struct gfs2_buffer_head *bh,
-		  int type, enum update_flags *update,
-		  uint16_t *count, struct metawalk_fxns *pass)
+		  int type, uint16_t *count, struct metawalk_fxns *pass)
 {
 	struct gfs2_leaf *leaf = NULL;
 	struct gfs2_dirent *dent;
@@ -227,15 +349,13 @@ static int check_entries(struct gfs2_inode *ip, struct gfs2_buffer_head *bh,
 				(*count) + 1,
 				(unsigned long long)ip->i_di.di_num.no_addr,
 				(unsigned long long)ip->i_di.di_num.no_addr);
-			errors_found++;
-			if (query(&opts, _("Attempt to repair it? (y/n) "))) {
+			if (query( _("Attempt to repair it? (y/n) "))) {
 				if (dirent_repair(ip, bh, &de, dent, type,
 						  first)) {
 					if (first) /* make a new sentinel */
 						dirblk_truncate(ip, dent, bh);
 					else
 						dirblk_truncate(ip, prev, bh);
-					*update = updated;
 					log_err( _("Unable to repair corrupt "
 						   "directory entry; the "
 						   "entry was removed "
@@ -244,8 +364,6 @@ static int check_entries(struct gfs2_inode *ip, struct gfs2_buffer_head *bh,
 				} else {
 					log_err( _("Corrupt directory entry "
 						   "repaired.\n"));
-					errors_corrected++;
-					*update = updated;
 					/* keep looping through dentries */
 				}
 			} else {
@@ -268,10 +386,8 @@ static int check_entries(struct gfs2_inode *ip, struct gfs2_buffer_head *bh,
 					(unsigned long long)bh->b_blocknr,
 					(unsigned long long)ip->i_di.di_num.no_addr,
 					(unsigned long long)ip->i_di.di_num.no_addr);
-				if (query(&opts,
-					  _("Attempt to remove it? (y/n) "))) {
+				if (query(_("Attempt to remove it? (y/n) "))) {
 					dirblk_truncate(ip, prev, bh);
-					*update = 1;
 					log_err(_("The corrupt directory "
 						  "entry was removed.\n"));
 				} else {
@@ -289,14 +405,12 @@ static int check_entries(struct gfs2_inode *ip, struct gfs2_buffer_head *bh,
 				de.de_inum.no_addr = de.de_inum.no_formal_ino;
 				de.de_inum.no_formal_ino = 0;
 				gfs2_dirent_out(&de, (char *)dent);
-				*update = (opts.no ? not_updated : updated);
+				bmodified(bh);
 				/* Mark dirent buffer as modified */
 				first = 0;
-			}
-			else {
+			} else {
 				error = pass->check_dentry(ip, dent, prev, bh,
-							   filename, update,
-							   count,
+							   filename, count,
 							   pass->private);
 				if(error < 0) {
 					stack;
@@ -306,7 +420,12 @@ static int check_entries(struct gfs2_inode *ip, struct gfs2_buffer_head *bh,
 		}
 
 		if ((char *)dent + de.de_rec_len >= bh_end){
-			log_debug( _("Last entry processed.\n"));
+			log_debug( _("Last entry processed for %lld->%lld "
+				     "(0x%llx->0x%llx).\n"),
+				   (unsigned long long)ip->i_di.di_num.no_addr,
+				   (unsigned long long)bh->b_blocknr,
+				   (unsigned long long)ip->i_di.di_num.no_addr,
+				   (unsigned long long)bh->b_blocknr);
 			break;
 		}
 
@@ -337,10 +456,8 @@ static void warn_and_patch(struct gfs2_inode *ip, uint64_t *leaf_no,
 			(unsigned long long)*leaf_no,
 			(unsigned long long)*leaf_no, msg);
 	}
-	errors_found++;
 	if (*leaf_no == *bad_leaf ||
-	    query(&opts, _("Attempt to patch around it? (y/n) "))) {
-		errors_corrected++;
+	    query( _("Attempt to patch around it? (y/n) "))) {
 		if (gfs2_check_range(ip->i_sbd, old_leaf) == 0)
 			gfs2_put_leaf_nr(ip, pindex, old_leaf);
 		else
@@ -355,14 +472,68 @@ static void warn_and_patch(struct gfs2_inode *ip, uint64_t *leaf_no,
 	*leaf_no = old_leaf;
 }
 
+/*
+ * fix_leaf_pointers - fix a directory dinode that has a number of pointers
+ *                     that is not a power of 2.
+ * dip - the directory inode having the problem
+ * lindex - the index of the leaf right after the problem (need to back up)
+ * cur_numleafs - current (incorrect) number of instances of the leaf block
+ * correct_numleafs - the correct number of instances of the leaf block
+ */
+static int fix_leaf_pointers(struct gfs2_inode *dip, int *lindex,
+			     int cur_numleafs, int correct_numleafs)
+{
+	int count;
+	char *ptrbuf;
+	int start_lindex = *lindex - cur_numleafs; /* start of bad ptrs */
+	int tot_num_ptrs = (1 << dip->i_di.di_depth) - start_lindex;
+	int bufsize = tot_num_ptrs * sizeof(uint64_t);
+	int off_by = cur_numleafs - correct_numleafs;
+
+	ptrbuf = malloc(bufsize);
+	if (!ptrbuf) {
+		log_err( _("Error: Cannot allocate memory to fix the leaf "
+			   "pointers.\n"));
+		return -1;
+	}
+	/* Read all the pointers, starting with the first bad one */
+	count = gfs2_readi(dip, ptrbuf, start_lindex * sizeof(uint64_t),
+			   bufsize);
+	if (count != bufsize) {
+		log_err( _("Error: bad read while fixing leaf pointers.\n"));
+		free(ptrbuf);
+		return -1;
+	}
+
+	bufsize -= off_by * sizeof(uint64_t); /* We need to write fewer */
+	/* Write the same pointers, but offset them so they fit within the
+	   smaller power of 2. So if we have 12 pointers, write out only
+	   the last 8 of them.  If we have 7, write the last 4, etc.
+	   We need to write these starting at the current lindex and adjust
+	   lindex accordingly. */
+	count = gfs2_writei(dip, ptrbuf + (off_by * sizeof(uint64_t)),
+			    start_lindex * sizeof(uint64_t), bufsize);
+	if (count != bufsize) {
+		log_err( _("Error: bad write while fixing leaf pointers.\n"));
+		free(ptrbuf);
+		return -1;
+	}
+	/* Now zero out the hole left at the end */
+	memset(ptrbuf, 0, off_by * sizeof(uint64_t));
+	gfs2_writei(dip, ptrbuf, (start_lindex * sizeof(uint64_t)) +
+		    bufsize, off_by * sizeof(uint64_t));
+	free(ptrbuf);
+	*lindex -= off_by; /* adjust leaf index to account for the change */
+	return 0;
+}
+
 /* Checks exhash directory entries */
-static int check_leaf_blks(struct gfs2_inode *ip, enum update_flags *update,
-			   struct metawalk_fxns *pass)
+static int check_leaf_blks(struct gfs2_inode *ip, struct metawalk_fxns *pass)
 {
 	int error;
 	struct gfs2_leaf leaf, oldleaf;
 	uint64_t leaf_no, old_leaf, bad_leaf = -1;
-	uint64_t first_leaf_ptr = -1, first_ok_leaf = -1;
+	uint64_t first_ok_leaf;
 	struct gfs2_buffer_head *lbh;
 	int lindex;
 	struct gfs2_sbd *sbp = ip->i_sbd;
@@ -372,21 +543,27 @@ static int check_leaf_blks(struct gfs2_inode *ip, enum update_flags *update,
 	/* Find the first valid leaf pointer in range and use it as our "old"
 	   leaf. That way, bad blocks at the beginning will be overwritten
 	   with the first valid leaf. */
-	first_ok_leaf = -1;
+	first_ok_leaf = leaf_no = -1;
 	for(lindex = 0; lindex < (1 << ip->i_di.di_depth); lindex++) {
-		gfs2_get_leaf_nr(ip, lindex, &first_ok_leaf);
-		if (first_leaf_ptr == -1)
-			first_leaf_ptr = first_ok_leaf;
-		if(gfs2_check_range(ip->i_sbd, first_ok_leaf) == 0) {
-			lbh = bread(&sbp->buf_list, first_ok_leaf);
+		gfs2_get_leaf_nr(ip, lindex, &leaf_no);
+		if (gfs2_check_range(ip->i_sbd, leaf_no) == 0) {
+			lbh = bread(sbp, leaf_no);
 			/* Make sure it's really a valid leaf block. */
 			if (gfs2_check_meta(lbh, GFS2_METATYPE_LF) == 0) {
-				brelse(lbh, not_updated);
+				brelse(lbh);
+				first_ok_leaf = leaf_no;
 				break;
 			}
-			brelse(lbh, not_updated);
+			brelse(lbh);
 		}
 	}
+	if (first_ok_leaf == -1) { /* no valid leaf found */
+		log_err( _("Directory #%llu (0x%llx) has no valid leaf "
+			   "blocks\n"),
+			 (unsigned long long)ip->i_di.di_num.no_addr,
+			 (unsigned long long)ip->i_di.di_num.no_addr);
+		return 1;
+	}
 	old_leaf = -1;
 	memset(&oldleaf, 0, sizeof(oldleaf));
 	for(lindex = 0; lindex < (1 << ip->i_di.di_depth); lindex++) {
@@ -402,40 +579,75 @@ static int check_leaf_blks(struct gfs2_inode *ip, enum update_flags *update,
 								  leaf info */
 			ref_count++;
 			continue;
-		}
-		else if(old_leaf == leaf_no) {
+		} else if(old_leaf == leaf_no) {
 			ref_count++;
 			continue;
 		}
-		if (gfs2_check_range(ip->i_sbd, old_leaf) == 0 &&
-		    ref_count != exp_count) {
-			log_err( _("Dir #%llu (0x%llx) has an incorrect "
-				   "number of pointers to leaf #%llu "
-				   " (0x%llx)\n\tFound: %u,  Expected: "
-				   "%u\n"), (unsigned long long)
-				 ip->i_di.di_num.no_addr,
-				 (unsigned long long)
-				 ip->i_di.di_num.no_addr,
-				 (unsigned long long)old_leaf,
-				 (unsigned long long)old_leaf,
-				 ref_count, exp_count);
-			errors_found++;
-			if (query(&opts, _("Attempt to fix it? (y/n) "))) {
-				int factor = 0, divisor = ref_count;
-
-				errors_corrected++;
-				lbh = bread(&sbp->buf_list, old_leaf);
-				while (divisor > 1) {
-					factor++;
-					divisor /= 2;
+		if (gfs2_check_range(ip->i_sbd, old_leaf) == 0) {
+			int factor = 0, divisor = ref_count, multiple = 1;
+
+			/* Check to see if the number of pointers we found is
+			   a power of 2.  It needs to be and if it's not we
+			   need to fix it.*/
+			while (divisor > 1) {
+				factor++;
+				divisor /= 2;
+				multiple = multiple << 1;
+			}
+			if (ref_count != multiple) {
+				log_err( _("Directory #%llu (0x%llx) has an "
+					   "invalid number of pointers to "
+					   "leaf #%llu (0x%llx)\n\tFound: %u, "
+					   "which is not a power of 2.\n"),
+					 (unsigned long long)
+					 ip->i_di.di_num.no_addr,
+					 (unsigned long long)
+					 ip->i_di.di_num.no_addr,
+					 (unsigned long long)old_leaf,
+					 (unsigned long long)old_leaf,
+					 ref_count);
+				if (!query( _("Attempt to fix it? (y/n) "))) {
+					log_err( _("Directory inode was not "
+						   "fixed.\n"));
+					return 1;
 				}
-				gfs2_leaf_in(&oldleaf, lbh->b_data);
+				error = fix_leaf_pointers(ip, &lindex,
+							  ref_count, multiple);
+				if (error)
+					return error;
+				ref_count = multiple;
+				log_err( _("Directory inode was fixed.\n"));
+			}
+			/* Check to see if the counted number of leaf pointers
+			   is what we expect. */
+			if (ref_count != exp_count) {
+				log_err( _("Directory #%llu (0x%llx) has an "
+					   "incorrect number of pointers to "
+					   "leaf #%llu (0x%llx)\n\tFound: "
+					   "%u,  Expected: %u\n"),
+					 (unsigned long long)
+					 ip->i_di.di_num.no_addr,
+					 (unsigned long long)
+					 ip->i_di.di_num.no_addr,
+					 (unsigned long long)old_leaf,
+					 (unsigned long long)old_leaf,
+					 ref_count, exp_count);
+				if (!query( _("Attempt to fix it? (y/n) "))) {
+					log_err( _("Directory leaf was not "
+						   "fixed.\n"));
+					return 1;
+				}
+				lbh = bread(sbp, old_leaf);
+				gfs2_leaf_in(&oldleaf, lbh);
+				log_err( _("Leaf depth was %d, changed to "
+					   "%d\n"), oldleaf.lf_depth,
+					 ip->i_di.di_depth - factor);
 				oldleaf.lf_depth = ip->i_di.di_depth - factor;
-				gfs2_leaf_out(&oldleaf, lbh->b_data);
-				brelse(lbh, updated);
+				gfs2_leaf_out(&oldleaf, lbh);
+				brelse(lbh);
+				exp_count = ref_count;
+				log_err( _("Directory leaf was fixed.\n"));
 			}
-			else
-				return 1;
 		}
 		ref_count = 1;
 
@@ -460,19 +672,19 @@ static int check_leaf_blks(struct gfs2_inode *ip, enum update_flags *update,
 				break;
 			}
 
-			*update = not_updated;
 			/* Try to read in the leaf block. */
-			lbh = bread(&sbp->buf_list, leaf_no);
+			lbh = bread(sbp, leaf_no);
 			/* Make sure it's really a valid leaf block. */
 			if (gfs2_check_meta(lbh, GFS2_METATYPE_LF)) {
 				warn_and_patch(ip, &leaf_no, &bad_leaf,
 					       old_leaf, first_ok_leaf, lindex,
 					       _("that is not really a leaf"));
 				memcpy(&leaf, &oldleaf, sizeof(oldleaf));
-				brelse(lbh, not_updated);
+				bmodified(lbh);
+				brelse(lbh);
 				break;
 			}
-			gfs2_leaf_in(&leaf, lbh->b_data);
+			gfs2_leaf_in(&leaf, lbh);
 			if(pass->check_leaf) {
 				error = pass->check_leaf(ip, leaf_no, lbh,
 							 pass->private);
@@ -488,9 +700,8 @@ static int check_leaf_blks(struct gfs2_inode *ip, enum update_flags *update,
 			if (leaf.lf_dirent_format == (GFS2_FORMAT_DE << 16)) {
 				log_debug( _("incorrect lf_dirent_format at leaf #%" PRIu64 "\n"), leaf_no);
 				leaf.lf_dirent_format = GFS2_FORMAT_DE;
-				gfs2_leaf_out(&leaf, lbh->b_data);
+				gfs2_leaf_out(&leaf, lbh);
 				log_debug( _("Fixing lf_dirent_format.\n"));
-				*update = (opts.no ? not_updated : updated);
 			}
 
 			/* Make sure it's really a leaf. */
@@ -504,34 +715,32 @@ static int check_leaf_blks(struct gfs2_inode *ip, enum update_flags *update,
 					ip->i_di.di_num.no_addr,
 					(unsigned long long)leaf_no,
 					(unsigned long long)leaf_no);
-				brelse(lbh, *update);
+				brelse(lbh);
 				break;
 			}
 			exp_count = (1 << (ip->i_di.di_depth - leaf.lf_depth));
-			log_debug( _("expected count %u - di_depth %u, leaf depth %u\n"),
-					  exp_count, ip->i_di.di_depth, leaf.lf_depth);
+			/*log_debug( _("expected count %u - di_depth %u,
+			  leaf depth %u\n"),
+			  exp_count, ip->i_di.di_depth, leaf.lf_depth);*/
 
-			if(pass->check_dentry &&
-			   S_ISDIR(ip->i_di.di_mode)) {
+			if(pass->check_dentry && S_ISDIR(ip->i_di.di_mode)) {
 				error = check_entries(ip, lbh, DIR_EXHASH,
-						      update, &count, pass);
+						      &count, pass);
 
 				/* Since the buffer possibly got
 				 * updated directly, release it now,
 				 * and grab it again later if we need it. */
 
-				brelse(lbh, *update);
+				brelse(lbh);
 
 				if(error < 0) {
 					stack;
 					return -1;
 				}
 
-				if(update && (count != leaf.lf_entries)) {
-					enum update_flags f = not_updated;
-
-					lbh = bread(&sbp->buf_list, leaf_no);
-					gfs2_leaf_in(&leaf, lbh->b_data);
+				if(count != leaf.lf_entries) {
+					lbh = bread(sbp, leaf_no);
+					gfs2_leaf_in(&leaf, lbh);
 
 					log_err( _("Leaf %llu (0x%llx) entry "
 						"count in directory %llu"
@@ -545,22 +754,19 @@ static int check_leaf_blks(struct gfs2_inode *ip, enum update_flags *update,
 						(unsigned long long)
 						ip->i_di.di_num.no_addr,
 						leaf.lf_entries, count);
-					errors_found++;
-					if(query(&opts, _("Update leaf entry count? (y/n) "))) {
-						errors_corrected++;
+					if(query( _("Update leaf entry count? (y/n) "))) {
 						leaf.lf_entries = count;
-						gfs2_leaf_out(&leaf, lbh->b_data);
+						gfs2_leaf_out(&leaf, lbh);
 						log_warn( _("Leaf entry count updated\n"));
-						f = updated;
 					} else
 						log_err( _("Leaf entry count left in inconsistant state\n"));
-					brelse(lbh, f);
+					brelse(lbh);
 				}
 				/* FIXME: Need to get entry count and
 				 * compare it against leaf->lf_entries */
 				break; /* not a chain; go back to outer loop */
 			} else {
-				brelse(lbh, *update);
+				brelse(lbh);
 				if(!leaf.lf_next)
 					break;
 				leaf_no = leaf.lf_next;
@@ -575,8 +781,7 @@ static int check_leaf_blks(struct gfs2_inode *ip, enum update_flags *update,
 
 static int check_eattr_entries(struct gfs2_inode *ip,
 			       struct gfs2_buffer_head *bh,
-			       struct metawalk_fxns *pass,
-			       enum update_flags *update_it)
+			       struct metawalk_fxns *pass)
 {
 	struct gfs2_ea_header *ea_hdr, *ea_hdr_prev = NULL;
 	uint64_t *ea_data_ptr = NULL;
@@ -584,10 +789,8 @@ static int check_eattr_entries(struct gfs2_inode *ip,
 	int error = 0;
 	uint32_t offset = (uint32_t)sizeof(struct gfs2_meta_header);
 
-	*update_it = 0;
-	if(!pass->check_eattr_entry) {
+	if(!pass->check_eattr_entry)
 		return 0;
-	}
 
 	ea_hdr = (struct gfs2_ea_header *)(bh->b_data +
 					  sizeof(struct gfs2_meta_header));
@@ -603,8 +806,6 @@ static int check_eattr_entries(struct gfs2_inode *ip,
 			stack;
 			return -1;
 		}
-		if (error > 0)
-			*update_it = updated;
 		if(error == 0 && pass->check_eattr_extentry &&
 		   ea_hdr->ea_num_ptrs) {
 			uint32_t tot_ealen = 0;
@@ -626,24 +827,21 @@ static int check_eattr_entries(struct gfs2_inode *ip,
 							      ea_data_ptr,
 							      bh, ea_hdr,
 							      ea_hdr_prev,
-							      update_it,
 							      pass->private)) {
-					errors_found++;
-					if (query(&opts, _("Repair the bad "
-							 "Extended Attribute? "
-							   "(y/n) "))) {
-						errors_corrected++;
+					if (query( _("Repair the bad Extended "
+						     "Attribute? (y/n) "))) {
 						ea_hdr->ea_num_ptrs = i;
 						ea_hdr->ea_data_len =
 							cpu_to_be32(tot_ealen);
 						*ea_data_ptr = 0;
-						*update_it = 1;
+						bmodified(bh);
 						/* Endianness doesn't matter
 						   in this case because it's
 						   a single byte. */
-						gfs2_block_set(sdp, bl,
-							     ip->i_di.di_eattr,
-							     gfs2_meta_eattr);
+						fsck_blockmap_set(ip,
+						       ip->i_di.di_eattr,
+						       _("extended attribute"),
+						       gfs2_meta_eattr);
 						log_err( _("The EA was "
 							   "fixed.\n"));
 					} else {
@@ -679,38 +877,29 @@ static int check_eattr_entries(struct gfs2_inode *ip,
  * Returns: 0 on success, 1 if removal is needed, -1 on error
  */
 static int check_leaf_eattr(struct gfs2_inode *ip, uint64_t block,
-			    uint64_t parent, enum update_flags *want_updated,
-			    struct metawalk_fxns *pass)
+			    uint64_t parent, struct metawalk_fxns *pass)
 {
 	struct gfs2_buffer_head *bh = NULL;
 	int error = 0;
-	enum update_flags updated_this_leaf = not_updated;
 
 	log_debug( _("Checking EA leaf block #%"PRIu64" (0x%" PRIx64 ").\n"),
 			  block, block);
 
 	if(pass->check_eattr_leaf) {
 		error = pass->check_eattr_leaf(ip, block, parent, &bh,
-					       &updated_this_leaf,
 					       pass->private);
-		if (updated_this_leaf) /* if this leaf was updated */
-			*want_updated = updated; /* signal it for the parent */
 		if(error < 0) {
 			stack;
 			return -1;
 		}
 		if(error > 0) {
 			if (bh)
-				brelse(bh, updated_this_leaf);
+				brelse(bh);
 			return 1;
 		}
 		if (bh) {
-			error = check_eattr_entries(ip, bh, pass,
-						    &updated_this_leaf);
-			brelse(bh, updated_this_leaf);
-			if (updated_this_leaf) /* if this leaf was updated */
-				*want_updated = updated; /* signal it for
-							    the parent */
+			error = check_eattr_entries(ip, bh, pass);
+			brelse(bh);
 		}
 		return error;
 	}
@@ -719,6 +908,63 @@ static int check_leaf_eattr(struct gfs2_inode *ip, uint64_t block,
 }
 
 /**
+ * delete_block - delete a block associated with an inode
+ */
+int delete_block(struct gfs2_inode *ip, uint64_t block,
+		 struct gfs2_buffer_head **bh, const char *btype,
+		 void *private)
+{
+	if (gfs2_check_range(ip->i_sbd, block) == 0) {
+		fsck_blockmap_set(ip, block, btype, gfs2_block_free);
+		return 0;
+	}
+	return -1;
+}
+
+/**
+ * delete_block_if_notdup - delete blocks associated with an inode
+ *
+ * Ignore blocks that are already marked free.
+ * If it has been identified as duplicate, remove the duplicate reference.
+ * If all duplicate references have been removed, delete the block.
+ */
+static int delete_block_if_notdup(struct gfs2_inode *ip, uint64_t block,
+				  struct gfs2_buffer_head **bh,
+				  const char *btype, void *private)
+{
+	uint8_t q;
+	struct duptree *d;
+
+	if (gfs2_check_range(ip->i_sbd, block) != 0)
+		return -EFAULT;
+
+	q = block_type(block);
+	if (q == gfs2_block_free) {
+		log_info( _("%s block %lld (0x%llx), part of inode "
+			    "%lld (0x%llx), was already free.\n"),
+			  btype, (unsigned long long)block,
+			  (unsigned long long)block,
+			  (unsigned long long)ip->i_di.di_num.no_addr,
+			  (unsigned long long)ip->i_di.di_num.no_addr);
+		return 0;
+	}
+	d = dupfind(block);
+	if (d) {
+		log_info( _("Removing duplicate reference %d "
+			    "to block %lld (0x%llx).\n"), d->refs,
+			  (unsigned long long)block,
+			  (unsigned long long)block);
+		d->refs--; /* one less reference */
+		if (d->refs == 1) /* If down to the last reference */
+			dup_delete(d); /* not duplicate now */
+		return 1; /* but the original ref still exists
+			     so return (do not free it). */
+	}
+	fsck_blockmap_set(ip, block, btype, gfs2_block_free);
+	return 0;
+}
+
+/**
  * check_indirect_eattr
  * @ip: the inode the eattr comes from
  * @indirect_block
@@ -726,7 +972,6 @@ static int check_leaf_eattr(struct gfs2_inode *ip, uint64_t block,
  * Returns: 0 on success -1 on error
  */
 static int check_indirect_eattr(struct gfs2_inode *ip, uint64_t indirect,
-				enum update_flags *want_updated,
 				struct metawalk_fxns *pass)
 {
 	int error = 0;
@@ -734,19 +979,16 @@ static int check_indirect_eattr(struct gfs2_inode *ip, uint64_t indirect,
 	uint64_t block;
 	struct gfs2_buffer_head *indirect_buf = NULL;
 	struct gfs2_sbd *sdp = ip->i_sbd;
-	enum update_flags update_indir_block = not_updated;
 	int first_ea_is_bad = 0;
 	uint64_t di_eattr_save = ip->i_di.di_eattr;
 
-	*want_updated = not_updated;
 	log_debug( _("Checking EA indirect block #%"PRIu64" (0x%" PRIx64 ").\n"),
 			  indirect, indirect);
 
 	if (!pass->check_eattr_indir)
 		return 0;
 	error = pass->check_eattr_indir(ip, indirect, ip->i_di.di_num.no_addr,
-					&indirect_buf, want_updated,
-					pass->private);
+					&indirect_buf, pass->private);
 	if (!error) {
 		int leaf_pointers = 0, leaf_pointer_errors = 0;
 
@@ -758,19 +1000,11 @@ static int check_indirect_eattr(struct gfs2_inode *ip, uint64_t indirect,
 		while(*ea_leaf_ptr && (ea_leaf_ptr < end)){
 			block = be64_to_cpu(*ea_leaf_ptr);
 			leaf_pointers++;
-			error = check_leaf_eattr(ip, block, indirect,
-						 want_updated, pass);
+			error = check_leaf_eattr(ip, block, indirect, pass);
 			if (error) {
 				leaf_pointer_errors++;
-				if (update_indir_block == not_updated) {
-					errors_found++;
-					if (query(&opts, _("Fix the indirect "
-						"block too? (y/n) "))) {
-						update_indir_block = updated;
-						errors_corrected++;
-						*ea_leaf_ptr = 0;
-					}
-				} else
+				if (query( _("Fix the indirect "
+					     "block too? (y/n) ")))
 					*ea_leaf_ptr = 0;
 			}
 			/* If the first eattr lead is bad, we can't have
@@ -787,7 +1021,6 @@ static int check_indirect_eattr(struct gfs2_inode *ip, uint64_t indirect,
 					pass->finish_eattr_indir(ip,
 							leaf_pointers,
 							leaf_pointer_errors,
-							want_updated,
 							pass->private);
 			} else if (leaf_pointer_errors) {
 				/* This is a bit tricky.  We can't have eattr
@@ -798,6 +1031,7 @@ static int check_indirect_eattr(struct gfs2_inode *ip, uint64_t indirect,
 				   GGGG when we finish.  To do that, we set
 				   di_eattr to 0 temporarily. */
 				ip->i_di.di_eattr = 0;
+				bmodified(ip->i_bh);
 			}
 			ea_leaf_ptr++;
 		}
@@ -810,21 +1044,17 @@ static int check_indirect_eattr(struct gfs2_inode *ip, uint64_t indirect,
 					ip->i_di.di_eattr = di_eattr_save;
 				pass->finish_eattr_indir(ip, leaf_pointers,
 							 leaf_pointer_errors,
-							 want_updated,
 							 pass->private);
 			}
-			if (leaf_pointer_errors == leaf_pointers) {
-				if (*want_updated)
-					gfs2_set_bitmap(sdp, indirect,
-							GFS2_BLKST_FREE);
-				gfs2_block_set(sdp, bl, indirect,
-					       gfs2_block_free);
+			if (leaf_pointer_errors &&
+			    leaf_pointer_errors == leaf_pointers) {
+				delete_block(ip, indirect, NULL, "leaf", NULL);
 				error = 1;
 			}
 		}
 	}
 	if (indirect_buf)
-		brelse(indirect_buf, update_indir_block);
+		brelse(indirect_buf);
 
 	return error;
 }
@@ -835,8 +1065,7 @@ static int check_indirect_eattr(struct gfs2_inode *ip, uint64_t indirect,
  *
  * Returns: 0 on success, -1 on error
  */
-int check_inode_eattr(struct gfs2_inode *ip, enum update_flags *want_updated,
-		      struct metawalk_fxns *pass)
+int check_inode_eattr(struct gfs2_inode *ip, struct metawalk_fxns *pass)
 {
 	int error = 0;
 
@@ -848,13 +1077,11 @@ int check_inode_eattr(struct gfs2_inode *ip, enum update_flags *want_updated,
 		  (unsigned long long)ip->i_di.di_num.no_addr);
 
 	if(ip->i_di.di_flags & GFS2_DIF_EA_INDIRECT){
-		if((error = check_indirect_eattr(ip, ip->i_di.di_eattr,
-						 want_updated, pass)))
+		if((error = check_indirect_eattr(ip, ip->i_di.di_eattr, pass)))
 			stack;
 	} else {
 		error = check_leaf_eattr(ip, ip->i_di.di_eattr,
-					 ip->i_di.di_num.no_addr,
-					 want_updated, pass);
+					 ip->i_di.di_num.no_addr, pass);
 		if (error)
 			stack;
 	}
@@ -863,30 +1090,64 @@ int check_inode_eattr(struct gfs2_inode *ip, enum update_flags *want_updated,
 }
 
 /**
+ * free_metalist - free all metadata on a multi-level metadata list
+ */
+static void free_metalist(struct gfs2_inode *ip, osi_list_t *mlp)
+{
+	int i;
+	struct gfs2_buffer_head *nbh;
+
+	for (i = 0; i < GFS2_MAX_META_HEIGHT; i++) {
+		osi_list_t *list;
+
+		list = &mlp[i];
+		while (!osi_list_empty(list)) {
+			nbh = osi_list_entry(list->next,
+					     struct gfs2_buffer_head, b_altlist);
+			if (nbh == ip->i_bh)
+				osi_list_del(&nbh->b_altlist);
+			else
+				brelse(nbh);
+		}
+	}
+}
+
+/**
  * build_and_check_metalist - check a bunch of indirect blocks
- * Note: Every buffer put on the metalist should be "held".
+ *                            This includes hash table blocks for directories
+ *                            which are technically "data" in the bitmap.
+ *
  * @ip:
  * @mlp:
  */
-static int build_and_check_metalist(struct gfs2_inode *ip,
-				    osi_list_t *mlp,
+static int build_and_check_metalist(struct gfs2_inode *ip, osi_list_t *mlp,
 				    struct metawalk_fxns *pass)
 {
 	uint32_t height = ip->i_di.di_height;
-	struct gfs2_buffer_head *bh, *nbh, *metabh;
+	struct gfs2_buffer_head *bh, *nbh, *metabh = ip->i_bh;
 	osi_list_t *prev_list, *cur_list, *tmp;
-	int i, head_size;
+	int i, head_size, iblk_type;
 	uint64_t *ptr, block;
-	int err;
-
-	metabh = bread(&ip->i_sbd->buf_list, ip->i_di.di_num.no_addr);
+	int error = 0, err;
 
 	osi_list_add(&metabh->b_altlist, &mlp[0]);
 
+	/* Directories are special.  Their 'data' is the hash table, which is
+	   basically an indirect block list. Their height is not important
+	   because it checks everything through the hash table using
+	   "depth" field calculations. However, we still have to check the
+	   indirect blocks, even if the height == 1.  */
+	if (S_ISDIR(ip->i_di.di_mode)) {
+		height++;
+		iblk_type = GFS2_METATYPE_JD;
+	} else {
+		iblk_type = GFS2_METATYPE_IN;
+	}
+
 	/* if(<there are no indirect blocks to check>) */
 	if (height < 2)
 		return 0;
-	for (i = 1; i < height; i++){
+	for (i = 1; i < height; i++) {
 		prev_list = &mlp[i - 1];
 		cur_list = &mlp[i];
 
@@ -896,7 +1157,7 @@ static int build_and_check_metalist(struct gfs2_inode *ip,
 
 			if (i > 1) {
 				/* if this isn't really a block list skip it */
-				if (gfs2_check_meta(bh, GFS2_METATYPE_IN))
+				if (gfs2_check_meta(bh, iblk_type))
 					continue;
 				head_size = sizeof(struct gfs2_meta_header);
 			} else {
@@ -905,10 +1166,12 @@ static int build_and_check_metalist(struct gfs2_inode *ip,
 					continue;
 				head_size = sizeof(struct gfs2_dinode);
 			}
-
+			/* Now check the metadata itself */
 			for (ptr = (uint64_t *)(bh->b_data + head_size);
 			     (char *)ptr < (bh->b_data + ip->i_sbd->bsize);
 			     ptr++) {
+				if (skip_this_pass || fsck_abort)
+					return FSCK_OK;
 				nbh = NULL;
 
 				if (!*ptr)
@@ -921,37 +1184,77 @@ static int build_and_check_metalist(struct gfs2_inode *ip,
 				   it gets with "bread". */
 				if(err < 0) {
 					stack;
+					error = err;
 					goto fail;
 				}
 				if(err > 0) {
+					if (!error)
+						error = err;
 					log_debug( _("Skipping block %" PRIu64
-						  " (0x%" PRIx64 ")\n"),
-						  block, block);
+						     " (0x%" PRIx64 ")\n"),
+						   block, block);
+					continue;
+				}
+				if (gfs2_check_range(ip->i_sbd, block)) {
+					log_debug( _("Skipping invalid block "
+						     "%lld (0x%llx)\n"),
+						   (unsigned long long)block,
+						   (unsigned long long)block);
 					continue;
 				}
 				if(!nbh)
-					nbh = bread(&ip->i_sbd->buf_list,
-						    block);
-
+					nbh = bread(ip->i_sbd, block);
 				osi_list_add(&nbh->b_altlist, cur_list);
 			} /* for all data on the indirect block */
 		} /* for blocks at that height */
 	} /* for height */
-	return 0;
+	return error;
 fail:
-	for (i = 0; i < GFS2_MAX_META_HEIGHT; i++) {
-		osi_list_t *list;
-		list = &mlp[i];
-		while (!osi_list_empty(list)) {
-			nbh = osi_list_entry(list->next,
-					     struct gfs2_buffer_head, b_altlist);
-			brelse(nbh, not_updated);
-			osi_list_del(&nbh->b_altlist);
-		}
+	free_metalist(ip, mlp);
+	return error;
+}
+
+/**
+ * check_data - check all data pointers for a given buffer
+ *              This does not include "data" blocks that are really
+ *              hash table blocks for directories.
+ *
+ * @ip:
+ *
+ * returns: +ENOENT if there are too many bad pointers
+ *          -1 if a more serious error occurred.
+ *          0 if no errors occurred
+ *          1 if errors were found and corrected
+ *          2 (ENOENT) if there were too many bad pointers
+ */
+static int check_data(struct gfs2_inode *ip, struct metawalk_fxns *pass,
+		      uint64_t *ptr_start, char *ptr_end,
+		      uint64_t *blks_checked)
+{
+	int error = 0, rc = 0;
+	uint64_t block, *ptr;
+
+	/* If there isn't much pointer corruption check the pointers */
+	for (ptr = ptr_start ; (char *)ptr < ptr_end && !fsck_abort; ptr++) {
+		if (!*ptr)
+			continue;
+
+		if (skip_this_pass || fsck_abort)
+			return error;
+		block =  be64_to_cpu(*ptr);
+		/* It's important that we don't call gfs2_check_range and
+		   bypass calling check_data on invalid blocks because that
+		   would defeat the rangecheck_block related functions in
+		   pass1. Therefore the individual check_data functions
+		   should do a range check. */
+		rc = pass->check_data(ip, block, pass->private);
+		if (rc < 0)
+			return rc;
+		if (!error && rc)
+			error = rc;
+		(*blks_checked)++;
 	}
-	/* This is an error path, so we need to release the buffer here: */
-	brelse(metabh, not_updated);
-	return -1;
+	return error;
 }
 
 /**
@@ -963,102 +1266,121 @@ fail:
 int check_metatree(struct gfs2_inode *ip, struct metawalk_fxns *pass)
 {
 	osi_list_t metalist[GFS2_MAX_META_HEIGHT];
-	osi_list_t *list, *tmp;
+	osi_list_t *list;
 	struct gfs2_buffer_head *bh;
-	uint64_t block, *ptr;
 	uint32_t height = ip->i_di.di_height;
 	int  i, head_size;
-	enum update_flags update = not_updated;
-	int error = 0;
+	uint64_t blks_checked = 0;
+	int error, rc;
 
-	if (!height)
-		goto end;
+	if (!height && !S_ISDIR(ip->i_di.di_mode))
+		return 0;
 
 	for (i = 0; i < GFS2_MAX_META_HEIGHT; i++)
 		osi_list_init(&metalist[i]);
 
-	/* create metalist for each level */
-	if (build_and_check_metalist(ip, &metalist[0], pass)){
+	/* create and check the metadata list for each height */
+	error = build_and_check_metalist(ip, &metalist[0], pass);
+	if (error) {
 		stack;
-		return -1;
+		return error;
 	}
 
-	/* We don't need to record directory blocks - they will be
-	 * recorded later...i think... */
-        if (S_ISDIR(ip->i_di.di_mode))
-		log_debug( _("Directory with height > 0 at %llu (0x%llx)\n"),
-			  (unsigned long long)ip->i_di.di_num.no_addr,
-			  (unsigned long long)ip->i_di.di_num.no_addr);
+	/* For directories, we've already checked the "data" blocks which
+	 * comprise the directory hash table, so we perform the directory
+	 * checks and exit. */
+        if (S_ISDIR(ip->i_di.di_mode)) {
+		free_metalist(ip, &metalist[0]);
+		if (!(ip->i_di.di_flags & GFS2_DIF_EXHASH))
+			return 0;
+		/* check validity of leaf blocks and leaf chains */
+		error = check_leaf_blks(ip, pass);
+		return error;
+	}
+
+	/* Free the metalist buffers from heights we don't need to check.
+	   For the rest we'll free as we check them to save time.
+	   metalist[0] will only have the dinode bh, so we can skip it. */
+	for (i = 1; i < height - 1; i++) {
+		list = &metalist[i];
+		while (!osi_list_empty(list)) {
+			bh = osi_list_entry(list->next,
+					    struct gfs2_buffer_head, b_altlist);
+			if (bh == ip->i_bh)
+				osi_list_del(&bh->b_altlist);
+			else
+				brelse(bh);
+		}
+	}
 
 	/* check data blocks */
 	list = &metalist[height - 1];
+	if (ip->i_di.di_blocks > COMFORTABLE_BLKS)
+		last_reported_fblock = -10000000;
 
-	for (tmp = list->next; tmp != list; tmp = tmp->next) {
-		bh = osi_list_entry(tmp, struct gfs2_buffer_head, b_altlist);
+	while (error >= 0 && !osi_list_empty(list)) {
+		if (fsck_abort)
+			return 0;
+		bh = osi_list_entry(list->next, struct gfs2_buffer_head,
+				    b_altlist);
 
 		if (height > 1) {
-			/* if this isn't really a block list skip it */
-			if (gfs2_check_meta(bh, GFS2_METATYPE_IN))
+			if (gfs2_check_meta(bh, GFS2_METATYPE_IN)) {
+				if (bh == ip->i_bh)
+					osi_list_del(&bh->b_altlist);
+				else
+					brelse(bh);
 				continue;
+			}
 			head_size = sizeof(struct gfs2_meta_header);
 		} else {
 			/* if this isn't really a dinode, skip it */
-			if (gfs2_check_meta(bh, GFS2_METATYPE_DI))
+			if (gfs2_check_meta(bh, GFS2_METATYPE_DI)) {
+				if (bh == ip->i_bh)
+					osi_list_del(&bh->b_altlist);
+				else
+					brelse(bh);
 				continue;
+			}
 			head_size = sizeof(struct gfs2_dinode);
 		}
-		ptr = (uint64_t *)(bh->b_data + head_size);
 
-		for ( ; (char *)ptr < (bh->b_data + ip->i_sbd->bsize); ptr++) {
-			if (!*ptr)
-				continue;
-
-			block =  be64_to_cpu(*ptr);
-
-			if(pass->check_data &&
-			   (pass->check_data(ip, block, pass->private) < 0)) {
-				stack;
-				return -1;
-			}
-		}
-	}
+		if (pass->check_data)
+			rc = check_data(ip, pass, (uint64_t *)
+					(bh->b_data + head_size),
+					(bh->b_data + ip->i_sbd->bsize),
+					&blks_checked);
+		else
+			rc = 0;
 
-	/* free metalists */
-	for (i = 0; i < GFS2_MAX_META_HEIGHT; i++)
-	{
-		list = &metalist[i];
-		while (!osi_list_empty(list))
-		{
-			bh = osi_list_entry(list->next,
-					    struct gfs2_buffer_head, b_altlist);
-			brelse(bh, not_updated);
+		if (rc && (!error || rc < 0))
+			error = rc;
+		if (ip->i_di.di_blocks > COMFORTABLE_BLKS)
+			big_file_comfort(ip, blks_checked);
+		if (bh == ip->i_bh)
 			osi_list_del(&bh->b_altlist);
-		}
+		else
+			brelse(bh);
 	}
-
-end:
-        if (S_ISDIR(ip->i_di.di_mode)) {
-		/* check validity of leaf blocks and leaf chains */
-		if (ip->i_di.di_flags & GFS2_DIF_EXHASH) {
-			error = check_leaf_blks(ip, &update, pass);
-			if(error < 0)
-				return -1;
-			if(error > 0)
-				return 1;
-		}
+	if (ip->i_di.di_blocks > COMFORTABLE_BLKS) {
+		log_notice( _("\rLarge file at %lld (0x%llx) - 100 percent "
+			      "complete.                                   "
+			      "\n"),
+			    (unsigned long long)ip->i_di.di_num.no_addr,
+			    (unsigned long long)ip->i_di.di_num.no_addr);
+		fflush(stdout);
 	}
-
-	return 0;
+	return error;
 }
 
 /* Checks stuffed inode directories */
-static int check_linear_dir(struct gfs2_inode *ip, struct gfs2_buffer_head *bh,
-		     enum update_flags *update, struct metawalk_fxns *pass)
+int check_linear_dir(struct gfs2_inode *ip, struct gfs2_buffer_head *bh,
+		     struct metawalk_fxns *pass)
 {
 	int error = 0;
 	uint16_t count = 0;
 
-	error = check_entries(ip, bh, DIR_LINEAR, update, &count, pass);
+	error = check_entries(ip, bh, DIR_LINEAR, &count, pass);
 	if(error < 0) {
 		stack;
 		return -1;
@@ -1067,43 +1389,29 @@ static int check_linear_dir(struct gfs2_inode *ip, struct gfs2_buffer_head *bh,
 	return error;
 }
 
-
 int check_dir(struct gfs2_sbd *sbp, uint64_t block, struct metawalk_fxns *pass)
 {
-	struct gfs2_buffer_head *bh;
 	struct gfs2_inode *ip;
-	enum update_flags update = not_updated;
 	int error = 0;
 
-	bh = bread(&sbp->buf_list, block);
-	ip = fsck_inode_get(sbp, bh);
+	ip = fsck_load_inode(sbp, block);
 
-	if(ip->i_di.di_flags & GFS2_DIF_EXHASH) {
-		error = check_leaf_blks(ip, &update, pass);
-		if(error < 0) {
-			stack;
-			fsck_inode_put(ip, not_updated); /* does brelse(bh); */
-			return -1;
-		}
-	}
-	else {
-		error = check_linear_dir(ip, bh, &update, pass);
-		if(error < 0) {
-			stack;
-			fsck_inode_put(ip, not_updated); /* does brelse(bh); */
-			return -1;
-		}
-	}
+	if(ip->i_di.di_flags & GFS2_DIF_EXHASH)
+		error = check_leaf_blks(ip, pass);
+	else
+		error = check_linear_dir(ip, ip->i_bh, pass);
 
-	fsck_inode_put(ip, opts.no ? not_updated : update); /* does a brelse */
+	if(error < 0)
+		stack;
+
+	fsck_inode_put(&ip); /* does a brelse */
 	return error;
 }
 
 static int remove_dentry(struct gfs2_inode *ip, struct gfs2_dirent *dent,
 			 struct gfs2_dirent *prev_de,
 			 struct gfs2_buffer_head *bh,
-			 char *filename, enum update_flags *update,
-			 uint16_t *count, void *private)
+			 char *filename, uint16_t *count, void *private)
 {
 	/* the metawalk_fxn's private field must be set to the dentry
 	 * block we want to clear */
@@ -1113,7 +1421,6 @@ static int remove_dentry(struct gfs2_inode *ip, struct gfs2_dirent *dent,
 	memset(&dentry, 0, sizeof(struct gfs2_dirent));
 	gfs2_dirent_in(&dentry, (char *)dent);
 	de = &dentry;
-	*update = (opts.no ? not_updated : updated);
 
 	if(de->de_inum.no_addr == *dentryblock)
 		dirent2_del(ip, bh, prev_de, dent);
@@ -1128,7 +1435,7 @@ int remove_dentry_from_dir(struct gfs2_sbd *sbp, uint64_t dir,
 			   uint64_t dentryblock)
 {
 	struct metawalk_fxns remove_dentry_fxns = {0};
-	struct gfs2_block_query q;
+	uint8_t q;
 	int error;
 
 	log_debug( _("Removing dentry %" PRIu64 " (0x%" PRIx64 ") from directory %"
@@ -1140,11 +1447,8 @@ int remove_dentry_from_dir(struct gfs2_sbd *sbp, uint64_t dir,
 	remove_dentry_fxns.private = &dentryblock;
 	remove_dentry_fxns.check_dentry = remove_dentry;
 
-	if(gfs2_block_check(sbp, bl, dir, &q)) {
-		stack;
-		return -1;
-	}
-	if(q.block_type != gfs2_inode_dir) {
+	q = block_type(dir);
+	if(q != gfs2_inode_dir) {
 		log_info( _("Parent block is not a directory...ignoring\n"));
 		return 1;
 	}
@@ -1155,128 +1459,132 @@ int remove_dentry_from_dir(struct gfs2_sbd *sbp, uint64_t dir,
 	return error;
 }
 
-/* FIXME: These should be merged with the hash routines in inode_hash.c */
-static uint32_t dinode_hash(uint64_t block_no)
+int delete_metadata(struct gfs2_inode *ip, uint64_t block,
+		    struct gfs2_buffer_head **bh, void *private)
 {
-	unsigned int h;
-
-	h = fsck_hash(&block_no, sizeof (uint64_t));
-	h &= FSCK_HASH_MASK;
-
-	return h;
+	return delete_block_if_notdup(ip, block, bh, _("metadata"), private);
 }
 
-int find_di(struct gfs2_sbd *sbp, uint64_t childblock, struct dir_info **dip)
+int delete_leaf(struct gfs2_inode *ip, uint64_t block,
+		struct gfs2_buffer_head *bh, void *private)
 {
-	osi_list_t *bucket = &dir_hash[dinode_hash(childblock)];
-	osi_list_t *tmp;
-	struct dir_info *di = NULL;
-
-	osi_list_foreach(tmp, bucket) {
-		di = osi_list_entry(tmp, struct dir_info, list);
-		if(di->dinode == childblock) {
-			*dip = di;
-			return 0;
-		}
-	}
-	*dip = NULL;
-	return -1;
-
+	return delete_block_if_notdup(ip, block, &bh, _("leaf"), private);
 }
 
-int dinode_hash_insert(osi_list_t *buckets, uint64_t key, struct dir_info *di)
+int delete_data(struct gfs2_inode *ip, uint64_t block, void *private)
 {
-	osi_list_t *tmp;
-	osi_list_t *bucket = &buckets[dinode_hash(key)];
-	struct dir_info *dtmp = NULL;
-
-	if(osi_list_empty(bucket)) {
-		osi_list_add(&di->list, bucket);
-		return 0;
-	}
-
-	osi_list_foreach(tmp, bucket) {
-		dtmp = osi_list_entry(tmp, struct dir_info, list);
-		if(dtmp->dinode < key) {
-			continue;
-		}
-		else {
-			osi_list_add_prev(&di->list, tmp);
-			return 0;
-		}
-	}
-	osi_list_add_prev(&di->list, bucket);
-	return 0;
+	return delete_block_if_notdup(ip, block, NULL, _("data"), private);
 }
 
-int dinode_hash_remove(osi_list_t *buckets, uint64_t key)
+int delete_eattr_indir(struct gfs2_inode *ip, uint64_t block, uint64_t parent,
+		       struct gfs2_buffer_head **bh, void *private)
 {
-	osi_list_t *tmp;
-	osi_list_t *bucket = &buckets[dinode_hash(key)];
-	struct dir_info *dtmp = NULL;
-
-	if(osi_list_empty(bucket)) {
-		return -1;
-	}
-	osi_list_foreach(tmp, bucket) {
-		dtmp = osi_list_entry(tmp, struct dir_info, list);
-		if(dtmp->dinode == key) {
-			osi_list_del(tmp);
-			return 0;
-		}
-	}
-	return -1;
+	return delete_block_if_notdup(ip, block, NULL,
+				      _("indirect extended attribute"),
+				      private);
 }
 
-/**
- * delete_blocks - delete blocks associated with an inode
- */
-int delete_blocks(struct gfs2_inode *ip, uint64_t block,
-		  struct gfs2_buffer_head **bh, const char *btype,
-		  void *private)
+int delete_eattr_leaf(struct gfs2_inode *ip, uint64_t block, uint64_t parent,
+		      struct gfs2_buffer_head **bh, void *private)
 {
-	struct gfs2_block_query q = {0};
+	return delete_block_if_notdup(ip, block, NULL, _("extended attribute"),
+				      private);
+}
 
-	if (gfs2_check_range(ip->i_sbd, block) == 0) {
-		if (gfs2_block_check(ip->i_sbd, bl, block, &q))
-			return 0;
-		if (!q.dup_block) {
-			log_info( _("Deleting %s block %lld (0x%llx) as part "
-				    "of inode %lld (0x%llx)\n"), btype,
-				  (unsigned long long)block,
-				  (unsigned long long)block,
-				  (unsigned long long)ip->i_di.di_num.no_addr,
-				  (unsigned long long)ip->i_di.di_num.no_addr);
-			gfs2_block_set(ip->i_sbd, bl, block, gfs2_block_free);
-			gfs2_free_block(ip->i_sbd, block);
-		}
+static int alloc_metalist(struct gfs2_inode *ip, uint64_t block,
+			  struct gfs2_buffer_head **bh, void *private)
+{
+	uint8_t q;
+	const char *desc = (const char *)private;
+
+	/* No need to range_check here--if it was added, it's in range. */
+	/* We can't check the bitmap here because this function is called
+	   after the bitmap has been set but before the blockmap has. */
+	*bh = bread(ip->i_sbd, block);
+	q = block_type(block);
+	if (blockmap_to_bitmap(q) == GFS2_BLKST_FREE) { /* If not marked yet */
+		log_debug(_("%s reference to new metadata block "
+			    "%lld (0x%llx) is now marked as indirect.\n"),
+			  desc, (unsigned long long)block,
+			  (unsigned long long)block);
+		gfs2_blockmap_set(bl, block, gfs2_indir_blk);
 	}
 	return 0;
 }
 
-int delete_metadata(struct gfs2_inode *ip, uint64_t block,
-		    struct gfs2_buffer_head **bh, void *private)
+static int alloc_data(struct gfs2_inode *ip, uint64_t block, void *private)
 {
-	return delete_blocks(ip, block, bh, _("metadata"), private);
+	uint8_t q;
+	const char *desc = (const char *)private;
+
+	/* No need to range_check here--if it was added, it's in range. */
+	/* We can't check the bitmap here because this function is called
+	   after the bitmap has been set but before the blockmap has. */
+	q = block_type(block);
+	if (blockmap_to_bitmap(q) == GFS2_BLKST_FREE) { /* If not marked yet */
+		log_debug(_("%s reference to new data block "
+			    "%lld (0x%llx) is now marked as data.\n"),
+			  desc, (unsigned long long)block,
+			  (unsigned long long)block);
+		gfs2_blockmap_set(bl, block, gfs2_block_used);
+	}
+	return 0;
 }
 
-int delete_data(struct gfs2_inode *ip, uint64_t block, void *private)
+static int alloc_leaf(struct gfs2_inode *ip, uint64_t block,
+		      struct gfs2_buffer_head *bh, void *private)
 {
-	return delete_blocks(ip, block, NULL, _("data"), private);
+	uint8_t q;
+
+	/* No need to range_check here--if it was added, it's in range. */
+	/* We can't check the bitmap here because this function is called
+	   after the bitmap has been set but before the blockmap has. */
+	q = block_type(block);
+	if (blockmap_to_bitmap(q) == GFS2_BLKST_FREE) /* If not marked yet */
+		fsck_blockmap_set(ip, block, _("newly allocated leaf"),
+				  gfs2_leaf_blk);
+	return 0;
 }
 
-int delete_eattr_indir(struct gfs2_inode *ip, uint64_t block, uint64_t parent,
-		       struct gfs2_buffer_head **bh,
-		       enum update_flags *want_updated, void *private)
-{
-	return delete_blocks(ip, block, NULL, _("indirect extended attribute"),
-			     private);
-}
+struct metawalk_fxns alloc_fxns = {
+	.private = NULL,
+	.check_leaf = alloc_leaf,
+	.check_metalist = alloc_metalist,
+	.check_data = alloc_data,
+	.check_eattr_indir = NULL,
+	.check_eattr_leaf = NULL,
+	.check_dentry = NULL,
+	.check_eattr_entry = NULL,
+	.check_eattr_extentry = NULL,
+	.finish_eattr_indir = NULL,
+};
 
-int delete_eattr_leaf(struct gfs2_inode *ip, uint64_t block, uint64_t parent,
-		      struct gfs2_buffer_head **bh,
-		      enum update_flags *want_updated, void *private)
+/*
+ * reprocess_inode - fixes the blockmap to match the bitmap due to an
+ *                   unexpected block allocation via libgfs2.
+ *
+ * The problem we're trying to overcome here is when a new block must be
+ * added to a dinode because of a write.  This will happen when lost+found
+ * needs a new indirect block for its hash table.  In that case, the write
+ * causes a new block to be assigned in the bitmap but that block is not yet
+ * accurately reflected in the fsck blockmap.  We need to compensate here.
+ *
+ * We can't really use fsck_blockmap_set here because the new block
+ * was already allocated by libgfs2 and therefore it took care of
+ * the rgrp free space variable.  fsck_blockmap_set adjusts the free space
+ * in the rgrp according to the change, which has already been done.
+ * So it's only our blockmap that now disagrees with the rgrp bitmap, so we
+ * need to fix only that.
+ */
+void reprocess_inode(struct gfs2_inode *ip, const char *desc)
 {
-	return delete_blocks(ip, block, NULL, _("extended attribute"),
-			     private);
+	int error;
+
+	alloc_fxns.private = (void *)desc;
+	log_info( _("%s had blocks added; reprocessing its metadata tree "
+		    "at height=%d.\n"), desc, ip->i_di.di_height);
+	error = check_metatree(ip, &alloc_fxns);
+	if (error)
+		log_err( _("Error %d reprocessing the %s metadata tree.\n"),
+			 error, desc);
 }
diff --git a/gfs2/fsck/metawalk.h b/gfs2/fsck/metawalk.h
index 0721ed1..9eb2372 100644
--- a/gfs2/fsck/metawalk.h
+++ b/gfs2/fsck/metawalk.h
@@ -6,28 +6,39 @@
 
 struct metawalk_fxns;
 
-int check_inode_eattr(struct gfs2_inode *ip, enum update_flags *want_updated,
-		      struct metawalk_fxns *pass);
-int check_metatree(struct gfs2_inode *ip, struct metawalk_fxns *pass);
-int check_dir(struct gfs2_sbd *sbp, uint64_t block,
-			  struct metawalk_fxns *pass);
-int remove_dentry_from_dir(struct gfs2_sbd *sbp, uint64_t dir,
+extern int check_inode_eattr(struct gfs2_inode *ip,
+			     struct metawalk_fxns *pass);
+extern int check_metatree(struct gfs2_inode *ip, struct metawalk_fxns *pass);
+extern int check_dir(struct gfs2_sbd *sbp, uint64_t block,
+		     struct metawalk_fxns *pass);
+extern int check_linear_dir(struct gfs2_inode *ip, struct gfs2_buffer_head *bh,
+			    struct metawalk_fxns *pass);
+extern int remove_dentry_from_dir(struct gfs2_sbd *sbp, uint64_t dir,
 						   uint64_t dentryblock);
-int find_di(struct gfs2_sbd *sbp, uint64_t childblock, struct dir_info **dip);
-int dinode_hash_insert(osi_list_t *buckets, uint64_t key, struct dir_info *di);
-int dinode_hash_remove(osi_list_t *buckets, uint64_t key);
-int delete_blocks(struct gfs2_inode *ip, uint64_t block,
-		  struct gfs2_buffer_head **bh, const char *btype,
-		  void *private);
-int delete_metadata(struct gfs2_inode *ip, uint64_t block,
+extern int delete_block(struct gfs2_inode *ip, uint64_t block,
+		 struct gfs2_buffer_head **bh, const char *btype,
+		 void *private);
+extern int delete_metadata(struct gfs2_inode *ip, uint64_t block,
 		    struct gfs2_buffer_head **bh, void *private);
-int delete_data(struct gfs2_inode *ip, uint64_t block, void *private);
-int delete_eattr_indir(struct gfs2_inode *ip, uint64_t block, uint64_t parent,
-		       struct gfs2_buffer_head **bh,
-		       enum update_flags *want_updated, void *private);
-int delete_eattr_leaf(struct gfs2_inode *ip, uint64_t block, uint64_t parent,
-		      struct gfs2_buffer_head **bh,
-		      enum update_flags *want_updated, void *private);
+extern int delete_leaf(struct gfs2_inode *ip, uint64_t block,
+		struct gfs2_buffer_head *bh, void *private);
+extern int delete_data(struct gfs2_inode *ip, uint64_t block, void *private);
+extern int delete_eattr_indir(struct gfs2_inode *ip, uint64_t block, uint64_t parent,
+		       struct gfs2_buffer_head **bh, void *private);
+extern int delete_eattr_leaf(struct gfs2_inode *ip, uint64_t block, uint64_t parent,
+		      struct gfs2_buffer_head **bh, void *private);
+extern int _fsck_blockmap_set(struct gfs2_inode *ip, uint64_t bblock,
+		       const char *btype, enum gfs2_mark_block mark,
+		       const char *caller, int line);
+extern int check_n_fix_bitmap(struct gfs2_sbd *sdp, uint64_t blk,
+		       enum gfs2_mark_block new_blockmap_state);
+extern void reprocess_inode(struct gfs2_inode *ip, const char *desc);
+extern struct duptree *dupfind(uint64_t block);
+
+#define is_duplicate(dblock) ((dupfind(dblock)) ? 1 : 0)
+
+#define fsck_blockmap_set(ip, b, bt, m) _fsck_blockmap_set(ip, b, bt, m, \
+							   __FILE__, __LINE__)
 
 /* metawalk_fxns: function pointers to check various parts of the fs
  *
@@ -54,18 +65,14 @@ struct metawalk_fxns {
 			   void *private);
 	int (*check_eattr_indir) (struct gfs2_inode *ip, uint64_t block,
 				  uint64_t parent,
-				  struct gfs2_buffer_head **bh,
-				  enum update_flags *want_updated,
-				  void *private);
+				  struct gfs2_buffer_head **bh, void *private);
 	int (*check_eattr_leaf) (struct gfs2_inode *ip, uint64_t block,
 				 uint64_t parent, struct gfs2_buffer_head **bh,
-				 enum update_flags *want_updated,
 				 void *private);
 	int (*check_dentry) (struct gfs2_inode *ip, struct gfs2_dirent *de,
 			     struct gfs2_dirent *prev,
 			     struct gfs2_buffer_head *bh,
-			     char *filename, enum update_flags *update,
-			     uint16_t *count, void *private);
+			     char *filename, uint16_t *count, void *private);
 	int (*check_eattr_entry) (struct gfs2_inode *ip,
 				  struct gfs2_buffer_head *leaf_bh,
 				  struct gfs2_ea_header *ea_hdr,
@@ -76,12 +83,9 @@ struct metawalk_fxns {
 				     struct gfs2_buffer_head *leaf_bh,
 				     struct gfs2_ea_header *ea_hdr,
 				     struct gfs2_ea_header *ea_hdr_prev,
-				     enum update_flags *want_updated,
 				     void *private);
 	int (*finish_eattr_indir) (struct gfs2_inode *ip, int leaf_pointers,
-				   int leaf_pointer_errors,
-				   enum update_flags *want_updated,
-				   void *private);
+				   int leaf_pointer_errors, void *private);
 };
 
 #endif /* _METAWALK_H */
diff --git a/gfs2/fsck/pass1.c b/gfs2/fsck/pass1.c
index 17c4bfc..a3cd7f7 100644
--- a/gfs2/fsck/pass1.c
+++ b/gfs2/fsck/pass1.c
@@ -21,6 +21,7 @@
 
 #include "libgfs2.h"
 #include "fsck.h"
+#include "inode_hash.h"
 #include "util.h"
 #include "link.h"
 #include "metawalk.h"
@@ -35,13 +36,17 @@ static int leaf(struct gfs2_inode *ip, uint64_t block,
 		struct gfs2_buffer_head *bh, void *private);
 static int check_metalist(struct gfs2_inode *ip, uint64_t block,
 			  struct gfs2_buffer_head **bh, void *private);
+static int undo_check_metalist(struct gfs2_inode *ip, uint64_t block,
+			       struct gfs2_buffer_head **bh, void *private);
 static int check_data(struct gfs2_inode *ip, uint64_t block, void *private);
+static int undo_check_data(struct gfs2_inode *ip, uint64_t block,
+			   void *private);
 static int check_eattr_indir(struct gfs2_inode *ip, uint64_t indirect,
 			     uint64_t parent, struct gfs2_buffer_head **bh,
-			     enum update_flags *want_updated, void *private);
+			     void *private);
 static int check_eattr_leaf(struct gfs2_inode *ip, uint64_t block,
 			    uint64_t parent, struct gfs2_buffer_head **bh,
-			    enum update_flags *want_updated, void *private);
+			    void *private);
 static int check_eattr_entries(struct gfs2_inode *ip,
 			       struct gfs2_buffer_head *leaf_bh,
 			       struct gfs2_ea_header *ea_hdr,
@@ -51,11 +56,22 @@ static int check_extended_leaf_eattr(struct gfs2_inode *ip, uint64_t *data_ptr,
 				     struct gfs2_buffer_head *leaf_bh,
 				     struct gfs2_ea_header *ea_hdr,
 				     struct gfs2_ea_header *ea_hdr_prev,
-				     enum update_flags *want_updated,
 				     void *private);
 static int finish_eattr_indir(struct gfs2_inode *ip, int leaf_pointers,
-			      int leaf_pointer_errors,
-			      enum update_flags *want_updated, void *private);
+			      int leaf_pointer_errors, void *private);
+static int invalidate_metadata(struct gfs2_inode *ip, uint64_t block,
+			       struct gfs2_buffer_head **bh, void *private);
+static int invalidate_leaf(struct gfs2_inode *ip, uint64_t block,
+			   struct gfs2_buffer_head *bh, void *private);
+static int invalidate_data(struct gfs2_inode *ip, uint64_t block,
+			   void *private);
+static int invalidate_eattr_indir(struct gfs2_inode *ip, uint64_t block,
+				  uint64_t parent,
+				  struct gfs2_buffer_head **bh,
+				  void *private);
+static int invalidate_eattr_leaf(struct gfs2_inode *ip, uint64_t block,
+				 uint64_t parent, struct gfs2_buffer_head **bh,
+				 void *private);
 
 struct metawalk_fxns pass1_fxns = {
 	.private = NULL,
@@ -70,14 +86,27 @@ struct metawalk_fxns pass1_fxns = {
 	.finish_eattr_indir = finish_eattr_indir,
 };
 
+struct metawalk_fxns undo_fxns = {
+	.private = NULL,
+	.check_metalist = undo_check_metalist,
+	.check_data = undo_check_data,
+};
+
+struct metawalk_fxns invalidate_fxns = {
+	.private = NULL,
+	.check_metalist = invalidate_metadata,
+	.check_data = invalidate_data,
+	.check_leaf = invalidate_leaf,
+	.check_eattr_indir = invalidate_eattr_indir,
+	.check_eattr_leaf = invalidate_eattr_leaf,
+};
+
 static int leaf(struct gfs2_inode *ip, uint64_t block,
 		struct gfs2_buffer_head *bh, void *private)
 {
 	struct block_count *bc = (struct block_count *) private;
 
-	log_debug( _("\tLeaf block at %15" PRIu64 " (0x%" PRIx64 ")\n"),
-			  block, block);
-	gfs2_block_set(ip->i_sbd, bl, block, gfs2_leaf_blk);
+	fsck_blockmap_set(ip, block, _("directory leaf"), gfs2_leaf_blk);
 	bc->indir_count++;
 	return 0;
 }
@@ -85,59 +114,134 @@ static int leaf(struct gfs2_inode *ip, uint64_t block,
 static int check_metalist(struct gfs2_inode *ip, uint64_t block,
 			  struct gfs2_buffer_head **bh, void *private)
 {
-	struct gfs2_block_query q = {0};
-	int found_dup = 0;
+	uint8_t q;
+	int found_dup = 0, iblk_type;
 	struct gfs2_buffer_head *nbh;
 	struct block_count *bc = (struct block_count *)private;
+	const char *blktypedesc;
 
 	*bh = NULL;
 
 	if (gfs2_check_range(ip->i_sbd, block)){ /* blk outside of FS */
-		gfs2_block_set(ip->i_sbd, bl, ip->i_di.di_num.no_addr,
-			       gfs2_bad_block);
-		log_debug( _("Bad indirect block pointer (out of range).\n"));
+		fsck_blockmap_set(ip, ip->i_di.di_num.no_addr,
+				  _("itself"), gfs2_bad_block);
+		log_debug( _("Bad indirect block pointer (out of range) "
+			     "found in inode %lld (0x%llx).\n"),
+			   (unsigned long long)ip->i_di.di_num.no_addr,
+			   (unsigned long long)ip->i_di.di_num.no_addr);
 
 		return 1;
 	}
-	if(gfs2_block_check(ip->i_sbd, bl, block, &q)) {
-		stack;
-		return -1;
+	if (S_ISDIR(ip->i_di.di_mode)) {
+		iblk_type = GFS2_METATYPE_JD;
+		blktypedesc = _("a directory hash table block");
+	} else {
+		iblk_type = GFS2_METATYPE_IN;
+		blktypedesc = _("a journaled data block");
 	}
-	if(q.block_type != gfs2_block_free) {
-		log_err( _("Found duplicate block referenced as metadata in "
-			   "indirect block - was marked %d\n"), q.block_type);
-		gfs2_block_mark(ip->i_sbd, bl, block, gfs2_dup_block);
+	q = block_type(block);
+	if(q != gfs2_block_free) {
+		log_err( _("Found duplicate block %llu (0x%llx) referenced "
+			   "as metadata in indirect block for dinode "
+			   "%llu (0x%llx) - was marked %d (%s)\n"),
+			 (unsigned long long)block,
+			 (unsigned long long)block,
+			 (unsigned long long)ip->i_di.di_num.no_addr,
+			 (unsigned long long)ip->i_di.di_num.no_addr, q,
+			 block_type_string(q));
+		add_duplicate_ref(ip, block, ref_as_meta, 0, INODE_VALID);
 		found_dup = 1;
 	}
-	nbh = bread(&ip->i_sbd->buf_list, block);
-
-	if (gfs2_check_meta(nbh, GFS2_METATYPE_IN)){
-		log_debug( _("Bad indirect block pointer (points to "
-			     "something that is not an indirect block).\n"));
+	nbh = bread(ip->i_sbd, block);
+
+	if (gfs2_check_meta(nbh, iblk_type)){
+		log_debug( _("Inode %lld (0x%llx) has a bad indirect block "
+			     "pointer %lld (0x%llx) (points to something "
+			     "that is not %s).\n"),
+			   (unsigned long long)ip->i_di.di_num.no_addr,
+			   (unsigned long long)ip->i_di.di_num.no_addr,
+			   (unsigned long long)block,
+			   (unsigned long long)block, blktypedesc);
 		if(!found_dup) {
-			gfs2_block_set(ip->i_sbd, bl, block, gfs2_meta_inval);
-			brelse(nbh, not_updated);
+			fsck_blockmap_set(ip, block, _("bad indirect"),
+					  gfs2_meta_inval);
+			brelse(nbh);
 			return 1;
 		}
-		brelse(nbh, not_updated);
+		brelse(nbh);
 	} else /* blk check ok */
 		*bh = nbh;
 
-	if (!found_dup) {
-		log_debug( _("Setting %" PRIu64 " (0x%" PRIx64 ") to indirect "
-			     "block.\n"), block, block);
-		gfs2_block_set(ip->i_sbd, bl, block, gfs2_indir_blk);
-	}
 	bc->indir_count++;
+	if (found_dup)
+		return 1; /* don't process the metadata again */
+	else
+		fsck_blockmap_set(ip, block, _("indirect"),
+				  gfs2_indir_blk);
 
 	return 0;
 }
 
+static int undo_check_metalist(struct gfs2_inode *ip, uint64_t block,
+			       struct gfs2_buffer_head **bh, void *private)
+{
+	struct duptree *d;
+	int found_dup = 0, iblk_type;
+	struct gfs2_buffer_head *nbh;
+	struct block_count *bc = (struct block_count *)private;
+
+	*bh = NULL;
+
+	if (gfs2_check_range(ip->i_sbd, block)){ /* blk outside of FS */
+		fsck_blockmap_set(ip, ip->i_di.di_num.no_addr,
+				  _("itself"), gfs2_block_free);
+		return 1;
+	}
+	if (S_ISDIR(ip->i_di.di_mode))
+		iblk_type = GFS2_METATYPE_JD;
+	else
+		iblk_type = GFS2_METATYPE_IN;
+
+	d = dupfind(block);
+	if (d) {
+		log_err( _("Reversing duplicate status of block %llu (0x%llx) "
+			   "referenced as metadata in indirect block for "
+			   "dinode %llu (0x%llx)\n"),
+			 (unsigned long long)block,
+			 (unsigned long long)block,
+			 (unsigned long long)ip->i_di.di_num.no_addr,
+			 (unsigned long long)ip->i_di.di_num.no_addr);
+		d->refs--; /* one less reference */
+		if (d->refs == 1)
+			dup_delete(d);
+		found_dup = 1;
+	}
+	nbh = bread(ip->i_sbd, block);
+
+	if (gfs2_check_meta(nbh, iblk_type)) {
+		if(!found_dup) {
+			fsck_blockmap_set(ip, block, _("bad indirect"),
+					  gfs2_block_free);
+			brelse(nbh);
+			return 1;
+		}
+		brelse(nbh);
+	} else /* blk check ok */
+		*bh = nbh;
+
+	bc->indir_count--;
+	if (found_dup)
+		return 1; /* don't process the metadata again */
+	else
+		fsck_blockmap_set(ip, block, _("bad indirect"),
+				  gfs2_block_free);
+	return 0;
+}
+
 static int check_data(struct gfs2_inode *ip, uint64_t block, void *private)
 {
-	struct gfs2_block_query q = {0};
+	uint8_t q;
 	struct block_count *bc = (struct block_count *) private;
-	int error = 0, btype;
 
 	if (gfs2_check_range(ip->i_sbd, block)) {
 		log_err( _("inode %lld (0x%llx) has a bad data block pointer "
@@ -148,116 +252,139 @@ static int check_data(struct gfs2_inode *ip, uint64_t block, void *private)
 		/* Mark the owner of this block with the bad_block
 		 * designator so we know to check it for out of range
 		 * blocks later */
-		gfs2_block_set(ip->i_sbd, bl, ip->i_di.di_num.no_addr,
-			       gfs2_bad_block);
+		fsck_blockmap_set(ip, ip->i_di.di_num.no_addr,
+				  _("bad (out of range) data"),
+				  gfs2_bad_block);
 		return 1;
 	}
-	if(gfs2_block_check(ip->i_sbd, bl, block, &q)) {
-		stack;
-		log_err( _("Found bad block referenced as data at %"
-			   PRIu64 " (0x%"PRIx64 ")\n"), block, block);
-		return -1;
-	}
-	if(q.block_type != gfs2_block_free) {
-		log_err( _("Found duplicate block referenced as data at %"
-			   PRIu64 " (0x%"PRIx64 ")\n"), block, block);
-		if (q.block_type != gfs2_meta_inval) {
-			gfs2_block_mark(ip->i_sbd, bl, block, gfs2_dup_block);
+	q = block_type(block);
+	if (q != gfs2_block_free) {
+		log_err( _("Found duplicate %s block %llu (0x%llx) "
+			   "referenced as data by dinode %llu (0x%llx)\n"),
+			 block_type_string(q),
+			 (unsigned long long)block,
+			 (unsigned long long)block,
+			 (unsigned long long)ip->i_di.di_num.no_addr,
+			 (unsigned long long)ip->i_di.di_num.no_addr);
+		if (q != gfs2_meta_inval) {
+			log_info( _("Seems to be a normal duplicate; I'll "
+				    "sort it out in pass1b.\n"));
+			add_duplicate_ref(ip, block, ref_as_data, 0,
+					  INODE_VALID);
 			/* If the prev ref was as data, this is likely a data
 			   block, so keep the block count for both refs. */
-			if (q.block_type == gfs2_block_used)
+			if (q == gfs2_block_used)
 				bc->data_count++;
 			return 1;
 		}
-		/* An explanation is in order here.  At this point we found
-		   a duplicate block, a block that was already referenced
-		   somewhere else.  We'll resolve those duplicates in pass1b.
-		   However, if the block is marked "invalid" that's a special
-		   case.  It's likely that the block was discovered to be
-		   invalid metadata--i.e. doesn't have a metadata header.
-		   However, it still may be a valid data block, since they
-		   won't have metadata headers.  In that case, the block is
-		   marked as duplicate, but also as a data block. */
-		error = 1;
-		gfs2_block_unmark(ip->i_sbd, bl, block, gfs2_meta_inval);
-		gfs2_block_mark(ip->i_sbd, bl, block, gfs2_dup_block);
-	}
-	log_debug( _("Marking block %llu (0x%llx) as data block\n"),
-		   (unsigned long long)block, (unsigned long long)block);
-	gfs2_block_mark(ip->i_sbd, bl, block, gfs2_block_used);
-
-	/* This is also confusing, so I'll clarify.  There are two bitmaps:
-	   (1) The gfs2_bmap that fsck uses to keep track of what block
-	   type has been discovered, and (2) The rgrp bitmap.  Function
-	   gfs2_block_set is used to set the former and gfs2_set_bitmap
-	   is used to set the latter.  In this function we need to set both
-	   because we found a "data" block that could be "meta" in the rgrp
-	   bitmap.  If we don't we could run into the data block again as
-	   metadata when we're traversing the metadata with gfs2_next_rg_meta
-	   in func pass1().  If that happens, it will look at the block,
-	   say "hey this isn't metadata" and mark it incorrectly as an
-	   invalid metadata block and free it.  Ordinarily, fsck will wait
-	   until pass5 to sync (2) so that it agrees with (1).  However, in
-	   this case, it's better to do it upfront.  The duplicate solving
-	   code in pass1b.c is better at resolving metadata referencing a
-	   data block than it is at resolving a data block referencing a
-	   metadata block. */
-	btype = gfs2_get_bitmap(ip->i_sbd, block, NULL);
-	if (btype != GFS2_BLKST_USED) {
-		const char *allocdesc[] = {"free space", "data", "unlinked",
-					   "metadata", "reserved"};
-
-		errors_found++;
-		log_err( _("Block %llu (0x%llx) seems to be data, but is "
-			   "marked as %s.\n"), (unsigned long long)block,
-			   (unsigned long long)block, allocdesc[btype]);
-		if(query(&opts, _("Okay to mark it as 'data'? (y/n)"))) {
-			errors_corrected++;
-			gfs2_set_bitmap(ip->i_sbd, block, GFS2_BLKST_USED);
-			log_err( _("The block was reassigned as data.\n"));
-		} else {
-			log_err( _("The invalid block was ignored.\n"));
-		}
+		log_info( _("The block was invalid as metadata but might be "
+			    "okay as data.  I'll sort it out in pass1b.\n"));
+		add_duplicate_ref(ip, block, ref_as_data, 0, INODE_VALID);
+		bc->data_count++;
+		return 1;
 	}
+	fsck_blockmap_set(ip, block, _("data"), gfs2_block_used);
 	bc->data_count++;
-	return error;
+	return 0;
 }
 
-static int remove_inode_eattr(struct gfs2_inode *ip, struct block_count *bc,
-			      int duplicate, enum update_flags *want_updated)
+static int undo_check_data(struct gfs2_inode *ip, uint64_t block,
+			   void *private)
 {
-	if (!duplicate) {
-		gfs2_set_bitmap(ip->i_sbd, ip->i_di.di_eattr,
-				GFS2_BLKST_FREE);
-		gfs2_block_set(ip->i_sbd, bl, ip->i_di.di_eattr,
-			       gfs2_block_free);
+	struct duptree *d;
+	struct block_count *bc = (struct block_count *) private;
+
+	if (gfs2_check_range(ip->i_sbd, block)) {
+		/* Undo path: check_data marked the owner of this block
+		 * with the bad_block designator for an out of range
+		 * pointer; here the owner is set back to free instead. */
+		fsck_blockmap_set(ip, ip->i_di.di_num.no_addr,
+				  _("bad (out of range) data"),
+				  gfs2_block_free);
+		return 1;
+	}
+	d = dupfind(block);
+	if (d) {
+		log_err( _("Reversing duplicate status of block %llu (0x%llx) "
+			   "referenced as data by dinode %llu (0x%llx)\n"),
+			 (unsigned long long)block,
+			 (unsigned long long)block,
+			 (unsigned long long)ip->i_di.di_num.no_addr,
+			 (unsigned long long)ip->i_di.di_num.no_addr);
+		d->refs--; /* one less reference */
+		if (d->refs == 1)
+			dup_delete(d);
+		bc->data_count--;
+		return 1;
+	}
+	fsck_blockmap_set(ip, block, _("data"), gfs2_block_free);
+	bc->data_count--;
+	return 0;
+}
+
+static int remove_inode_eattr(struct gfs2_inode *ip, struct block_count *bc)
+{
+	struct duptree *dt;
+	struct inode_with_dups *id;
+	osi_list_t *ref;
+	int moved = 0;
+
+	/* If it's a duplicate reference to the block, we need to check
+	   if the reference is on the valid or invalid inodes list.
+	   If it's on the valid inode's list, move it to the invalid
+	   inodes list.  The reason is simple: This inode, although
+	   valid, has an now-invalid reference, so we should not give
+	   this reference preferential treatment over others. */
+	dt = dupfind(ip->i_di.di_eattr);
+	if (dt) {
+		osi_list_foreach(ref, &dt->ref_inode_list) {
+			id = osi_list_entry(ref, struct inode_with_dups, list);
+			if (id->block_no == ip->i_di.di_num.no_addr) {
+				log_debug( _("Moving inode %lld (0x%llx)'s "
+					     "duplicate reference to %lld "
+					     "(0x%llx) from the valid to the "
+					     "invalid reference list.\n"),
+					   (unsigned long long)
+					   ip->i_di.di_num.no_addr,
+					   (unsigned long long)
+					   ip->i_di.di_num.no_addr,
+					   (unsigned long long)
+					   ip->i_di.di_eattr,
+					   (unsigned long long)
+					   ip->i_di.di_eattr);
+				/* Move from the normal to the invalid list */
+				osi_list_del(&id->list);
+				osi_list_add_prev(&id->list,
+						  &dt->ref_invinode_list);
+				moved = 1;
+				break;
+			}
+		}
+		if (!moved)
+			log_debug( _("Duplicate reference to %lld "
+				     "(0x%llx) not moved.\n"),
+				   (unsigned long long)ip->i_di.di_eattr,
+				   (unsigned long long)ip->i_di.di_eattr);
+	} else {
+		delete_block(ip, ip->i_di.di_eattr, NULL,
+			     "extended attribute", NULL);
 	}
 	ip->i_di.di_eattr = 0;
 	bc->ea_count = 0;
 	ip->i_di.di_blocks = 1 + bc->indir_count + bc->data_count;
 	ip->i_di.di_flags &= ~GFS2_DIF_EA_INDIRECT;
-	*want_updated = updated;
+	bmodified(ip->i_bh);
 	return 0;
 }
 
 static int ask_remove_inode_eattr(struct gfs2_inode *ip,
-				  struct block_count *bc,
-				  enum update_flags *want_updated)
+				  struct block_count *bc)
 {
 	log_err( _("Inode %lld (0x%llx) has unrecoverable Extended Attribute "
 		   "errors.\n"), (unsigned long long)ip->i_di.di_num.no_addr,
 		 (unsigned long long)ip->i_di.di_num.no_addr);
-	errors_found++;
-	if (query(&opts, _("Clear all Extended Attributes from the "
-			   "inode? (y/n) "))) {
-		struct gfs2_block_query q;
-
-		errors_corrected++;
-		if(gfs2_block_check(ip->i_sbd, bl, ip->i_di.di_eattr, &q)) {
-			stack;
-			return -1;
-		}
-		if (!remove_inode_eattr(ip, bc, q.dup_block, want_updated))
+	if (query( _("Clear all Extended Attributes from the inode? (y/n) "))){
+		if (!remove_inode_eattr(ip, bc))
 			log_err( _("Extended attributes were removed.\n"));
 		else
 			log_err( _("Unable to remove inode eattr pointer; "
@@ -278,31 +405,22 @@ static int ask_remove_inode_eattr(struct gfs2_inode *ip,
  * Returns: 1 if the EA is fixed, else 0 if it was not fixed.
  */
 static int clear_eas(struct gfs2_inode *ip, struct block_count *bc,
-		     uint64_t block, int duplicate,
-		     enum update_flags *want_updated, const char *emsg)
+		     uint64_t block, int duplicate, const char *emsg)
 {
-	struct gfs2_sbd *sdp = ip->i_sbd;
-
-	*want_updated = not_updated;
 	log_err( _("Inode #%llu (0x%llx): %s"),
 		(unsigned long long)ip->i_di.di_num.no_addr,
 		(unsigned long long)ip->i_di.di_num.no_addr, emsg);
 	log_err( _(" at block #%lld (0x%llx).\n"),
 		 (unsigned long long)block, (unsigned long long)block);
-	errors_found++;
-	if (query(&opts, _("Clear the bad Extended Attribute? (y/n) "))) {
-		errors_corrected++;
+	if (query( _("Clear the bad Extended Attribute? (y/n) "))) {
 		if (block == ip->i_di.di_eattr) {
-			remove_inode_eattr(ip, bc, duplicate, want_updated);
+			remove_inode_eattr(ip, bc);
 			log_err( _("The bad extended attribute was "
 				   "removed.\n"));
 		} else if (!duplicate) {
-			gfs2_block_set(sdp, bl, block, gfs2_block_free);
-			gfs2_set_bitmap(sdp, block, GFS2_BLKST_FREE);
-			log_err( _("The bad Extended Attribute was "
-				   "removed.\n"));
+			delete_block(ip, block, NULL,
+				     _("bad extended attribute"), NULL);
 		}
-		*want_updated = updated;
 		return 1;
 	} else {
 		log_err( _("The bad Extended Attribute was not fixed.\n"));
@@ -313,11 +431,11 @@ static int clear_eas(struct gfs2_inode *ip, struct block_count *bc,
 
 static int check_eattr_indir(struct gfs2_inode *ip, uint64_t indirect,
 			     uint64_t parent, struct gfs2_buffer_head **bh,
-			     enum update_flags *want_updated, void *private)
+			     void *private)
 {
 	struct gfs2_sbd *sdp = ip->i_sbd;
 	int ret = 0;
-	struct gfs2_block_query q = {0};
+	uint8_t q;
 	struct block_count *bc = (struct block_count *) private;
 
 	/* This inode contains an eattr - it may be invalid, but the
@@ -325,38 +443,34 @@ static int check_eattr_indir(struct gfs2_inode *ip, uint64_t indirect,
 	if(gfs2_check_range(sdp, indirect)) {
 		/*log_warn("EA indirect block #%"PRIu64" is out of range.\n",
 			indirect);
-			gfs2_block_set(sdp, bl, parent, bad_block);*/
+			fsck_blockmap_set(parent, "bad", bad_block);*/
 		/* Doesn't help to mark this here - this gets checked
 		 * in pass1c */
 		return 1;
 	}
-	if(gfs2_block_check(sdp, bl, indirect, &q)) {
-		stack;
-		return -1;
-	}
+	q = block_type(indirect);
 
 	/* Special duplicate processing:  If we have an EA block,
 	   check if it really is an EA.  If it is, let duplicate
 	   handling sort it out.  If it isn't, clear it but don't
 	   count it as a duplicate. */
-	*bh = bread(&sdp->buf_list, indirect);
+	*bh = bread(sdp, indirect);
 	if(gfs2_check_meta(*bh, GFS2_METATYPE_IN)) {
-		if(q.block_type != gfs2_block_free) { /* Duplicate? */
-			if (!clear_eas(ip, bc, indirect, 1, want_updated,
+		if(q != gfs2_block_free) { /* Duplicate? */
+			add_duplicate_ref(ip, indirect, ref_as_ea, 0,
+					  INODE_VALID);
+			if (!clear_eas(ip, bc, indirect, 1,
 				       _("Bad indirect Extended Attribute "
-					 "duplicate found"))) {
-				gfs2_block_mark(sdp, bl, indirect,
-						gfs2_dup_block);
+					 "duplicate found")))
 				bc->ea_count++;
-			}
 			return 1;
 		}
-		clear_eas(ip, bc, indirect, 0, want_updated,
+		clear_eas(ip, bc, indirect, 0,
 			  _("Extended Attribute indirect block has incorrect "
 			    "type"));
 		return 1;
 	}
-	if(q.block_type != gfs2_block_free) { /* Duplicate? */
+	if(q != gfs2_block_free) { /* Duplicate? */
 		log_err( _("Inode #%llu (0x%llx): Duplicate Extended "
 			   "Attribute indirect block found at #%llu "
 			   "(0x%llx).\n"),
@@ -364,48 +478,53 @@ static int check_eattr_indir(struct gfs2_inode *ip, uint64_t indirect,
 			 (unsigned long long)ip->i_di.di_num.no_addr,
 			 (unsigned long long)indirect,
 			 (unsigned long long)indirect);
-		gfs2_block_mark(sdp, bl, indirect, gfs2_dup_block);
+		add_duplicate_ref(ip, indirect, ref_as_ea, 0, INODE_VALID);
 		bc->ea_count++;
 		ret = 1;
 	} else {
-		log_debug( _("Setting #%" PRIu64 " (0x%" PRIx64
-			  ") to indirect Extended Attribute block\n"),
-			   indirect, indirect);
-		gfs2_block_set(sdp, bl, indirect, gfs2_indir_blk);
+		fsck_blockmap_set(ip, indirect,
+				  _("indirect Extended Attribute"),
+				  gfs2_indir_blk);
 		bc->ea_count++;
 	}
 	return ret;
 }
 
 static int finish_eattr_indir(struct gfs2_inode *ip, int leaf_pointers,
-			      int leaf_pointer_errors,
-			      enum update_flags *want_updated, void *private)
+			      int leaf_pointer_errors, void *private)
 {
 	struct block_count *bc = (struct block_count *) private;
+	osi_list_t *head;
+	struct special_blocks *b = NULL;
 
 	if (leaf_pointer_errors == leaf_pointers) /* All eas were bad */
-		return ask_remove_inode_eattr(ip, bc, want_updated);
+		return ask_remove_inode_eattr(ip, bc);
 	log_debug( _("Marking inode #%llu (0x%llx) with extended "
 		     "attribute block\n"),
 		   (unsigned long long)ip->i_di.di_num.no_addr,
 		   (unsigned long long)ip->i_di.di_num.no_addr);
 	/* Mark the inode as having an eattr in the block map
-	   so pass1c can check it. */
-	gfs2_block_mark(ip->i_sbd, bl, ip->i_di.di_num.no_addr,
-			gfs2_eattr_block);
+	   so pass1c can check it. We may have previously added this inode
+	   to the eattr_blocks list and if we did, it would be the first
+	   one on the list.  So check that one only (to save time) and
+	   if that one matches, no need to add it again. */
+	if (!osi_list_empty(&ip->i_sbd->eattr_blocks.list)) {
+		head = &ip->i_sbd->eattr_blocks.list;
+		b = osi_list_entry(head->next, struct special_blocks, list);
+	}
+	if (!b || b->block != ip->i_di.di_num.no_addr)
+		gfs2_special_add(&ip->i_sbd->eattr_blocks,
+				 ip->i_di.di_num.no_addr);
 	if (!leaf_pointer_errors)
 		return 0;
 	log_err( _("Inode %lld (0x%llx) has recoverable indirect "
 		   "Extended Attribute errors.\n"),
 		   (unsigned long long)ip->i_di.di_num.no_addr,
 		   (unsigned long long)ip->i_di.di_num.no_addr);
-	errors_found++;
-	if (query(&opts, _("Okay to fix the block count for the inode? "
-			   "(y/n) "))) {
-		errors_corrected++;
+	if (query( _("Okay to fix the block count for the inode? (y/n) "))) {
 		ip->i_di.di_blocks = 1 + bc->indir_count +
 			bc->data_count + bc->ea_count;
-		*want_updated = updated;
+		bmodified(ip->i_bh);
 		log_err( _("Block count fixed.\n"));
 		return 1;
 	}
@@ -414,59 +533,55 @@ static int finish_eattr_indir(struct gfs2_inode *ip, int leaf_pointers,
 }
 
 static int check_leaf_block(struct gfs2_inode *ip, uint64_t block, int btype,
-			    struct gfs2_buffer_head **bh,
-			    enum update_flags *want_updated, void *private)
+			    struct gfs2_buffer_head **bh, void *private)
 {
 	struct gfs2_buffer_head *leaf_bh = NULL;
 	struct gfs2_sbd *sdp = ip->i_sbd;
-	struct gfs2_block_query q = {0};
+	uint8_t q;
 	struct block_count *bc = (struct block_count *) private;
 
-	if(gfs2_block_check(sdp, bl, block, &q)) {
-		stack;
-		return -1;
-	}
+	q = block_type(block);
 	/* Special duplicate processing:  If we have an EA block, check if it
 	   really is an EA.  If it is, let duplicate handling sort it out.
 	   If it isn't, clear it but don't count it as a duplicate. */
-	leaf_bh = bread(&sdp->buf_list, block);
+	leaf_bh = bread(sdp, block);
 	if(gfs2_check_meta(leaf_bh, btype)) {
-		if(q.block_type != gfs2_block_free) { /* Duplicate? */
-			clear_eas(ip, bc, block, 1, want_updated,
+		if(q != gfs2_block_free) { /* Duplicate? */
+			add_duplicate_ref(ip, block, ref_as_ea, 0,
+					  INODE_VALID);
+			clear_eas(ip, bc, block, 1,
 				  _("Bad Extended Attribute duplicate found"));
 		} else {
-			clear_eas(ip, bc, block, 0, want_updated,
+			clear_eas(ip, bc, block, 0,
 				  _("Extended Attribute leaf block "
 				    "has incorrect type"));
 		}
-		brelse(leaf_bh, *want_updated);
+		brelse(leaf_bh);
 		return 1;
 	}
-	if(q.block_type != gfs2_block_free) { /* Duplicate? */
+	if(q != gfs2_block_free) { /* Duplicate? */
 		log_debug( _("Duplicate block found at #%lld (0x%llx).\n"),
 			   (unsigned long long)block,
 			   (unsigned long long)block);
-		gfs2_block_mark(sdp, bl, block, gfs2_dup_block);
+		add_duplicate_ref(ip, block, ref_as_data, 0, INODE_VALID);
 		bc->ea_count++;
-		brelse(leaf_bh, not_updated);
+		brelse(leaf_bh);
 		return 1;
 	}
 	if (ip->i_di.di_eattr == 0) {
 		/* Can only get in here if there were unrecoverable ea
 		   errors that caused clear_eas to be called.  What we
 		   need to do here is remove the subsequent ea blocks. */
-		clear_eas(ip, bc, block, 0, want_updated,
+		clear_eas(ip, bc, block, 0,
 			  _("Extended Attribute block removed due to "
 			    "previous errors.\n"));
-		brelse(leaf_bh, *want_updated);
+		brelse(leaf_bh);
 		return 1;
 	}
-	log_debug( _("Setting block #%lld (0x%llx) to eattr block\n"),
-		   (unsigned long long)block, (unsigned long long)block);
 	/* Point of confusion: We've got to set the ea block itself to
 	   gfs2_meta_eattr here.  Elsewhere we mark the inode with
 	   gfs2_eattr_block meaning it contains an eattr for pass1c. */
-	gfs2_block_set(sdp, bl, block, gfs2_meta_eattr);
+	fsck_blockmap_set(ip, block, _("Extended Attribute"), gfs2_meta_eattr);
 	bc->ea_count++;
 	*bh = leaf_bh;
 	return 0;
@@ -487,7 +602,6 @@ static int check_extended_leaf_eattr(struct gfs2_inode *ip, uint64_t *data_ptr,
 				     struct gfs2_buffer_head *leaf_bh,
 				     struct gfs2_ea_header *ea_hdr,
 				     struct gfs2_ea_header *ea_hdr_prev,
-				     enum update_flags *want_updated,
 				     void *private)
 {
 	uint64_t el_blk = be64_to_cpu(*data_ptr);
@@ -505,21 +619,24 @@ static int check_extended_leaf_eattr(struct gfs2_inode *ip, uint64_t *data_ptr,
 			 (unsigned long long)ip->i_di.di_eattr,
 			 (unsigned long long)el_blk,
 			 (unsigned long long)el_blk);
-		gfs2_block_set(sdp, bl, ip->i_di.di_eattr, gfs2_bad_block);
+		fsck_blockmap_set(ip, ip->i_di.di_eattr,
+				  _("bad (out of range) Extended Attribute "),
+				  gfs2_bad_block);
 		return 1;
 	}
-	error = check_leaf_block(ip, el_blk, GFS2_METATYPE_ED, &bh,
-				 want_updated, private);
+	error = check_leaf_block(ip, el_blk, GFS2_METATYPE_ED, &bh, private);
 	if (bh)
-		brelse(bh, not_updated);
+		brelse(bh);
 	return error;
 }
 
 static int check_eattr_leaf(struct gfs2_inode *ip, uint64_t block,
 			    uint64_t parent, struct gfs2_buffer_head **bh,
-			    enum update_flags *want_updated, void *private)
+			    void *private)
 {
 	struct gfs2_sbd *sdp = ip->i_sbd;
+	osi_list_t *head;
+	struct special_blocks *b = NULL;
 
 	/* This inode contains an eattr - it may be invalid, but the
 	 * eattr attributes points to a non-zero block.
@@ -530,18 +647,24 @@ static int check_eattr_leaf(struct gfs2_inode *ip, uint64_t block,
 		     "block(s) attached.\n"),
 		   (unsigned long long)ip->i_di.di_num.no_addr,
 		   (unsigned long long)ip->i_di.di_num.no_addr);
-	gfs2_block_mark(sdp, bl, ip->i_di.di_num.no_addr, gfs2_eattr_block);
-	if(gfs2_check_range(sdp, block)) {
+	if (!osi_list_empty(&ip->i_sbd->eattr_blocks.list)) {
+		head = &ip->i_sbd->eattr_blocks.list;
+		b = osi_list_entry(head->next, struct special_blocks, list);
+	}
+	if (!b || b->block != ip->i_di.di_num.no_addr)
+		gfs2_special_add(&sdp->eattr_blocks, ip->i_di.di_num.no_addr);
+	if (gfs2_check_range(sdp, block)) {
 		log_warn( _("Inode #%llu (0x%llx): Extended Attribute leaf "
 			    "block #%llu (0x%llx) is out of range.\n"),
 			 (unsigned long long)ip->i_di.di_num.no_addr,
 			 (unsigned long long)ip->i_di.di_num.no_addr,
 			 (unsigned long long)block, (unsigned long long)block);
-		gfs2_block_set(sdp, bl, ip->i_di.di_eattr, gfs2_bad_block);
+		fsck_blockmap_set(ip, ip->i_di.di_eattr,
+				  _("bad (out of range) Extended "
+				    "Attribute leaf"), gfs2_bad_block);
 		return 1;
 	}
-	return check_leaf_block(ip, block, GFS2_METATYPE_EA, bh, want_updated,
-				private);
+	return check_leaf_block(ip, block, GFS2_METATYPE_EA, bh, private);
 }
 
 static int check_eattr_entries(struct gfs2_inode *ip,
@@ -585,108 +708,180 @@ static int check_eattr_entries(struct gfs2_inode *ip,
 	return 0;
 }
 
-static int clear_metalist(struct gfs2_inode *ip, uint64_t block,
-		   struct gfs2_buffer_head **bh, void *private)
+/**
+ * mark_block_invalid - mark a block of inode @ip as invalid unless the
+ *                      blockmap shows it is already in use (a duplicate).
+ * Returns: -EFAULT if @block is out of range, otherwise 0.
+ * An "invalid" block is now considered free in the bitmap, and pass2 will
+ * delete any invalid blocks.  This is nearly identical to function
+ * delete_block_if_notdup.
+ */
+static int mark_block_invalid(struct gfs2_inode *ip, uint64_t block,
+			      enum dup_ref_type reftype, const char *btype)
 {
-	struct gfs2_block_query q = {0};
-
-	*bh = NULL;
-
-	if(gfs2_block_check(ip->i_sbd, bl, block, &q)) {
-		stack;
-		return -1;
-	}
-	if(!q.dup_block) {
-		gfs2_block_set(ip->i_sbd, bl, block, gfs2_block_free);
+	uint8_t q;
+
+	if (gfs2_check_range(ip->i_sbd, block) != 0)
+		return -EFAULT;
+
+	q = block_type(block);
+	if (q != gfs2_block_free) {
+		add_duplicate_ref(ip, block, reftype, 0, INODE_INVALID);
+		log_info( _("%s block %llu (0x%llx), part of inode "
+			    "%llu (0x%llx), was previously referenced so "
+			    "the invalid reference is ignored.\n"),
+			  btype, (unsigned long long)block,
+			  (unsigned long long)block,
+			  (unsigned long long)ip->i_di.di_num.no_addr,
+			  (unsigned long long)ip->i_di.di_num.no_addr);
 		return 0;
 	}
+	fsck_blockmap_set(ip, block, btype, gfs2_meta_inval);
 	return 0;
 }
 
-static int clear_data(struct gfs2_inode *ip, uint64_t block, void *private)
+static int invalidate_metadata(struct gfs2_inode *ip, uint64_t block,
+			       struct gfs2_buffer_head **bh, void *private)
 {
-	struct gfs2_block_query q = {0};
+	return mark_block_invalid(ip, block, ref_as_meta, _("metadata"));
+}
 
-	if(gfs2_block_check(ip->i_sbd, bl, block, &q)) {
-		stack;
-		return -1;
-	}
-	if(!q.dup_block) {
-		gfs2_block_set(ip->i_sbd, bl, block, gfs2_block_free);
-		return 0;
-	}
-	return 0;
+static int invalidate_leaf(struct gfs2_inode *ip, uint64_t block,
+			   struct gfs2_buffer_head *bh, void *private)
+{
+	return mark_block_invalid(ip, block, ref_as_meta, _("leaf"));
+}
 
+static int invalidate_data(struct gfs2_inode *ip, uint64_t block,
+			   void *private)
+{
+	return mark_block_invalid(ip, block, ref_as_data, _("data"));
 }
 
-static int clear_leaf(struct gfs2_inode *ip, uint64_t block,
-	       struct gfs2_buffer_head *bh, void *private)
+static int invalidate_eattr_indir(struct gfs2_inode *ip, uint64_t block,
+				  uint64_t parent,
+				  struct gfs2_buffer_head **bh, void *private)
 {
-	struct gfs2_block_query q = {0};
+	return mark_block_invalid(ip, block, ref_as_ea,
+				  _("indirect extended attribute"));
+}
 
-	log_crit( _("Clearing leaf #%" PRIu64 " (0x%" PRIx64 ")\n"),
-		  block, block);
+static int invalidate_eattr_leaf(struct gfs2_inode *ip, uint64_t block,
+				 uint64_t parent, struct gfs2_buffer_head **bh,
+				 void *private)
+{
+	return mark_block_invalid(ip, block, ref_as_ea,
+				  _("extended attribute"));
+}
 
-	if(gfs2_block_check(ip->i_sbd, bl, block, &q)) {
-		stack;
-		return -1;
+/**
+ * rangecheck_block - check for massive amounts of pointer corruption.
+ * Counts, in the long that @private points to, every pointer that is out
+ * of range or whose block block_type() already reports as not free.  If a
+ * block has lots of bad pointers, we can't trust any of them: a stray
+ * value like 0x1d might be nonsense, and we don't want to delete an
+ * important file (like master or the root directory) because of it.
+ * This must run BEFORE we start messing with the pointers (marking
+ * duplicates, for example), thus it needs to be in a separate loop.
+ * Returns: 0 if ok; ENOENT, or -ENOENT past BAD_POINTER_TOLERANCE.
+ */
+static int rangecheck_block(struct gfs2_inode *ip, uint64_t block,
+			    struct gfs2_buffer_head **bh,
+			    const char *btype, void *private)
+{
+	long *bad_pointers = (long *)private;
+	uint8_t q;
+
+	if (gfs2_check_range(ip->i_sbd, block) != 0) {
+		(*bad_pointers)++;
+		log_debug( _("Bad %s block pointer (out of range #%ld) "
+			     "found in inode %lld (0x%llx).\n"), btype,
+			   *bad_pointers,
+			   (unsigned long long)ip->i_di.di_num.no_addr,
+			   (unsigned long long)ip->i_di.di_num.no_addr);
+		if ((*bad_pointers) <= BAD_POINTER_TOLERANCE)
+			return ENOENT;
+		else
+			return -ENOENT; /* Exits check_metatree quicker */
 	}
-	if(!q.dup_block) {
-		log_crit( _("Setting leaf #%" PRIu64 " (0x%" PRIx64 ") invalid\n"),
-				 block, block);
-		if(gfs2_block_set(ip->i_sbd, bl, block, gfs2_block_free)) {
-			stack;
-			return -1;
-		}
-		return 0;
+	/* Also count pointers to blocks that block_type() reports as not free */
+	q = block_type(block);
+	if (q != gfs2_block_free) {
+		(*bad_pointers)++;
+		log_debug( _("Duplicated %s block pointer (violation #%ld) "
+			     "found in inode %lld (0x%llx).\n"), btype,
+			   *bad_pointers,
+			   (unsigned long long)ip->i_di.di_num.no_addr,
+			   (unsigned long long)ip->i_di.di_num.no_addr);
+		if ((*bad_pointers) <= BAD_POINTER_TOLERANCE)
+			return ENOENT;
+		else
+			return -ENOENT; /* Exits check_metatree quicker */
 	}
 	return 0;
 }
 
-int add_to_dir_list(struct gfs2_sbd *sbp, uint64_t block)
+static int rangecheck_metadata(struct gfs2_inode *ip, uint64_t block,
+			       struct gfs2_buffer_head **bh, void *private)
 {
-	struct dir_info *di = NULL;
-	struct dir_info *newdi;
-
-	/* FIXME: This list should probably be a b-tree or
-	 * something...but since most of the time we're going to be
-	 * tacking the directory onto the end of the list, it doesn't
-	 * matter too much */
-	find_di(sbp, block, &di);
-	if(di) {
-		log_err( _("Attempting to add directory block #%" PRIu64
-				" (0x%" PRIx64 ") which is already in list\n"), block, block);
-		return -1;
-	}
+	return rangecheck_block(ip, block, bh, _("metadata"), private);
+}
 
-	if(!(newdi = (struct dir_info *) malloc(sizeof(struct dir_info)))) {
-		log_crit( _("Unable to allocate dir_info structure\n"));
-		return -1;
-	}
-	if(!memset(newdi, 0, sizeof(*newdi))) {
-		log_crit( _("Error while zeroing dir_info structure\n"));
-		return -1;
-	}
+static int rangecheck_leaf(struct gfs2_inode *ip, uint64_t block,
+			   struct gfs2_buffer_head *bh, void *private)
+{
+	return rangecheck_block(ip, block, &bh, _("leaf"), private);
+}
 
-	newdi->dinode = block;
-	dinode_hash_insert(dir_hash, block, newdi);
-	return 0;
+static int rangecheck_data(struct gfs2_inode *ip, uint64_t block,
+			   void *private)
+{
+	return rangecheck_block(ip, block, NULL, _("data"), private);
+}
+
+static int rangecheck_eattr_indir(struct gfs2_inode *ip, uint64_t block,
+				  uint64_t parent,
+				  struct gfs2_buffer_head **bh, void *private)
+{
+	return rangecheck_block(ip, block, NULL,
+				_("indirect extended attribute"),
+				private);
 }
 
-static int handle_di(struct gfs2_sbd *sdp, struct gfs2_buffer_head *bh,
-			  uint64_t block)
+static int rangecheck_eattr_leaf(struct gfs2_inode *ip, uint64_t block,
+				 uint64_t parent, struct gfs2_buffer_head **bh,
+				 void *private)
 {
-	struct gfs2_block_query q = {0};
+	return rangecheck_block(ip, block, NULL, _("extended attribute"),
+				private);
+}
+
+struct metawalk_fxns rangecheck_fxns = {
+        .private = NULL,
+        .check_metalist = rangecheck_metadata,
+        .check_data = rangecheck_data,
+        .check_leaf = rangecheck_leaf,
+        .check_eattr_indir = rangecheck_eattr_indir,
+        .check_eattr_leaf = rangecheck_eattr_leaf,
+};
+
+static int handle_di(struct gfs2_sbd *sdp, struct gfs2_buffer_head *bh)
+{
+	uint8_t q;
 	struct gfs2_inode *ip;
 	int error;
 	struct block_count bc = {0};
-	struct metawalk_fxns invalidate_metatree = {0};
-	enum update_flags f;
-
-	f = not_updated;
-	invalidate_metatree.check_metalist = clear_metalist;
-	invalidate_metatree.check_data = clear_data;
-	invalidate_metatree.check_leaf = clear_leaf;
+	uint64_t block = bh->b_blocknr;
+	long bad_pointers;
+
+	q = block_type(block);
+	if(q != gfs2_block_free) {
+		log_err( _("Found a duplicate inode block at #%" PRIu64
+			   " (0x%" PRIx64 ") previously marked as a %s\n"),
+			 block, block, block_type_string(q));
+		add_duplicate_ref(ip, block, ref_as_meta, 0, INODE_VALID);
+		return 0;
+	}
 
 	ip = fsck_inode_get(sdp, bh);
 	if (ip->i_di.di_num.no_addr != block) {
@@ -695,171 +890,175 @@ static int handle_di(struct gfs2_sbd *sdp, struct gfs2_buffer_head *bh,
 			(unsigned long long)block,
 			(unsigned long long)ip->i_di.di_num.no_addr,
 			(unsigned long long)ip->i_di.di_num.no_addr);
-		errors_found++;
-		if(query(&opts, _("Fix address in inode at block #%"
-				  PRIu64 " (0x%" PRIx64 ")? (y/n) "),
+		if(query( _("Fix address in inode at block #%"
+			    PRIu64 " (0x%" PRIx64 ")? (y/n) "),
 			 block, block)) {
-			errors_corrected++;
 			ip->i_di.di_num.no_addr = ip->i_di.di_num.no_formal_ino = block;
-			gfs2_dinode_out(&ip->i_di, ip->i_bh->b_data);
-			f = updated;
+			bmodified(ip->i_bh);
 		} else
 			log_err( _("Address in inode at block #%" PRIu64
 				 " (0x%" PRIx64 ") not fixed\n"), block, block);
 	}
 
-	if(gfs2_block_check(sdp, bl, block, &q)) {
-		stack;
-		fsck_inode_put(ip, f);
-		return -1;
-	}
-	if(q.block_type != gfs2_block_free) {
-		log_err( _("Found duplicate block referenced as an inode at "
-			   "#%" PRIu64 " (0x%" PRIx64 ")\n"), block, block);
-		if(gfs2_block_mark(sdp, bl, block, gfs2_dup_block)) {
-			stack;
-			fsck_inode_put(ip, f);
-			return -1;
-		}
-		fsck_inode_put(ip, f);
+	bad_pointers = 0L;
+
+	/* First, check the metadata for massive amounts of pointer corruption.
+	   Such corruption can only lead us to ruin trying to clean it up,
+	   so it's better to check it up front and delete the inode if
+	   there is corruption. */
+	rangecheck_fxns.private = &bad_pointers;
+	error = check_metatree(ip, &rangecheck_fxns);
+	if (bad_pointers > BAD_POINTER_TOLERANCE) {
+		log_err( _("Error: inode %llu (0x%llx) has more than "
+			   "%d bad pointers.\n"),
+			 (unsigned long long)ip->i_di.di_num.no_addr,
+			 (unsigned long long)ip->i_di.di_num.no_addr,
+			 BAD_POINTER_TOLERANCE);
+		fsck_blockmap_set(ip, ip->i_di.di_num.no_addr,
+				  _("badly corrupt"), gfs2_block_free);
+		fsck_inode_put(&ip);
 		return 0;
 	}
 
 	switch(ip->i_di.di_mode & S_IFMT) {
 
 	case S_IFDIR:
-		log_debug( _("Setting %" PRIu64 " (0x%" PRIx64 ") to directory inode.\n"),
-				  block, block);
-		if(gfs2_block_set(sdp, bl, block, gfs2_inode_dir)) {
+		if (fsck_blockmap_set(ip, block, _("directory"),
+				      gfs2_inode_dir)) {
 			stack;
-			fsck_inode_put(ip, f);
+			fsck_inode_put(&ip);
 			return -1;
 		}
-		if(add_to_dir_list(sdp, block)) {
+		if(!dirtree_insert(block)) {
 			stack;
-			fsck_inode_put(ip, f);
+			fsck_inode_put(&ip);
 			return -1;
 		}
 		break;
 	case S_IFREG:
-		log_debug( _("Setting %" PRIu64 " (0x%" PRIx64 ") to file inode.\n"),
-				  block, block);
-		if(gfs2_block_set(sdp, bl, block, gfs2_inode_file)) {
+		if (fsck_blockmap_set(ip, block, _("file"),
+				      gfs2_inode_file)) {
 			stack;
-			fsck_inode_put(ip, f);
+			fsck_inode_put(&ip);
 			return -1;
 		}
 		break;
 	case S_IFLNK:
-		log_debug( _("Setting %" PRIu64 " (0x%" PRIx64 ") to symlink inode.\n"),
-				  block, block);
-		if(gfs2_block_set(sdp, bl, block, gfs2_inode_lnk)) {
+		if (fsck_blockmap_set(ip, block, _("symlink"),
+				      gfs2_inode_lnk)) {
 			stack;
-			fsck_inode_put(ip, f);
+			fsck_inode_put(&ip);
 			return -1;
 		}
 		break;
 	case S_IFBLK:
-		log_debug( _("Setting %" PRIu64 " (0x%" PRIx64 ") to block dev inode.\n"),
-				  block, block);
-		if(gfs2_block_set(sdp, bl, block, gfs2_inode_blk)) {
+		if (fsck_blockmap_set(ip, block, _("block device"),
+				      gfs2_inode_blk)) {
 			stack;
-			fsck_inode_put(ip, f);
+			fsck_inode_put(&ip);
 			return -1;
 		}
 		break;
 	case S_IFCHR:
-		log_debug( _("Setting %" PRIu64 " (0x%" PRIx64 ") to char dev inode.\n"),
-				  block, block);
-		if(gfs2_block_set(sdp, bl, block, gfs2_inode_chr)) {
+		if (fsck_blockmap_set(ip, block, _("character device"),
+				      gfs2_inode_chr)) {
 			stack;
-			fsck_inode_put(ip, f);
+			fsck_inode_put(&ip);
 			return -1;
 		}
 		break;
 	case S_IFIFO:
-		log_debug( _("Setting %" PRIu64 " (0x%" PRIx64 ") to fifo inode.\n"),
-				  block, block);
-		if(gfs2_block_set(sdp, bl, block, gfs2_inode_fifo)) {
+		if (fsck_blockmap_set(ip, block, _("fifo"),
+				      gfs2_inode_fifo)) {
 			stack;
-			fsck_inode_put(ip, f);
+			fsck_inode_put(&ip);
 			return -1;
 		}
 		break;
 	case S_IFSOCK:
-		log_debug( _("Setting %" PRIu64 " (0x%" PRIx64 ") to socket inode.\n"),
-				  block, block);
-		if(gfs2_block_set(sdp, bl, block, gfs2_inode_sock)) {
+		if (fsck_blockmap_set(ip, block, _("socket"),
+				      gfs2_inode_sock)) {
 			stack;
-			fsck_inode_put(ip, f);
+			fsck_inode_put(&ip);
 			return -1;
 		}
 		break;
 	default:
-		log_debug( _("Setting %" PRIu64 " (0x%" PRIx64 ") to invalid.\n"),
-				  block, block);
-		if(gfs2_block_set(sdp, bl, block, gfs2_meta_inval)) {
+		/* We found a dinode that has an invalid mode, so we can't
+		   tell if it's a data file, directory or a socket.
+		   Regardless, we have to invalidate its metadata in case there
+		   are duplicate blocks referenced.  If we don't call
+		   check_metatree, the blocks it references will be deleted
+		   wholesale by pass2, and if any of those blocks are
+		   duplicates--referenced by another dinode for some reason--
+		   we will mark it free, even though it's in use.  In other
+		   words, we would introduce file system corruption. So we
+		   need to keep track of the fact that it's invalid and
+		   skip parts that we can't be sure of based on dinode type. */
+		check_metatree(ip, &invalidate_fxns);
+		if (fsck_blockmap_set(ip, block, _("invalid mode"),
+				      gfs2_inode_invalid)) {
 			stack;
-			fsck_inode_put(ip, f);
+			fsck_inode_put(&ip);
 			return -1;
 		}
-		gfs2_set_bitmap(sdp, block, GFS2_BLKST_FREE);
-		fsck_inode_put(ip, f);
+		fsck_inode_put(&ip);
 		return 0;
 	}
-	if(set_link_count(ip->i_sbd, ip->i_di.di_num.no_addr, ip->i_di.di_nlink)) {
+	if(set_link_count(ip->i_di.di_num.no_addr, ip->i_di.di_nlink)) {
 		stack;
-		fsck_inode_put(ip, f);
+		fsck_inode_put(&ip);
 		return -1;
 	}
 
 	if (S_ISDIR(ip->i_di.di_mode) &&
 	    (ip->i_di.di_flags & GFS2_DIF_EXHASH)) {
 		if (((1 << ip->i_di.di_depth) * sizeof(uint64_t)) != ip->i_di.di_size){
-			log_warn( _("Directory dinode #%llu (0x%llx"
+			log_warn( _("Directory dinode block #%llu (0x%llx"
 				 ") has bad depth.  Found %u, Expected %u\n"),
 				 (unsigned long long)ip->i_di.di_num.no_addr,
 				 (unsigned long long)ip->i_di.di_num.no_addr,
 				 ip->i_di.di_depth,
 				 (1 >> (ip->i_di.di_size/sizeof(uint64_t))));
-			/* once implemented, remove continue statement */
-			log_warn( _("Marking inode invalid\n"));
-			if(gfs2_block_set(sdp, bl, block, gfs2_meta_inval)) {
+			if(fsck_blockmap_set(ip, block, _("bad depth"),
+					     gfs2_block_free)) {
 				stack;
-				fsck_inode_put(ip, f);
+				fsck_inode_put(&ip);
 				return -1;
 			}
-			gfs2_set_bitmap(sdp, block, GFS2_BLKST_FREE);
-			fsck_inode_put(ip, f);
+			fsck_inode_put(&ip);
 			return 0;
 		}
 	}
 
 	pass1_fxns.private = &bc;
-
 	error = check_metatree(ip, &pass1_fxns);
 	if (fsck_abort || error < 0) {
-		fsck_inode_put(ip, f);
+		fsck_inode_put(&ip);
 		return 0;
 	}
-	if(error > 0) {
-		log_warn( _("Marking inode #%llu (0x%llx) invalid\n"),
+	if (error > 0) {
+		log_err( _("Error: inode %llu (0x%llx) has unrecoverable "
+			   "errors; invalidating.\n"),
 			 (unsigned long long)ip->i_di.di_num.no_addr,
 			 (unsigned long long)ip->i_di.di_num.no_addr);
-		/* FIXME: Must set all leaves invalid as well */
-		check_metatree(ip, &invalidate_metatree);
-		gfs2_block_set(sdp, bl, ip->i_di.di_num.no_addr,
-			       gfs2_meta_inval);
-		gfs2_set_bitmap(sdp, ip->i_di.di_num.no_addr, GFS2_BLKST_FREE);
-		fsck_inode_put(ip, f);
+		undo_fxns.private = &bc;
+		check_metatree(ip, &undo_fxns);
+		/* If we undo the metadata accounting, including metadatas
+		   duplicate block status, we need to make sure later passes
+		   don't try to free up the metadata referenced by this inode.
+		   Therefore we mark the inode as free space. */
+		fsck_blockmap_set(ip, ip->i_di.di_num.no_addr,
+				  _("corrupt"), gfs2_block_free);
+		fsck_inode_put(&ip);
 		return 0;
 	}
 
-	error = check_inode_eattr(ip, &f, &pass1_fxns);
+	error = check_inode_eattr(ip, &pass1_fxns);
 
-	if (error && f == updated &&
+	if (error &&
 	    !(ip->i_di.di_flags & GFS2_DIF_EA_INDIRECT))
-		ask_remove_inode_eattr(ip, &bc, &f);
+		ask_remove_inode_eattr(ip, &bc);
 
 	if (ip->i_di.di_blocks != 
 		(1 + bc.indir_count + bc.data_count + bc.ea_count)) {
@@ -876,13 +1075,13 @@ static int handle_di(struct gfs2_sbd *sdp, struct gfs2_buffer_head *bh,
 			  (unsigned long long)bc.indir_count,
 			  (unsigned long long)bc.data_count,
 			  (unsigned long long)bc.ea_count);
-		errors_found++;
-		if (query(&opts, _("Fix ondisk block count? (y/n) "))) {
-			errors_corrected++;
+		if (query( _("Fix ondisk block count? (y/n) "))) {
 			ip->i_di.di_blocks = 1 + bc.indir_count + bc.data_count +
 				bc.ea_count;
-			gfs2_dinode_out(&ip->i_di, ip->i_bh->b_data);
-			f = updated;
+			bmodified(ip->i_bh);
+			log_err( _("Block count for #%llu (0x%llx) fixed\n"),
+				(unsigned long long)ip->i_di.di_num.no_addr,
+				(unsigned long long)ip->i_di.di_num.no_addr);
 		} else
 			log_err( _("Bad block count for #%llu (0x%llx"
 				") not fixed\n"),
@@ -890,51 +1089,7 @@ static int handle_di(struct gfs2_sbd *sdp, struct gfs2_buffer_head *bh,
 				(unsigned long long)ip->i_di.di_num.no_addr);
 	}
 
-	fsck_inode_put(ip, f);
-	return 0;
-}
-
-static int scan_meta(struct gfs2_sbd *sdp, struct gfs2_buffer_head *bh,
-			  uint64_t block)
-{
-	if (gfs2_check_meta(bh, 0)) {
-		errors_found++;
-
-		log_info( _("Found invalid metadata at #%llu (0x%llx)\n"),
-			  (unsigned long long)block,
-			  (unsigned long long)block);
-		if(gfs2_block_set(sdp, bl, block, gfs2_meta_inval)) {
-			stack;
-			return -1;
-		}
-		if(query(&opts, _("Okay to free the invalid block? (y/n)"))) {
-			errors_corrected++;
-			gfs2_set_bitmap(sdp, block, GFS2_BLKST_FREE);
-			log_err( _("The invalid block was freed.\n"));
-		} else {
-			log_err( _("The invalid block was ignored.\n"));
-		}
-		return 0;
-	}
-
-	log_debug( _("Checking metadata block #%" PRIu64 " (0x%" PRIx64 ")\n"), block,
-			  block);
-
-	if (!gfs2_check_meta(bh, GFS2_METATYPE_DI)) {
-		/* handle_di calls inode_get, then inode_put, which does brelse.   */
-		/* In order to prevent brelse from getting the count off, hold it. */
-		bhold(bh);
-		if(handle_di(sdp, bh, block)) {
-			stack;
-			return -1;
-		}
-	}
-	/* Ignore everything else - they should be hit by the handle_di step.
-	 * Don't check NONE either, because check_meta passes everything if
-	 * GFS2_METATYPE_NONE is specified.
-	 * Hopefully, other metadata types such as indirect blocks will be
-	 * handled when the inode itself is processed, and if it's not, it
-	 * should be caught in pass5. */
+	fsck_inode_put(&ip);
 	return 0;
 }
 
@@ -978,26 +1133,24 @@ int pass1(struct gfs2_sbd *sbp)
 	 * uses the rg bitmaps, so maybe that's the best way to start
 	 * things - we can change the method later if necessary.
 	 */
-
 	for (tmp = sbp->rglist.next; tmp != &sbp->rglist;
-	     tmp = tmp->next, rg_count++){
-		log_info( _("Checking metadata in Resource Group #%" PRIu64 "\n"),
+	     tmp = tmp->next, rg_count++) {
+		log_debug( _("Checking metadata in Resource Group #%" PRIu64 "\n"),
 				 rg_count);
 		rgd = osi_list_entry(tmp, struct rgrp_list, list);
-		if(gfs2_rgrp_read(sbp, rgd)){
-			stack;
-			return FSCK_ERROR;
-		}
-		log_debug( _("RG at %llu (0x%llx) is %u long\n"),
-			  (unsigned long long)rgd->ri.ri_addr,
-			  (unsigned long long)rgd->ri.ri_addr,
-			  rgd->ri.ri_length);
 		for (i = 0; i < rgd->ri.ri_length; i++) {
-			if(gfs2_block_set(sbp, bl, rgd->ri.ri_addr + i,
-					  gfs2_meta_other)){
+			log_debug( _("rgrp block %lld (0x%llx) "
+				     "is now marked as 'rgrp data'\n"),
+				   rgd->ri.ri_addr + i, rgd->ri.ri_addr + i);
+			if (gfs2_blockmap_set(bl, rgd->ri.ri_addr + i,
+					      gfs2_meta_rgrp)) {
 				stack;
 				return FSCK_ERROR;
 			}
+			/* rgrps and bitmaps don't have bits to represent
+			   their blocks, so don't do this:
+			check_n_fix_bitmap(sbp, rgd->ri.ri_addr + i,
+			gfs2_meta_rgrp);*/
 		}
 
 		offset = sizeof(struct gfs2_rgrp);
@@ -1006,30 +1159,52 @@ int pass1(struct gfs2_sbd *sbp)
 
 		while (1) {
 			/* "block" is relative to the entire file system */
+			/* Get the next dinode in the file system, according
+			   to the bitmap.  This should ONLY be dinodes. */
 			if (gfs2_next_rg_meta(rgd, &block, first))
 				break;
 			warm_fuzzy_stuff(block);
-			if (fsck_abort) { /* if asked to abort */
-				gfs2_rgrp_relse(rgd, not_updated);
+
+			if (fsck_abort) /* if asked to abort */
 				return FSCK_OK;
-			}
 			if (skip_this_pass) {
 				printf( _("Skipping pass 1 is not a good idea.\n"));
 				skip_this_pass = FALSE;
 				fflush(stdout);
 			}
-			bh = bread(&sbp->buf_list, block);
-
-			if (scan_meta(sbp, bh, block)) {
+			bh = bread(sbp, block);
+
+			/*log_debug( _("Checking metadata block #%" PRIu64
+			  " (0x%" PRIx64 ")\n"), block, block);*/
+
+			if (gfs2_check_meta(bh, GFS2_METATYPE_DI)) {
+				log_err( _("Found invalid inode at block #"
+					   "%llu (0x%llx)\n"),
+					 (unsigned long long)block,
+					 (unsigned long long)block);
+				if (gfs2_blockmap_set(bl, block,
+						      gfs2_block_free)) {
+					stack;
+					brelse(bh);
+					return FSCK_ERROR;
+				}
+				check_n_fix_bitmap(sbp, block,
+						   gfs2_block_free);
+			} else if (handle_di(sbp, bh) < 0) {
 				stack;
-				brelse(bh, not_updated);
-				gfs2_rgrp_relse(rgd, not_updated);
+				brelse(bh);
 				return FSCK_ERROR;
 			}
-			brelse(bh, not_updated);
+			/* Ignore everything else - they should be hit by the
+			   handle_di step.  Don't check NONE either, because
+			   check_meta passes everything if GFS2_METATYPE_NONE
+			   is specified.  Hopefully, other metadata types such
+			   as indirect blocks will be handled when the inode
+			   itself is processed, and if it's not, it should be
+			   caught in pass5. */
+			brelse(bh);
 			first = 0;
 		}
-		gfs2_rgrp_relse(rgd, not_updated);
 	}
 	return FSCK_OK;
 }
diff --git a/gfs2/fsck/pass1b.c b/gfs2/fsck/pass1b.c
index 3cf102b..5554325 100644
--- a/gfs2/fsck/pass1b.c
+++ b/gfs2/fsck/pass1b.c
@@ -3,6 +3,7 @@
 #include <string.h>
 #include <unistd.h>
 #include <libintl.h>
+#include <sys/stat.h>
 #define _(String) gettext(String)
 
 #include "libgfs2.h"
@@ -12,15 +13,6 @@
 #include "metawalk.h"
 #include "inode_hash.h"
 
-struct inode_with_dups {
-	osi_list_t list;
-	uint64_t block_no;
-	int dup_count;
-	int ea_only;
-	uint64_t parent;
-	char *name;
-};
-
 struct fxn_info {
 	uint64_t block;
 	int found;
@@ -28,64 +20,95 @@ struct fxn_info {
 };
 
 struct dup_handler {
-	struct dup_blocks *b;
+	struct duptree *b;
 	struct inode_with_dups *id;
 	int ref_inode_count;
 	int ref_count;
 };
 
-static inline void inc_if_found(uint64_t block, int not_ea, void *private) {
-	struct fxn_info *fi = (struct fxn_info *) private;
-	if(block == fi->block) {
-		(fi->found)++;
-		if(not_ea)
-			fi->ea_only = 0;
-	}
-}
+static int check_metalist(struct gfs2_inode *ip, uint64_t block,
+			  struct gfs2_buffer_head **bh, void *private);
+static int check_data(struct gfs2_inode *ip, uint64_t block, void *private);
+static int check_eattr_indir(struct gfs2_inode *ip, uint64_t block,
+			     uint64_t parent, struct gfs2_buffer_head **bh,
+			     void *private);
+static int check_eattr_leaf(struct gfs2_inode *ip, uint64_t block,
+			    uint64_t parent, struct gfs2_buffer_head **bh,
+			    void *private);
+static int check_eattr_entry(struct gfs2_inode *ip,
+			     struct gfs2_buffer_head *leaf_bh,
+			     struct gfs2_ea_header *ea_hdr,
+			     struct gfs2_ea_header *ea_hdr_prev,
+			     void *private);
+static int check_eattr_extentry(struct gfs2_inode *ip, uint64_t *ea_data_ptr,
+				struct gfs2_buffer_head *leaf_bh,
+				struct gfs2_ea_header *ea_hdr,
+				struct gfs2_ea_header *ea_hdr_prev,
+				void *private);
+static int find_dentry(struct gfs2_inode *ip, struct gfs2_dirent *de,
+		       struct gfs2_dirent *prev, struct gfs2_buffer_head *bh,
+		       char *filename, uint16_t *count, void *priv);
+
+struct metawalk_fxns find_refs = {
+	.private = NULL,
+	.check_leaf = NULL,
+	.check_metalist = check_metalist,
+	.check_data = check_data,
+	.check_eattr_indir = check_eattr_indir,
+	.check_eattr_leaf = check_eattr_leaf,
+	.check_dentry = NULL,
+	.check_eattr_entry = check_eattr_entry,
+	.check_eattr_extentry = check_eattr_extentry,
+};
+
+struct metawalk_fxns find_dirents = {
+	.private = NULL,
+	.check_leaf = NULL,
+	.check_metalist = NULL,
+	.check_data = NULL,
+	.check_eattr_indir = NULL,
+	.check_eattr_leaf = NULL,
+	.check_dentry = find_dentry,
+	.check_eattr_entry = NULL,
+	.check_eattr_extentry = NULL,
+};
 
 static int check_metalist(struct gfs2_inode *ip, uint64_t block,
 			  struct gfs2_buffer_head **bh, void *private)
 {
-	inc_if_found(block, 1, private);
-
-	return 0;
+	return add_duplicate_ref(ip, block, ref_as_meta, 1, INODE_VALID);
 }
 
 static int check_data(struct gfs2_inode *ip, uint64_t block, void *private)
 {
-	inc_if_found(block, 1, private);
-
-	return 0;
+	return add_duplicate_ref(ip, block, ref_as_data, 1, INODE_VALID);
 }
 
 static int check_eattr_indir(struct gfs2_inode *ip, uint64_t block,
 			     uint64_t parent, struct gfs2_buffer_head **bh,
-			     enum update_flags *want_updated, void *private)
+			     void *private)
 {
 	struct gfs2_sbd *sbp = ip->i_sbd;
-	struct gfs2_buffer_head *indir_bh = NULL;
+	int error;
 
-	*want_updated = not_updated;
-	inc_if_found(block, 0, private);
-	indir_bh = bread(&sbp->buf_list, block);
-	*bh = indir_bh;
+	error = add_duplicate_ref(ip, block, ref_as_ea, 1, INODE_VALID);
+	if (!error)
+		*bh = bread(sbp, block);
 
-	return 0;
+	return error;
 }
 
 static int check_eattr_leaf(struct gfs2_inode *ip, uint64_t block,
 			    uint64_t parent, struct gfs2_buffer_head **bh,
-			    enum update_flags *want_updated, void *private)
+			    void *private)
 {
 	struct gfs2_sbd *sbp = ip->i_sbd;
-	struct gfs2_buffer_head *leaf_bh = NULL;
-
-	*want_updated = not_updated;
-	inc_if_found(block, 0, private);
-	leaf_bh = bread(&sbp->buf_list, block);
+	int error;
 
-	*bh = leaf_bh;
-	return 0;
+	error = add_duplicate_ref(ip, block, ref_as_ea, 1, INODE_VALID);
+	if (!error)
+		*bh = bread(sbp, block);
+	return error;
 }
 
 static int check_eattr_entry(struct gfs2_inode *ip,
@@ -100,60 +123,75 @@ static int check_eattr_extentry(struct gfs2_inode *ip, uint64_t *ea_data_ptr,
 				struct gfs2_buffer_head *leaf_bh,
 				struct gfs2_ea_header *ea_hdr,
 				struct gfs2_ea_header *ea_hdr_prev,
-				enum update_flags *want_updated, void *private)
+				void *private)
 {
 	uint64_t block = be64_to_cpu(*ea_data_ptr);
 
-	*want_updated = not_updated;
-	inc_if_found(block, 0, private);
+	return add_duplicate_ref(ip, block, ref_as_ea, 1, INODE_VALID);
+}
 
+/*
+ * check_dir_dup_ref - if dirent @de points at the inode of list entry
+ *                     @tmp2, record @filename and directory @ip in it.
+ * Returns: 1 if the entry already had a name or just got one, otherwise 0
+ */
+static int check_dir_dup_ref(struct gfs2_inode *ip,  struct gfs2_dirent *de,
+			     osi_list_t *tmp2, char *filename)
+{
+	struct inode_with_dups *id;
+
+	id = osi_list_entry(tmp2, struct inode_with_dups, list);
+	if(id->name)
+		/* We can only have one parent of inodes that contain duplicate
+		 * blocks...no need to keep looking for this one. */
+		return 1;
+	if(id->block_no == de->de_inum.no_addr) {
+		id->name = strdup(filename);
+		id->parent = ip->i_di.di_num.no_addr;
+		log_debug( _("Duplicate block %llu (0x%llx"
+			     ") is in file or directory %llu"
+			     " (0x%llx) named %s\n"),
+			   (unsigned long long)id->block_no,
+			   (unsigned long long)id->block_no,
+			   (unsigned long long)ip->i_di.di_num.no_addr,
+			   (unsigned long long)ip->i_di.di_num.no_addr,
+			   filename);
+		/* If there are duplicates of duplicates, I guess we'll miss
+		   them here. */
+		return 1;
+	}
 	return 0;
 }
 
 static int find_dentry(struct gfs2_inode *ip, struct gfs2_dirent *de,
 		       struct gfs2_dirent *prev,
 		       struct gfs2_buffer_head *bh, char *filename,
-		       enum update_flags *update, uint16_t *count, void *priv)
+		       uint16_t *count, void *priv)
 {
-	osi_list_t *tmp1, *tmp2;
-	struct dup_blocks *b;
-	struct inode_with_dups *id;
-	struct gfs2_leaf leaf;
-
-	osi_list_foreach(tmp1, &ip->i_sbd->dup_blocks.list) {
-		b = osi_list_entry(tmp1, struct dup_blocks, list);
-		osi_list_foreach(tmp2, &b->ref_inode_list) {
-			id = osi_list_entry(tmp2, struct inode_with_dups,
-					    list);
-			if(id->name)
-				/* We can only have one parent of
-				 * inodes that contain duplicate
-				 * blocks... */
-				continue;
-			if(id->block_no == de->de_inum.no_addr) {
-				id->name = strdup(filename);
-				id->parent = ip->i_di.di_num.no_addr;
-				log_debug( _("Duplicate block %llu (0x%llx"
-					  ") is in file or directory %llu"
-					  " (0x%llx) named %s\n"),
-					  (unsigned long long)id->block_no,
-					  (unsigned long long)id->block_no,
-					  (unsigned long long)
-					  ip->i_di.di_num.no_addr,
-					  (unsigned long long)
-					  ip->i_di.di_num.no_addr,
-					  filename);
-				/* If there are duplicates of
-				 * duplicates, I guess we'll miss them
-				 * here */
+	struct osi_node *n;
+	osi_list_t *tmp2;
+	struct duptree *b;
+	int found;
+
+	for (n = osi_first(&dup_blocks); n; n = osi_next(n)) {
+		b = (struct duptree *)n;
+		found = 0;
+		osi_list_foreach(tmp2, &b->ref_invinode_list) {
+			if (check_dir_dup_ref(ip, de, tmp2, filename)) {
+				found = 1;
 				break;
 			}
 		}
+		if (!found) {
+			osi_list_foreach(tmp2, &b->ref_inode_list) {
+				if (check_dir_dup_ref(ip, de, tmp2, filename))
+					break;
+			}
+		}
 	}
 	/* Return the number of leaf entries so metawalk doesn't flag this
 	   leaf as having none. */
-	gfs2_leaf_in(&leaf, bh->b_data);
-	*count = leaf.lf_entries;
+	*count = be16_to_cpu(((struct gfs2_leaf *)bh->b_data)->lf_entries);
 	return 0;
 }
 
@@ -161,12 +199,32 @@ static int clear_dup_metalist(struct gfs2_inode *ip, uint64_t block,
 			      struct gfs2_buffer_head **bh, void *private)
 {
 	struct dup_handler *dh = (struct dup_handler *) private;
-
-	if(dh->ref_count == 1)
-		return 1;
-	if(block == dh->b->block_no) {
-		log_err( _("Found duplicate reference in inode \"%s\" at "
-			   "block #%llu (0x%llx) to block #%llu (0x%llx)\n"),
+	struct duptree *d;
+
+	if (gfs2_check_range(ip->i_sbd, block) != 0)
+		return 0;
+
+	/* This gets tricky. We're traversing a metadata tree trying to
+	   delete an inode based on it having a duplicate block reference
+	   somewhere in its metadata.  We know this block is listed as data
+	   or metadata for this inode, but it may or may not be one of the
+	   actual duplicate references that caused the problem.  If it's not
+	   a duplicate, it's normal metadata that isn't referenced anywhere
+	   else, but we're deleting the inode out from under it, so we need
+	   to delete it altogether. If the block is a duplicate referenced
+	   block, we need to keep its type intact and let the caller sort
+	   it out once we're down to a single reference. */
+	d = dupfind(block);
+	if (!d) {
+		fsck_blockmap_set(ip, block, _("no longer valid"),
+				  gfs2_block_free);
+		return 0;
+	}
+	/* This block, having failed the above test, is duplicated somewhere */
+	if(block == dh->b->block) {
+		log_err( _("Not clearing duplicate reference in inode \"%s\" "
+			   "at block #%llu (0x%llx) to block #%llu (0x%llx) "
+			   "because it's valid for another inode.\n"),
 			 dh->id->name ? dh->id->name : _("unknown name"),
 			 (unsigned long long)ip->i_di.di_num.no_addr,
 			 (unsigned long long)ip->i_di.di_num.no_addr,
@@ -174,13 +232,15 @@ static int clear_dup_metalist(struct gfs2_inode *ip, uint64_t block,
 		log_err( _("Inode %s is in directory %"PRIu64" (0x%" PRIx64 ")\n"),
 			 dh->id->name ? dh->id->name : "", dh->id->parent,
 			 dh->id->parent);
-		inode_hash_remove(inode_hash, ip->i_di.di_num.no_addr);
-		/* Setting the block to invalid means the inode is
-		 * cleared in pass2 */
-		gfs2_block_set(ip->i_sbd, bl, ip->i_di.di_num.no_addr,
-			       gfs2_meta_inval);
 	}
-	return 0;
+	/* We return 1 not 0 because we need build_and_check_metalist to
+	   bypass adding the metadata below it to the metalist.  If that
+	   were to happen, all the indirect blocks pointed to by the
+	   duplicate block would be processed twice, which means it might
+	   be mistakenly freed as "no longer valid" (in this function above)
+	   even though it's valid metadata for a different inode. Returning
+	   1 ensures that the metadata isn't processed again. */
+	return 1;
 }
 
 static int clear_dup_data(struct gfs2_inode *ip, uint64_t block, void *private)
@@ -190,60 +250,16 @@ static int clear_dup_data(struct gfs2_inode *ip, uint64_t block, void *private)
 
 static int clear_dup_eattr_indir(struct gfs2_inode *ip, uint64_t block,
 				 uint64_t parent, struct gfs2_buffer_head **bh,
-				 enum update_flags *want_updated,
 				 void *private)
 {
-	struct dup_handler *dh = (struct dup_handler *) private;
-	/* Can't use fxns from eattr.c since we need to check the ref
-	 * count */
-	*bh = NULL;
-	*want_updated = not_updated;
-	if(dh->ref_count == 1)
-		return 1;
-	if(block == dh->b->block_no) {
-		log_err( _("Found dup in inode \"%s\" with address #%llu"
-			" (0x%llx) with block #%llu (0x%llx)\n"),
-			dh->id->name ? dh->id->name : _("unknown name"),
-			(unsigned long long)ip->i_di.di_num.no_addr,
-			(unsigned long long)ip->i_di.di_num.no_addr,
-			(unsigned long long)block,
-			(unsigned long long)block);
-		log_err( _("Inode %s is in directory %" PRIu64 " (0x%" PRIx64 ")\n"),
-				dh->id->name ? dh->id->name : "",
-				dh->id->parent, dh->id->parent);
-		gfs2_block_set(ip->i_sbd, bl, ip->i_di.di_eattr,
-			       gfs2_meta_inval);
-	}
-
-	return 0;
+	return clear_dup_metalist(ip, block, NULL, private);
 }
 
 static int clear_dup_eattr_leaf(struct gfs2_inode *ip, uint64_t block,
 				uint64_t parent, struct gfs2_buffer_head **bh,
-				enum update_flags *want_updated, void *private)
+				void *private)
 {
-	struct dup_handler *dh = (struct dup_handler *) private;
-
-	*want_updated = not_updated;
-	if(dh->ref_count == 1)
-		return 1;
-	if(block == dh->b->block_no) {
-		log_err( _("Found dup in inode \"%s\" with address #%llu"
-			" (0x%llx) with block #%llu (0x%llx)\n"),
-			dh->id->name ? dh->id->name : _("unknown name"),
-			(unsigned long long)ip->i_di.di_num.no_addr,
-			(unsigned long long)ip->i_di.di_num.no_addr,
-			(unsigned long long)block,
-			(unsigned long long)block);
-		log_err( _("Inode %s is in directory %" PRIu64 " (0x%" PRIx64 ")\n"),
-				dh->id->name ? dh->id->name : "",
-				dh->id->parent, dh->id->parent);
-		/* mark the main eattr block invalid */
-		gfs2_block_set(ip->i_sbd, bl, ip->i_di.di_eattr,
-			       gfs2_meta_inval);
-	}
-
-	return 0;
+	return clear_dup_metalist(ip, block, NULL, private);
 }
 
 static int clear_eattr_entry (struct gfs2_inode *ip,
@@ -292,94 +308,89 @@ static int clear_eattr_extentry(struct gfs2_inode *ip, uint64_t *ea_data_ptr,
 				struct gfs2_buffer_head *leaf_bh,
 				struct gfs2_ea_header *ea_hdr,
 				struct gfs2_ea_header *ea_hdr_prev,
-				enum update_flags *want_updated,
 				void *private)
 {
 	uint64_t block = be64_to_cpu(*ea_data_ptr);
-	struct dup_handler *dh = (struct dup_handler *) private;
-
-	*want_updated = not_updated;
-	if(dh->ref_count == 1)
-		return 1;
-	if(block == dh->b->block_no) {
-		log_err( _("Found dup in inode \"%s\" with address #%llu"
-			" (0x%llx) with block #%llu (0x%llx)\n"),
-			dh->id->name ? dh->id->name : _("unknown name"),
-			(unsigned long long)ip->i_di.di_num.no_addr,
-			(unsigned long long)ip->i_di.di_num.no_addr,
-			(unsigned long long)block, (unsigned long long)block);
-		log_err( _("Inode %s is in directory %" PRIu64 " (0x%" PRIx64 ")\n"),
-				dh->id->name ? dh->id->name : "",
-				dh->id->parent, dh->id->parent);
-		/* mark the main eattr block invalid */
-		gfs2_block_set(ip->i_sbd, bl, ip->i_di.di_eattr,
-			       gfs2_meta_inval);
-	}
-
-	return 0;
 
+	return clear_dup_metalist(ip, block, NULL, private);
 }
 
 /* Finds all references to duplicate blocks in the metadata */
-static int find_block_ref(struct gfs2_sbd *sbp, uint64_t inode, struct dup_blocks *b)
+static int find_block_ref(struct gfs2_sbd *sbp, uint64_t inode)
 {
 	struct gfs2_inode *ip;
-	struct fxn_info myfi = {b->block_no, 0, 1};
-	struct inode_with_dups *id = NULL;
-	struct metawalk_fxns find_refs = {
-		.private = (void*) &myfi,
-		.check_leaf = NULL,
-		.check_metalist = check_metalist,
-		.check_data = check_data,
-		.check_eattr_indir = check_eattr_indir,
-		.check_eattr_leaf = check_eattr_leaf,
-		.check_dentry = NULL,
-		.check_eattr_entry = check_eattr_entry,
-		.check_eattr_extentry = check_eattr_extentry,
-	};
-	enum update_flags update;
+	int error = 0;
 
 	ip = fsck_load_inode(sbp, inode); /* bread, inode_get */
-	log_debug( _("Checking inode %" PRIu64 " (0x%" PRIx64 ")'s "
-		     "metatree for references to block %" PRIu64 " (0x%" PRIx64
-		     ")\n"), inode, inode, b->block_no, b->block_no);
-	if(check_metatree(ip, &find_refs)) {
-		stack;
-		fsck_inode_put(ip, not_updated); /* out, brelse, free */
-		return -1;
+	/*log_debug( _("Checking inode %" PRIu64 " (0x%" PRIx64 ")'s "
+		     "metatree for references to duplicate blocks)\n"),
+		     inode, inode);*/
+	/* double-check the meta header just to be sure it's metadata */
+	if (ip->i_di.di_header.mh_magic != GFS2_MAGIC ||
+	    ip->i_di.di_header.mh_type != GFS2_METATYPE_DI) {
+		log_debug( _("Block %lld (0x%llx) is not gfs2 metadata.\n"),
+			   (unsigned long long)inode, (unsigned long long)inode);
+		fsck_inode_put(&ip); /* release the inode loaded above */
+		return 1;
 	}
-	log_debug( _("Done checking metatree\n"));
-	/* Check for ea references in the inode */
-	if(check_inode_eattr(ip, &update, &find_refs) < 0){
+	error = check_metatree(ip, &find_refs);
+	if (error < 0) {
 		stack;
-		fsck_inode_put(ip, not_updated); /* out, brelse, free */
-		return -1;
+		fsck_inode_put(&ip); /* out, brelse, free */
+		return error;
 	}
-	if (myfi.found) {
-		if(!(id = malloc(sizeof(*id)))) {
-			log_crit( _("Unable to allocate inode_with_dups structure\n"));
-			return -1;
-		}
-		if(!(memset(id, 0, sizeof(*id)))) {
-			log_crit( _("Unable to zero inode_with_dups structure\n"));
-			return -1;
-		}
-		log_debug( _("Found %d entries with block %" PRIu64
-				  " (0x%" PRIx64 ") in inode #%" PRIu64 " (0x%" PRIx64 ")\n"),
-				  myfi.found, b->block_no, b->block_no, inode, inode);
-		id->dup_count = myfi.found;
-		id->block_no = inode;
-		id->ea_only = myfi.ea_only;
-		osi_list_add_prev(&id->list, &b->ref_inode_list);
+
+	/* Exhash dir leafs will be checked by check_metatree (right after
+	   the "end:" label.)  But if this is a linear directory we need to
+	   check the dir with check_linear_dir. */
+	if(S_ISDIR(ip->i_di.di_mode) && !(ip->i_di.di_flags & GFS2_DIF_EXHASH))
+		error = check_linear_dir(ip, ip->i_bh, &find_dirents);
+
+	/* Check for ea references in the inode */
+	if(!error)
+		error = check_inode_eattr(ip, &find_refs);
+
+	fsck_inode_put(&ip); /* out, brelse, free */
+
+	return error;
+}
+
+static void log_inode_reference(struct duptree *b, osi_list_t *tmp, int inval)
+{
+	char reftypestring[32]; /* reference-type text appended to the log line */
+	struct inode_with_dups *id;
+
+	id = osi_list_entry(tmp, struct inode_with_dups, list);
+	if (id->dup_count == 1) { /* one reference: name its type */
+		if (id->reftypecount[ref_as_data])
+			strcpy(reftypestring, "as data");
+		else if (id->reftypecount[ref_as_meta])
+			strcpy(reftypestring, "as metadata");
+		else
+			strcpy(reftypestring, "as extended attribute");
+	} else { /* otherwise print the data/meta/ea counts */
+		snprintf(reftypestring, sizeof(reftypestring), "%d/%d/%d",
+			 id->reftypecount[ref_as_data],
+			 id->reftypecount[ref_as_meta],
+			 id->reftypecount[ref_as_ea]); /* bounded: 3 ints may exceed 32 */
 	}
-	fsck_inode_put(ip, (opts.no ? not_updated : updated)); /* out, brelse, free */
-	return 0;
+	if (inval)
+		log_warn( _("Invalid "));
+	log_warn( _("Inode %s (%lld/0x%llx) has %d reference(s) to "
+		    "block %llu (0x%llx) (%s)\n"), id->name,
+		  (unsigned long long)id->block_no,
+		  (unsigned long long)id->block_no, id->dup_count,
+		  (unsigned long long)b->block,
+		  (unsigned long long)b->block, reftypestring);
 }
 
-static int handle_dup_blk(struct gfs2_sbd *sbp, struct dup_blocks *b)
+static int clear_a_reference(struct gfs2_sbd *sbp, struct duptree *b,
+			     osi_list_t *ref_list, struct dup_handler *dh,
+			     int inval)
 {
-	osi_list_t *tmp;
+	struct gfs2_inode *ip;
 	struct inode_with_dups *id;
+	osi_list_t *tmp, *x;
 	struct metawalk_fxns clear_dup_fxns = {
 		.private = NULL,
 		.check_leaf = NULL,
@@ -391,10 +402,60 @@ static int handle_dup_blk(struct gfs2_sbd *sbp, struct dup_blocks *b)
 		.check_eattr_entry = clear_eattr_entry,
 		.check_eattr_extentry = clear_eattr_extentry,
 	};
+
+	osi_list_foreach_safe(tmp, ref_list, x) {
+		id = osi_list_entry(tmp, struct inode_with_dups, list);
+		dh->b = b;
+		dh->id = id;
+		if(dh->ref_inode_count == 1) /* down to the last reference */
+			return 1;
+		if (!(query( _("Okay to clear %s inode %lld (0x%llx)? (y/n) "),
+			     (inval ? _("invalidated") : ""),
+			     (unsigned long long)id->block_no,
+			     (unsigned long long)id->block_no))) {
+			log_warn( _("The bad inode was not cleared...\n"));
+			continue;
+		}
+		log_warn( _("Clearing inode %lld (0x%llx)....\n"),
+			  (unsigned long long)id->block_no,
+			  (unsigned long long)id->block_no);
+		clear_dup_fxns.private = (void *) dh;
+		/* Clear the EAs for the inode first */
+		ip = fsck_load_inode(sbp, id->block_no);
+		check_inode_eattr(ip, &clear_dup_fxns);
+		/* If the dup wasn't only in the EA, clear the inode */
+		if (id->reftypecount[ref_as_data] ||
+		    id->reftypecount[ref_as_meta])
+			check_metatree(ip, &clear_dup_fxns);
+
+		fsck_blockmap_set(ip, ip->i_di.di_num.no_addr,
+				  _("bad"), gfs2_inode_invalid);
+		fsck_inode_put(&ip); /* out, brelse, free */
+		(dh->ref_inode_count)--;
+		/* Inode is marked invalid and is removed in pass2 */
+		/* FIXME: other option should be to duplicate the
+		 * block for each duplicate and point the metadata at
+		 * the cloned blocks */
+	}
+	if(dh->ref_inode_count == 1) /* down to the last reference */
+		return 1;
+	return 0;
+}
+
+static int handle_dup_blk(struct gfs2_sbd *sbp, struct duptree *b)
+{
 	struct gfs2_inode *ip;
+	osi_list_t *tmp;
+	struct inode_with_dups *id;
 	struct dup_handler dh = {0};
-	enum update_flags update;
+	int last_reference, ref_in_invalid_inode = 0;
 
+	osi_list_foreach(tmp, &b->ref_invinode_list) {
+		id = osi_list_entry(tmp, struct inode_with_dups, list);
+		dh.ref_inode_count++;
+		dh.ref_count += id->dup_count;
+		ref_in_invalid_inode = 1;
+	}
 	osi_list_foreach(tmp, &b->ref_inode_list) {
 		id = osi_list_entry(tmp, struct inode_with_dups, list);
 		dh.ref_inode_count++;
@@ -408,97 +469,127 @@ static int handle_dup_blk(struct gfs2_sbd *sbp, struct dup_blocks *b)
 	   just _look_ like metadata by coincidence, and at the time we're
 	   checking, we might not have processed the referenced block.
 	   Here in pass1b we're sure. */
+	/* Another possibility here is that there is a single reference
+	   because all the other metadata references were in inodes that got
+	   invalidated for other reasons, such as bad pointers.  So we need to
+	   make sure at this point that any inode deletes reverse out any
+	   duplicate reference before we get to this point. */
 	if (dh.ref_count == 1) {
 		struct gfs2_buffer_head *bh;
 		uint32_t cmagic;
 
-		bh = bread(&sbp->buf_list, b->block_no);
+		bh = bread(sbp, b->block);
 		cmagic = ((struct gfs2_meta_header *)(bh->b_data))->mh_magic;
-		brelse(bh, not_updated);
+		brelse(bh);
 		if (be32_to_cpu(cmagic) == GFS2_MAGIC) {
-			tmp = b->ref_inode_list.next;
+			if (ref_in_invalid_inode)
+				tmp = b->ref_invinode_list.next;
+			else
+				tmp = b->ref_inode_list.next;
 			id = osi_list_entry(tmp, struct inode_with_dups, list);
 			log_warn( _("Inode %s (%lld/0x%llx) has a reference to"
 				    " data block %llu (0x%llx), "
 				    "but the block is really metadata.\n"),
 				  id->name, (unsigned long long)id->block_no,
 				  (unsigned long long)id->block_no,
-				  (unsigned long long)b->block_no,
-				  (unsigned long long)b->block_no);
-			errors_found++;
-			if (query(&opts, _("Clear the inode? (y/n) "))) {
-				errors_corrected++;
+				  (unsigned long long)b->block,
+				  (unsigned long long)b->block);
+			if (query( _("Clear the inode? (y/n) "))) {
+				struct inode_info *ii;
+
 				log_warn( _("Clearing inode %lld (0x%llx)...\n"),
 					 (unsigned long long)id->block_no,
 					 (unsigned long long)id->block_no);
 				ip = fsck_load_inode(sbp, id->block_no);
-				inode_hash_remove(inode_hash,
-						  ip->i_di.di_num.no_addr);
+				ii = inodetree_find(ip->i_di.di_num.no_addr);
+				if (ii)
+					inodetree_delete(ii);
 				/* Setting the block to invalid means the inode
 				   is cleared in pass2 */
-				gfs2_block_set(ip->i_sbd, bl,
-					       ip->i_di.di_num.no_addr,
-					       gfs2_meta_inval);
-				fsck_inode_put(ip, updated);
+				fsck_blockmap_set(ip, ip->i_di.di_num.no_addr,
+						 _("inode with bad duplicate"),
+						 gfs2_inode_invalid);
+				fsck_inode_put(&ip);
 			} else {
 				log_warn( _("The bad inode was not cleared."));
 			}
 			return 0;
 		}
+		/* The other references may have been discredited due to
+		   invalid metadata or something.  Use the last remaining. */
+		log_notice( _("Block %llu (0x%llx) has only one remaining "
+			      "reference.\n"),
+			    (unsigned long long)b->block,
+			    (unsigned long long)b->block);
+		return 0;
 	}
 
 	log_notice( _("Block %llu (0x%llx) has %d inodes referencing it"
 		   " for a total of %d duplicate references\n"),
-		   (unsigned long long)b->block_no,
-		   (unsigned long long)b->block_no,
+		   (unsigned long long)b->block, (unsigned long long)b->block,
 		   dh.ref_inode_count, dh.ref_count);
 
-	osi_list_foreach(tmp, &b->ref_inode_list) {
-		id = osi_list_entry(tmp, struct inode_with_dups, list);
-		log_warn( _("Inode %s (%lld/0x%llx) has %d reference(s) to "
-			    "block %llu (0x%llx)\n"), id->name,
-			  (unsigned long long)id->block_no,
-			  (unsigned long long)id->block_no,
-			  id->dup_count, (unsigned long long)b->block_no,
-			  (unsigned long long)b->block_no);
-	}
-	osi_list_foreach(tmp, &b->ref_inode_list) {
+	osi_list_foreach(tmp, &b->ref_invinode_list)
+		log_inode_reference(b, tmp, 1);
+	osi_list_foreach(tmp, &b->ref_inode_list)
+		log_inode_reference(b, tmp, 0);
+
+	last_reference = clear_a_reference(sbp, b, &b->ref_invinode_list,
+					   &dh, 1);
+	if (!last_reference)
+		last_reference = clear_a_reference(sbp, b, &b->ref_inode_list,
+						   &dh, 0);
+
+	if (last_reference) {
+		uint8_t q;
+
+		/* If we're down to a single reference (and not all references
+		   deleted, which may be the case of an inode that has only
+		   itself and a reference), we need to reset the block type
+		   from invalid to data or metadata. Start at the first one
+		   in the list, not the structure's place holder. */
+		tmp = (&b->ref_inode_list)->next;
 		id = osi_list_entry(tmp, struct inode_with_dups, list);
-		errors_found++;
-		if (!(query(&opts, _("Okay to clear inode %lld (0x%llx)? "
-				     "(y/n) "),
-				     (unsigned long long)id->block_no,
-				     (unsigned long long)id->block_no))) {
-			log_warn( _("The bad inode was not cleared...\n"));
-			continue;
-		}
-		errors_corrected++;
-		    log_warn( _("Clearing inode %lld (0x%llx)...\n"),
-			 (unsigned long long)id->block_no,
-			 (unsigned long long)id->block_no);
+		log_debug( _("Resetting the type based on the remaining "
+			     "reference in inode %lld (0x%llx).\n"),
+			   (unsigned long long)id->block_no,
+			   (unsigned long long)id->block_no);
 		ip = fsck_load_inode(sbp, id->block_no);
-		dh.b = b;
-		dh.id = id;
-		clear_dup_fxns.private = (void *) &dh;
-		/* Clear the EAs for the inode first */
-		check_inode_eattr(ip, &update, &clear_dup_fxns);
-		/* If the dup wasn't only in the EA, clear the inode */
-		if(!id->ea_only)
-			check_metatree(ip, &clear_dup_fxns);
 
-		gfs2_block_set(ip->i_sbd, bl, ip->i_di.di_num.no_addr,
-			       gfs2_meta_inval);
-		fsck_inode_put(ip, updated); /* out, brelse, free */
-		dh.ref_inode_count--;
-		if(dh.ref_inode_count == 1)
-			break;
-		/* Inode is marked invalid and is removed in pass2 */
-		/* FIXME: other option should be to duplicate the
-		 * block for each duplicate and point the metadata at
-		 * the cloned blocks */
+		q = block_type(id->block_no);
+		if (q == gfs2_inode_invalid) {
+			log_debug( _("The remaining reference inode %lld "
+				     "(0x%llx) is marked invalid: Marking "
+				     "the block as free.\n"),
+				   (unsigned long long)id->block_no,
+				   (unsigned long long)id->block_no);
+			fsck_blockmap_set(ip, b->block,
+					  _("reference-repaired leaf"),
+					  gfs2_block_free);
+		} else if (id->reftypecount[ref_as_data]) {
+			fsck_blockmap_set(ip, b->block,
+					  _("reference-repaired data"),
+					  gfs2_block_used);
+		} else if (id->reftypecount[ref_as_meta]) {
+			if (S_ISDIR(ip->i_di.di_mode))
+				fsck_blockmap_set(ip, b->block,
+						  _("reference-repaired leaf"),
+						  gfs2_leaf_blk);
+			else
+				fsck_blockmap_set(ip, b->block,
+						  _("reference-repaired "
+						    "indirect"),
+						  gfs2_indir_blk);
+		} else
+			fsck_blockmap_set(ip, b->block,
+					  _("reference-repaired extended "
+					    "attribute"),
+					  gfs2_meta_eattr);
+		fsck_inode_put(&ip); /* out, brelse, free */
+	} else {
+		log_debug( _("All duplicate references were resolved.\n"));
 	}
 	return 0;
-
 }
 
 /* Pass 1b handles finding the previous inode for a duplicate block
@@ -506,18 +597,16 @@ static int handle_dup_blk(struct gfs2_sbd *sbp, struct dup_blocks *b)
  * use in pass2 */
 int pass1b(struct gfs2_sbd *sbp)
 {
-	struct dup_blocks *b;
+	struct duptree *b;
 	uint64_t i;
-	struct gfs2_block_query q;
-	osi_list_t *tmp = NULL, *x;
-	struct metawalk_fxns find_dirents = {0};
+	uint8_t q;
+	struct osi_node *n;
 	int rc = FSCK_OK;
-	find_dirents.check_dentry = &find_dentry;
 
 	log_info( _("Looking for duplicate blocks...\n"));
 
 	/* If there were no dups in the bitmap, we don't need to do anymore */
-	if(osi_list_empty(&sbp->dup_blocks.list)) {
+	if (dup_blocks.osi_node == NULL) {
 		log_info( _("No duplicate blocks found\n"));
 		return FSCK_OK;
 	}
@@ -527,47 +616,50 @@ int pass1b(struct gfs2_sbd *sbp)
 	log_info( _("Scanning filesystem for inodes containing duplicate blocks...\n"));
 	log_debug( _("Filesystem has %"PRIu64" (0x%" PRIx64 ") blocks total\n"),
 			  last_fs_block, last_fs_block);
-	for(i = 0; i < last_fs_block; i += 1) {
-		warm_fuzzy_stuff(i);
+	for(i = 0; i < last_fs_block; i++) {
 		if (skip_this_pass || fsck_abort) /* if asked to skip the rest */
 			goto out;
-		log_debug( _("Scanning block %" PRIu64 " (0x%" PRIx64 ") for inodes\n"),
-				  i, i);
-		if(gfs2_block_check(sbp, bl, i, &q)) {
+
+		if (dups_found_first == dups_found) {
+			log_debug(_("Found all %d original references to "
+				    "duplicates.\n"), dups_found);
+			break;
+		}
+		q = block_type(i);
+
+		if (q < gfs2_inode_dir)
+			continue;
+		if (q > gfs2_inode_invalid)
+			continue;
+
+		if (q == gfs2_inode_invalid)
+			log_debug( _("Checking invalidated duplicate dinode "
+				     "%lld (0x%llx)\n"),
+				   (unsigned long long)i,
+				   (unsigned long long)i);
+
+		warm_fuzzy_stuff(i);
+		if (find_block_ref(sbp, i) < 0) {
 			stack;
 			rc = FSCK_ERROR;
 			goto out;
 		}
-		if((q.block_type == gfs2_inode_dir) ||
-		   (q.block_type == gfs2_inode_file) ||
-		   (q.block_type == gfs2_inode_lnk) ||
-		   (q.block_type == gfs2_inode_blk) ||
-		   (q.block_type == gfs2_inode_chr) ||
-		   (q.block_type == gfs2_inode_fifo) ||
-		   (q.block_type == gfs2_inode_sock)) {
-			osi_list_foreach_safe(tmp, &sbp->dup_blocks.list, x) {
-				b = osi_list_entry(tmp, struct dup_blocks,
-						   list);
-				if(find_block_ref(sbp, i, b)) {
-					stack;
-					rc = FSCK_ERROR;
-					goto out;
-				}
-			}
-		}
-		if(q.block_type == gfs2_inode_dir) {
-			check_dir(sbp, i, &find_dirents);
-		}
 	}
 
 	/* Fix dups here - it's going to slow things down a lot to fix
 	 * it later */
 	log_info( _("Handling duplicate blocks\n"));
 out:
-        osi_list_foreach_safe(tmp, &sbp->dup_blocks.list, x) {
-                b = osi_list_entry(tmp, struct dup_blocks, list);
+        for (n = osi_first(&dup_blocks); n; n = osi_next(n)) {
+                b = (struct duptree *)n;
 		if (!skip_this_pass && !rc) /* no error & not asked to skip the rest */
 			handle_dup_blk(sbp, b);
+		/* Do not attempt to free the dup_blocks list or its parts
+		   here because any func that calls check_metatree needs
+		   to check duplicate status based on this linked list.
+		   This is especially true for pass2 where it may delete "bad"
+		   inodes, and we can't delete an inode's indirect block if
+		   it was a duplicate (therefore in use by another dinode). */
 	}
 	return rc;
 }
diff --git a/gfs2/fsck/pass1c.c b/gfs2/fsck/pass1c.c
index 62026f6..10b92ed 100644
--- a/gfs2/fsck/pass1c.c
+++ b/gfs2/fsck/pass1c.c
@@ -27,6 +27,7 @@ static int remove_eattr_entry(struct gfs2_sbd *sdp,
 	log_err( _("Bad Extended Attribute at block #%"PRIu64
 		   " (0x%" PRIx64 ") removed.\n"),
 		 leaf_bh->b_blocknr, leaf_bh->b_blocknr);
+	bmodified(leaf_bh);
 	return 0;
 }
 
@@ -36,10 +37,7 @@ static int ask_remove_eattr_entry(struct gfs2_sbd *sdp,
 				  struct gfs2_ea_header *prev,
 				  int fix_curr, int fix_curr_len)
 {
-	errors_found++;
-	if (query(&opts, _("Remove the bad Extended Attribute entry? "
-			   "(y/n) "))) {
-		errors_corrected++;
+	if (query( _("Remove the bad Extended Attribute entry? (y/n) "))) {
 		if (fix_curr)
 			curr->ea_flags |= GFS2_EAFLAG_LAST;
 		if (fix_curr_len) {
@@ -58,14 +56,11 @@ static int ask_remove_eattr_entry(struct gfs2_sbd *sdp,
 	return 1;
 }
 
-static int ask_remove_eattr(struct gfs2_inode *ip,
-			    enum update_flags *need_update)
+static int ask_remove_eattr(struct gfs2_inode *ip)
 {
-	errors_found++;
-	if (query(&opts, _("Remove the bad Extended Attribute? (y/n) "))) {
-		errors_corrected++;
+	if (query( _("Remove the bad Extended Attribute? (y/n) "))) {
 		ip->i_di.di_eattr = 0;
-		*need_update = updated;
+		bmodified(ip->i_bh);
 		log_err( _("Bad Extended Attribute removed.\n"));
 	} else
 		log_err( _("Bad Extended Attribute not removed.\n"));
@@ -74,13 +69,12 @@ static int ask_remove_eattr(struct gfs2_inode *ip,
 
 static int check_eattr_indir(struct gfs2_inode *ip, uint64_t block,
 		      uint64_t parent, struct gfs2_buffer_head **bh,
-		      enum update_flags *need_update, void *private)
+		      void *private)
 {
 	struct gfs2_sbd *sbp = ip->i_sbd;
-	struct gfs2_block_query q;
+	uint8_t q;
 	struct gfs2_buffer_head *indir_bh = NULL;
 
-	*need_update = not_updated;
 	if(gfs2_check_range(sbp, block)) {
 		log_err( _("Extended attributes indirect block #%llu"
 			" (0x%llx) for inode #%llu"
@@ -89,13 +83,10 @@ static int check_eattr_indir(struct gfs2_inode *ip, uint64_t block,
 			(unsigned long long)block,
 			(unsigned long long)ip->i_di.di_num.no_addr,
 			(unsigned long long)ip->i_di.di_num.no_addr);
-		return ask_remove_eattr(ip, need_update);
+		return ask_remove_eattr(ip);
 	}
-	else if (gfs2_block_check(sbp, bl, block, &q)) {
-		stack;
-		return -1;
-	}
-	else if(q.block_type != gfs2_indir_blk) {
+	q = block_type(block);
+	if(q != gfs2_indir_blk) {
 		log_err( _("Extended attributes indirect block #%llu"
 			" (0x%llx) for inode #%llu"
 			" (0x%llx) invalid.\n"),
@@ -103,10 +94,10 @@ static int check_eattr_indir(struct gfs2_inode *ip, uint64_t block,
 			(unsigned long long)block,
 			(unsigned long long)ip->i_di.di_num.no_addr,
 			(unsigned long long)ip->i_di.di_num.no_addr);
-		return ask_remove_eattr(ip, need_update);
+		return ask_remove_eattr(ip);
 	}
 	else
-		indir_bh = bread(&sbp->buf_list, block);
+		indir_bh = bread(sbp, block);
 
 	*bh = indir_bh;
 	return 0;
@@ -114,31 +105,28 @@ static int check_eattr_indir(struct gfs2_inode *ip, uint64_t block,
 
 static int check_eattr_leaf(struct gfs2_inode *ip, uint64_t block,
 		     uint64_t parent, struct gfs2_buffer_head **bh,
-		     enum update_flags *need_update, void *private)
+		     void *private)
 {
 	struct gfs2_sbd *sbp = ip->i_sbd;
-	struct gfs2_block_query q;
+	uint8_t q;
 
 	if(gfs2_check_range(sbp, block)) {
 		log_err( _("Extended attributes block for inode #%llu"
 			" (0x%llx) out of range.\n"),
 			(unsigned long long)ip->i_di.di_num.no_addr,
 			(unsigned long long)ip->i_di.di_num.no_addr);
-		return ask_remove_eattr(ip, need_update);
-	}
-	else if (gfs2_block_check(sbp, bl, block, &q)) {
-		stack;
-		return -1;
+		return ask_remove_eattr(ip);
 	}
-	else if(q.block_type != gfs2_meta_eattr) {
+	q = block_type(block);
+	if(q != gfs2_meta_eattr) {
 		log_err( _("Extended attributes block for inode #%llu"
 			   " (0x%llx) invalid.\n"),
 			 (unsigned long long)ip->i_di.di_num.no_addr,
 			 (unsigned long long)ip->i_di.di_num.no_addr);
-		return ask_remove_eattr(ip, need_update);
+		return ask_remove_eattr(ip);
 	}
 	else 
-		*bh = bread(&sbp->buf_list, block);
+		*bh = bread(sbp, block);
 
 	return 0;
 }
@@ -213,17 +201,13 @@ static int check_eattr_entry(struct gfs2_inode *ip,
 static int check_eattr_extentry(struct gfs2_inode *ip, uint64_t *ea_ptr,
 			 struct gfs2_buffer_head *leaf_bh,
 			 struct gfs2_ea_header *ea_hdr,
-			 struct gfs2_ea_header *ea_hdr_prev,
-			 enum update_flags *want_updated, void *private)
+			 struct gfs2_ea_header *ea_hdr_prev, void *private)
 {
-	struct gfs2_block_query q;
+	uint8_t q;
 	struct gfs2_sbd *sbp = ip->i_sbd;
 
-	if(gfs2_block_check(sbp, bl, be64_to_cpu(*ea_ptr), &q)) {
-		stack;
-		return -1;
-	}
-	if(q.block_type != gfs2_meta_eattr) {
+	q = block_type(be64_to_cpu(*ea_ptr));
+	if(q != gfs2_meta_eattr) {
 		if(remove_eattr_entry(sbp, leaf_bh, ea_hdr, ea_hdr_prev)){
 			stack;
 			return -1;
@@ -244,7 +228,6 @@ int pass1c(struct gfs2_sbd *sbp)
 	int error = 0;
 	osi_list_t *tmp, *x;
 	struct special_blocks *ea_block;
-	enum update_flags want_updated = not_updated;
 
 	pass1c_fxns.check_eattr_indir = &check_eattr_indir;
 	pass1c_fxns.check_eattr_leaf = &check_eattr_leaf;
@@ -260,29 +243,28 @@ int pass1c(struct gfs2_sbd *sbp)
 
 		if (skip_this_pass || fsck_abort) /* if asked to skip the rest */
 			return FSCK_OK;
-		bh = bread(&sbp->buf_list, block_no);
+		bh = bread(sbp, block_no);
 		if (!gfs2_check_meta(bh, GFS2_METATYPE_DI)) { /* if a dinode */
 			log_info( _("EA in inode %"PRIu64" (0x%" PRIx64 ")\n"),
 				 block_no, block_no);
-			gfs2_block_unmark(sbp, bl, block_no, gfs2_eattr_block);
+			gfs2_special_clear(&sbp->eattr_blocks, block_no);
 			ip = fsck_inode_get(sbp, bh);
+			ip->bh_owned = 1;
 
 			log_debug( _("Found eattr at %llu (0x%llx)\n"),
 				  (unsigned long long)ip->i_di.di_eattr,
 				  (unsigned long long)ip->i_di.di_eattr);
 			/* FIXME: Handle walking the eattr here */
-			error = check_inode_eattr(ip, &want_updated,
-						  &pass1c_fxns);
+			error = check_inode_eattr(ip, &pass1c_fxns);
 			if(error < 0) {
 				stack;
-				brelse(bh, not_updated);
+				brelse(bh);
 				return FSCK_ERROR;
 			}
 
-			fsck_inode_put(ip, want_updated); /* dinode_out,
-							     brelse, free */
+			fsck_inode_put(&ip); /* dinode_out, brelse, free */
 		} else {
-			brelse(bh, want_updated);
+			brelse(bh);
 		}
 	}
 	return FSCK_OK;
diff --git a/gfs2/fsck/pass2.c b/gfs2/fsck/pass2.c
index 55944b6..35acba4 100644
--- a/gfs2/fsck/pass2.c
+++ b/gfs2/fsck/pass2.c
@@ -19,28 +19,37 @@
 /* Set children's parent inode in dir_info structure - ext2 does not set
  * dotdot inode here, but instead in pass3 - should we? */
 static int set_parent_dir(struct gfs2_sbd *sbp, uint64_t childblock,
-				   uint64_t parentblock)
+			  uint64_t parentblock)
 {
 	struct dir_info *di;
 
-	if(!find_di(sbp, childblock, &di)) {
-		if(di->dinode == childblock) {
-			if (di->treewalk_parent) {
-				log_err( _("Another directory at block %" PRIu64
-						" (0x%" PRIx64 ") already contains"
-						" this child - checking %" PRIu64 " (0x%" PRIx64 ")\n"),
-						di->treewalk_parent, di->treewalk_parent,
-						parentblock, parentblock);
-				return 1;
-			}
-			di->treewalk_parent = parentblock;
-		}
-	} else {
+	di = dirtree_find(childblock);
+	if(!di) {
 		log_err( _("Unable to find block %"PRIu64" (0x%" PRIx64
-				") in dir_info list\n"),	childblock,	childblock);
+			   ") in dir_info list\n"), childblock, childblock);
 		return -1;
 	}
 
+	if(di->dinode == childblock) {
+		if (di->treewalk_parent) {
+			log_err( _("Another directory at block %" PRIu64
+				   " (0x%" PRIx64 ") already contains this "
+				   "child %lld (%llx) - checking parent %"
+				   PRIu64 " (0x%" PRIx64 ")\n"),
+				 di->treewalk_parent, di->treewalk_parent,
+				 (unsigned long long)childblock,
+				 (unsigned long long)childblock,
+				 parentblock, parentblock);
+			return 1;
+		}
+		log_debug( _("Child %lld (0x%llx) has parent %lld (0x%llx)\n"),
+			   (unsigned long long)childblock,
+			   (unsigned long long)childblock,
+			   (unsigned long long)parentblock,
+			   (unsigned long long)parentblock);
+		di->treewalk_parent = parentblock;
+	}
+
 	return 0;
 }
 
@@ -50,7 +59,8 @@ static int set_dotdot_dir(struct gfs2_sbd *sbp, uint64_t childblock,
 {
 	struct dir_info *di;
 
-	if(!find_di(sbp, childblock, &di)) {
+	di = dirtree_find(childblock);
+	if(di) {
 		if(di->dinode == childblock) {
 			/* Special case for root inode because we set
 			 * it earlier */
@@ -76,36 +86,34 @@ static int set_dotdot_dir(struct gfs2_sbd *sbp, uint64_t childblock,
 }
 
 static int check_eattr_indir(struct gfs2_inode *ip, uint64_t block,
-			    uint64_t parent, struct gfs2_buffer_head **bh,
-			     enum update_flags *want_updated, void *private)
+			     uint64_t parent, struct gfs2_buffer_head **bh,
+			     void *private)
 {
-	*want_updated = not_updated;
-	*bh = bread(&ip->i_sbd->buf_list, block);
+	*bh = bread(ip->i_sbd, block);
 	return 0;
 }
 static int check_eattr_leaf(struct gfs2_inode *ip, uint64_t block,
 			    uint64_t parent, struct gfs2_buffer_head **bh,
-			    enum update_flags *want_updated, void *private)
+			    void *private)
 {
-	*want_updated = not_updated;
-	*bh = bread(&ip->i_sbd->buf_list, block);
+	*bh = bread(ip->i_sbd, block);
 	return 0;
 }
 
 static const char *de_type_string(uint8_t de_type)
 {
 	const char *de_types[15] = {"unknown", "fifo", "chrdev", "invalid",
-								"directory", "invalid", "blkdev", "invalid",
-								"file", "invalid", "symlink", "invalid",
-								"socket", "invalid", "wht"};
+				    "directory", "invalid", "blkdev", "invalid",
+				    "file", "invalid", "symlink", "invalid",
+				    "socket", "invalid", "wht"};
 	if (de_type < 15)
 		return de_types[de_type];
 	return de_types[3]; /* invalid */
 }
 
-static int check_file_type(uint8_t de_type, uint8_t block_type)
+static int check_file_type(uint8_t de_type, uint8_t blk_type)
 {
-	switch(block_type) {
+	switch(blk_type) {
 	case gfs2_inode_dir:
 		if(de_type != DT_DIR)
 			return 1;
@@ -146,6 +154,7 @@ struct metawalk_fxns pass2_fxns_delete = {
 	.private = NULL,
 	.check_metalist = delete_metadata,
 	.check_data = delete_data,
+	.check_leaf = delete_leaf,
 	.check_eattr_indir = delete_eattr_indir,
 	.check_eattr_leaf = delete_eattr_leaf,
 };
@@ -155,10 +164,10 @@ struct metawalk_fxns pass2_fxns_delete = {
 static int check_dentry(struct gfs2_inode *ip, struct gfs2_dirent *dent,
 		 struct gfs2_dirent *prev_de,
 		 struct gfs2_buffer_head *bh, char *filename,
-		 enum update_flags *update, uint16_t *count, void *priv)
+		 uint16_t *count, void *priv)
 {
 	struct gfs2_sbd *sbp = ip->i_sbd;
-	struct gfs2_block_query q = {0};
+	uint8_t q;
 	char tmp_name[MAX_FILENAME];
 	uint64_t entryblock;
 	struct dir_status *ds = (struct dir_status *) priv;
@@ -172,51 +181,14 @@ static int check_dentry(struct gfs2_inode *ip, struct gfs2_dirent *dent,
 	gfs2_dirent_in(&dentry, (char *)dent);
 	de = &dentry;
 
-	clear_eattrs.check_eattr_indir = clear_eattr_indir;
-	clear_eattrs.check_eattr_leaf = clear_eattr_leaf;
+	clear_eattrs.check_eattr_indir = delete_eattr_indir;
+	clear_eattrs.check_eattr_leaf = delete_eattr_leaf;
 	clear_eattrs.check_eattr_entry = clear_eattr_entry;
 	clear_eattrs.check_eattr_extentry = clear_eattr_extentry;
 
 	entryblock = de->de_inum.no_addr;
 
 	/* Start of checks */
-	if (de->de_rec_len < GFS2_DIRENT_SIZE(de->de_name_len)){
-		log_err( _("Dir entry with bad record or name length\n"
-			"\tRecord length = %u\n"
-			"\tName length = %u\n"),
-			de->de_rec_len,
-			de->de_name_len);
-		gfs2_block_set(sbp, bl, ip->i_di.di_num.no_addr,
-			       gfs2_meta_inval);
-		return 1;
-		/* FIXME: should probably delete the entry here at the
-		 * very least - maybe look at attempting to fix it */
-	}
-	
-	calculated_hash = gfs2_disk_hash(filename, de->de_name_len);
-	if (de->de_hash != calculated_hash){
-	        log_err( _("Dir entry with bad hash or name length\n"
-					"\tHash found         = %u (0x%x)\n"
-					"\tFilename           = %s\n"), de->de_hash, de->de_hash,
-					filename);
-			log_err( _("\tName length found  = %u\n"
-					"\tHash expected      = %u (0x%x)\n"),
-					de->de_name_len, calculated_hash, calculated_hash);
-			errors_found++;
-			if(query(&opts, _("Fix directory hash for %s? (y/n) "),
-					 filename)) {
-				errors_corrected++;
-				de->de_hash = calculated_hash;
-				gfs2_dirent_out(de, (char *)dent);
-				log_err( _("Directory entry hash for %s fixed.\n"), filename);
-			}
-			else {
-				log_err( _("Directory entry hash for %s not fixed.\n"), filename);
-				return 1;
-			}
-	}
-	/* FIXME: This should probably go to the top of the fxn, and
-	 * references to filename should be replaced with tmp_name */
 	memset(tmp_name, 0, MAX_FILENAME);
 	if(de->de_name_len < MAX_FILENAME)
 		strncpy(tmp_name, filename, de->de_name_len);
@@ -224,140 +196,155 @@ static int check_dentry(struct gfs2_inode *ip, struct gfs2_dirent *dent,
 		strncpy(tmp_name, filename, MAX_FILENAME - 1);
 
 	if(gfs2_check_range(ip->i_sbd, entryblock)) {
-		log_err( _("Block # referenced by directory entry %s is out of range\n"),
-				tmp_name);
-		errors_found++;
-		if(query(&opts, 
-				 _("Clear directory entry tp out of range block? (y/n) "))) {
-			errors_corrected++;
-			log_err( _("Clearing %s\n"), tmp_name);
-			dirent2_del(ip, bh, prev_de, dent);
-			*update = updated;
-			return 1;
+		log_err( _("Block # referenced by directory entry %s in inode "
+			   "%lld (0x%llx) is out of range\n"),
+			 tmp_name, (unsigned long long)ip->i_di.di_num.no_addr,
+			 (unsigned long long)ip->i_di.di_num.no_addr);
+		if(query( _("Clear directory entry to out of range block? "
+			    "(y/n) "))) {
+			goto nuke_dentry;
 		} else {
 			log_err( _("Directory entry to out of range block remains\n"));
 			(*count)++;
 			ds->entry_count++;
+			/* can't do this because the block is out of range:
+			   increment_link(entryblock); */
 			return 0;
 		}
 	}
-	if(gfs2_block_check(sbp, bl, de->de_inum.no_addr, &q)) {
-		stack;
-		return -1;
+
+	if (de->de_rec_len < GFS2_DIRENT_SIZE(de->de_name_len)) {
+		log_err( _("Dir entry with bad record or name length\n"
+			"\tRecord length = %u\n\tName length = %u\n"),
+			de->de_rec_len, de->de_name_len);
+		if(!query( _("Clear the directory entry? (y/n) "))) {
+			log_err( _("Directory entry not fixed.\n"));
+			goto dentry_is_valid;
+		}
+		fsck_blockmap_set(ip, ip->i_di.di_num.no_addr,
+				  _("corrupt directory entry"),
+				  gfs2_inode_invalid);
+		log_err( _("Bad directory entry deleted.\n"));
+		return 1;
 	}
+
+	calculated_hash = gfs2_disk_hash(tmp_name, de->de_name_len);
+	if (de->de_hash != calculated_hash){
+	        log_err( _("Dir entry with bad hash or name length\n"
+			   "\tHash found         = %u (0x%x)\n"
+			   "\tFilename           = %s\n"),
+			 de->de_hash, de->de_hash, tmp_name);
+		log_err( _("\tName length found  = %u\n"
+			   "\tHash expected      = %u (0x%x)\n"),
+			 de->de_name_len, calculated_hash, calculated_hash);
+		if(!query( _("Fix directory hash for %s? (y/n) "),
+			   tmp_name)) {
+			log_err( _("Directory entry hash for %s not "
+				   "fixed.\n"), tmp_name);
+			goto dentry_is_valid;
+		}
+		de->de_hash = calculated_hash;
+		gfs2_dirent_out(de, (char *)dent);
+		bmodified(bh);
+		log_err( _("Directory entry hash for %s fixed.\n"),
+			 tmp_name);
+	}
+
+	q = block_type(entryblock);
 	/* Get the status of the directory inode */
-	if(q.bad_block) {
+	if(q == gfs2_inode_invalid || q == gfs2_bad_block) {
 		/* This entry's inode has bad blocks in it */
 
 		/* Handle bad blocks */
-		log_err( _("Found a bad directory entry: %s\n"), filename);
-
-		errors_found++;
-		if(query(&opts, _("Delete inode containing bad blocks? (y/n)"))) {
-			errors_corrected++;
-			entry_ip = fsck_load_inode(sbp, de->de_inum.no_addr);
-			check_inode_eattr(entry_ip, update,
-					  &pass2_fxns_delete);
-			check_metatree(entry_ip, &pass2_fxns_delete);
-			fsck_inode_put(entry_ip, updated);
-			dirent2_del(ip, bh, prev_de, dent);
-			gfs2_block_set(sbp, bl, de->de_inum.no_addr,
-				       gfs2_block_free);
-			*update = updated;
-			log_warn( _("The inode containing bad blocks was "
-				    "deleted.\n"));
-			return 1;
-		} else {
+		log_err( _("Found a bad directory entry: %s\n"), tmp_name);
+
+		if(!query( _("Delete inode containing bad blocks? (y/n)"))) {
 			log_warn( _("Entry to inode containing bad blocks remains\n"));
-			(*count)++;
-			ds->entry_count++;
-			return 0;
+			goto dentry_is_valid;
 		}
 
+		if (ip->i_di.di_num.no_addr == entryblock)
+			entry_ip = ip;
+		else
+			entry_ip = fsck_load_inode(sbp, entryblock);
+		check_inode_eattr(entry_ip, &pass2_fxns_delete);
+		check_metatree(entry_ip, &pass2_fxns_delete);
+		if (entry_ip != ip)
+			fsck_inode_put(&entry_ip);
+		fsck_blockmap_set(ip, entryblock,
+				  _("bad directory entry"), gfs2_block_free);
+		goto nuke_dentry;
 	}
-	if(q.block_type != gfs2_inode_dir && q.block_type != gfs2_inode_file &&
-	   q.block_type != gfs2_inode_lnk && q.block_type != gfs2_inode_blk &&
-	   q.block_type != gfs2_inode_chr && q.block_type != gfs2_inode_fifo &&
-	   q.block_type != gfs2_inode_sock) {
-		log_err( _("Directory entry '%s' at block %llu (0x%llx"
-			   ") in dir inode %llu (0x%llx"
-			   ") block type %d: %s.\n"), tmp_name,
-			 (unsigned long long)de->de_inum.no_addr,
-			 (unsigned long long)de->de_inum.no_addr,
+	if(q < gfs2_inode_dir || q > gfs2_inode_sock) {
+		log_err( _("Directory entry '%s' referencing inode %llu "
+			   "(0x%llx) in dir inode %llu (0x%llx) block type "
+			   "%d: %s.\n"), tmp_name,
+			 (unsigned long long)entryblock,
+			 (unsigned long long)entryblock,
 			 (unsigned long long)ip->i_di.di_num.no_addr,
 			 (unsigned long long)ip->i_di.di_num.no_addr,
-			 q.block_type, q.block_type == gfs2_meta_inval ?
-			 _("previously marked invalid") :
-			 _("is not an inode"));
-
-		errors_found++;
-		if(query(&opts, _("Clear directory entry to non-inode block? "
-				  "(y/n) "))) {
-			struct gfs2_buffer_head *bhi;
-
-			errors_corrected++;
-			dirent2_del(ip, bh, prev_de, dent);
-			*update = updated;
-			log_warn( _("Directory entry '%s' cleared\n"), tmp_name);
-			/* If it was previously marked invalid (i.e. known
-			   to be bad, not just a free block, etc.) then
-			   delete any metadata it holds.  If not, return. */
-			if (q.block_type != gfs2_meta_inval)
-				return 1;
-
-			/* Now try to clear the dinode, if it is an dinode */
-			bhi = bread(&sbp->buf_list, de->de_inum.no_addr);
-			error = gfs2_check_meta(bhi, GFS2_METATYPE_DI);
-			brelse(bhi, updated);
-			if (error)
-				return 1; /* not a dinode: nothing to delete */
-
-			entry_ip = fsck_load_inode(sbp, de->de_inum.no_addr);
-			check_inode_eattr(entry_ip, update,
-					  &pass2_fxns_delete);
-			check_metatree(entry_ip, &pass2_fxns_delete);
-			fsck_inode_put(entry_ip, updated);
-			gfs2_block_set(sbp, bl, de->de_inum.no_addr,
-				       gfs2_block_free);
+			 q, q == gfs2_inode_invalid ?
+			 _("was previously marked invalid") :
+			 _("was deleted or is not an inode"));
 
-			return 1;
-		} else {
+		if(!query( _("Clear directory entry to non-inode block? "
+			     "(y/n) "))) {
 			log_err( _("Directory entry to non-inode block remains\n"));
-			(*count)++;
-			ds->entry_count++;
-			return 0;
+			goto dentry_is_valid;
 		}
+
+		/* Don't decrement the link here: Here in pass2, we increment
+		   only when we know it's okay.
+		   decrement_link(ip->i_di.di_num.no_addr); */
+		/* If it was previously marked invalid (i.e. known
+		   to be bad, not just a free block, etc.) then the temptation
+		   would be to delete any metadata it holds.  The trouble is:
+		   if it's invalid, we may or _may_not_ have traversed its
+		   metadata tree, and therefore may or may not have marked the
+		   blocks it points to as a metadata type, or as a duplicate.
+		   If there is really a duplicate reference, but we didn't
+		   process the metadata tree because it's invalid, some other
+		   inode has a reference to the metadata block, in which case
+		   freeing it would do more harm than good.  IOW we cannot
+		   count on "delete_block_if_notdup" knowing whether it's
+		   really a duplicate block if we never traversed the metadata
+		   tree for the invalid inode. */
+		goto nuke_dentry;
 	}
 
-	error = check_file_type(de->de_type, q.block_type);
+	error = check_file_type(de->de_type, q);
 	if(error < 0) {
+		log_err( _("Error: directory entry type is "
+			   "incompatible with block type at block %lld "
+			   "(0x%llx) in directory inode %llu (0x%llx).\n"),
+			 (unsigned long long)entryblock,
+			 (unsigned long long)entryblock,
+			 (unsigned long long)ip->i_di.di_num.no_addr,
+			 (unsigned long long)ip->i_di.di_num.no_addr);
+		log_err( _("Directory entry type is %d, block type is %d.\n"),
+			 de->de_type, q);
 		stack;
 		return -1;
 	}
 	if(error > 0) {
-		log_warn( _("Type '%s' in dir entry (%s, %llu/0x%llx) conflicts"
+		log_err( _("Type '%s' in dir entry (%s, %llu/0x%llx) conflicts"
 			 " with type '%s' in dinode. (Dir entry is stale.)\n"),
 			 de_type_string(de->de_type), tmp_name,
-			 (unsigned long long)de->de_inum.no_addr,
-			 (unsigned long long)de->de_inum.no_addr,
-			 block_type_string(&q));
-		errors_found++;
-		if(query(&opts, _("Clear stale directory entry? (y/n) "))) {
-			errors_corrected++;
-			entry_ip = fsck_load_inode(sbp, de->de_inum.no_addr);
-			check_inode_eattr(entry_ip, update, &clear_eattrs);
-			fsck_inode_put(entry_ip, not_updated);
-
-			dirent2_del(ip, bh, prev_de, dent);
-			*update = updated;
-			log_err( _("Stale directory entry deleted\n"));
-			return 1;
-		} else {
+			 (unsigned long long)entryblock,
+			 (unsigned long long)entryblock,
+			 block_type_string(q));
+		if(!query( _("Clear stale directory entry? (y/n) "))) {
 			log_err( _("Stale directory entry remains\n"));
-			(*count)++;
-			ds->entry_count++;
-			return 0;
+			goto dentry_is_valid;
 		}
+		if (ip->i_di.di_num.no_addr == entryblock)
+			entry_ip = ip;
+		else
+			entry_ip = fsck_load_inode(sbp, entryblock);
+		check_inode_eattr(entry_ip, &clear_eattrs);
+		if (entry_ip != ip)
+			fsck_inode_put(&entry_ip);
+		goto nuke_dentry;
 	}
 
 	if(!strcmp(".", tmp_name)) {
@@ -368,74 +355,55 @@ static int check_dentry(struct gfs2_inode *ip, struct gfs2_dirent *dent,
 				" (0x%llx)\n"),
 				(unsigned long long)ip->i_di.di_num.no_addr,
 				(unsigned long long)ip->i_di.di_num.no_addr);
-			errors_found++;
-			if(query(&opts, _("Clear duplicate '.' entry? (y/n) "))) {
-
-				errors_corrected++;
-				entry_ip = fsck_load_inode(sbp, de->de_inum.no_addr);
-				check_inode_eattr(entry_ip, update,
-						  &clear_eattrs);
-				fsck_inode_put(entry_ip, not_updated);
-
-				dirent2_del(ip, bh, prev_de, dent);
-				*update = updated;
-				return 1;
-			} else {
+			if(!query( _("Clear duplicate '.' entry? (y/n) "))) {
 				log_err( _("Duplicate '.' entry remains\n"));
 				/* FIXME: Should we continue on here
-				 * and check the rest of the '.'
-				 * entry? */
-				increment_link(sbp, de->de_inum.no_addr);
-				(*count)++;
-				ds->entry_count++;
-				return 0;
+				 * and check the rest of the '.' entry? */
+				goto dentry_is_valid;
 			}
+			if (ip->i_di.di_num.no_addr == entryblock)
+				entry_ip = ip;
+			else
+				entry_ip = fsck_load_inode(sbp, entryblock);
+			check_inode_eattr(entry_ip, &clear_eattrs);
+			if (entry_ip != ip)
+				fsck_inode_put(&entry_ip);
+			goto nuke_dentry;
 		}
 
 		/* GFS2 does not rely on '.' being in a certain
 		 * location */
 
 		/* check that '.' refers to this inode */
-		if(de->de_inum.no_addr != ip->i_di.di_num.no_addr) {
+		if(entryblock != ip->i_di.di_num.no_addr) {
 			log_err( _("'.' entry's value incorrect in directory %llu"
 				" (0x%llx).  Points to %llu"
 				" (0x%llx) when it should point to %llu"
 				" (0x%llx).\n"),
-				(unsigned long long)de->de_inum.no_addr,
-				(unsigned long long)de->de_inum.no_addr,
-				(unsigned long long)de->de_inum.no_addr,
-				(unsigned long long)de->de_inum.no_addr,
+				(unsigned long long)entryblock,
+				(unsigned long long)entryblock,
+				(unsigned long long)entryblock,
+				(unsigned long long)entryblock,
 				(unsigned long long)ip->i_di.di_num.no_addr,
 				(unsigned long long)ip->i_di.di_num.no_addr);
-			errors_found++;
-			if(query(&opts, _("Remove '.' reference? (y/n) "))) {
-				errors_corrected++;
-				entry_ip = fsck_load_inode(sbp, de->de_inum.no_addr);
-				check_inode_eattr(entry_ip, update,
-						  &clear_eattrs);
-				fsck_inode_put(entry_ip, not_updated);
-
-				dirent2_del(ip, bh, prev_de, dent);
-				*update = updated;
-				return 1;
-
-			} else {
+			if(!query( _("Remove '.' reference? (y/n) "))) {
 				log_err( _("Invalid '.' reference remains\n"));
 				/* Not setting ds->dotdir here since
 				 * this '.' entry is invalid */
-				increment_link(sbp, de->de_inum.no_addr);
-				(*count)++;
-				ds->entry_count++;
-				return 0;
+				goto dentry_is_valid;
 			}
+			if (ip->i_di.di_num.no_addr == entryblock)
+				entry_ip = ip;
+			else
+				entry_ip = fsck_load_inode(sbp, entryblock);
+			check_inode_eattr(entry_ip, &clear_eattrs);
+			if (entry_ip != ip)
+				fsck_inode_put(&entry_ip);
+			goto nuke_dentry;
 		}
 
 		ds->dotdir = 1;
-		increment_link(sbp, de->de_inum.no_addr);
-		(*count)++;
-		ds->entry_count++;
-
-		return 0;
+		goto dentry_is_valid;
 	}
 	if(!strcmp("..", tmp_name)) {
 		log_debug( _("Found .. dentry\n"));
@@ -444,56 +412,45 @@ static int check_dentry(struct gfs2_inode *ip, struct gfs2_dirent *dent,
 				"(0x%llx)\n"),
 				(unsigned long long)ip->i_di.di_num.no_addr,
 				(unsigned long long)ip->i_di.di_num.no_addr);
-			errors_found++;
-			if(query(&opts, _("Clear duplicate '..' entry? (y/n) "))) {
-
-				errors_corrected++;
-				entry_ip = fsck_load_inode(sbp, de->de_inum.no_addr);
-				check_inode_eattr(entry_ip, update,
-						  &clear_eattrs);
-				fsck_inode_put(entry_ip, not_updated);
-
-				dirent2_del(ip, bh, prev_de, dent);
-				*update = 1;
-				return 1;
-			} else {
+			if(!query( _("Clear duplicate '..' entry? (y/n) "))) {
 				log_err( _("Duplicate '..' entry remains\n"));
 				/* FIXME: Should we continue on here
 				 * and check the rest of the '..'
 				 * entry? */
-				increment_link(sbp, de->de_inum.no_addr);
-				(*count)++;
-				ds->entry_count++;
-				return 0;
+				goto dentry_is_valid;
 			}
+
+			if (ip->i_di.di_num.no_addr == entryblock)
+				entry_ip = ip;
+			else
+				entry_ip = fsck_load_inode(sbp, entryblock);
+			check_inode_eattr(entry_ip, &clear_eattrs);
+			if (entry_ip != ip)
+				fsck_inode_put(&entry_ip);
+
+			goto nuke_dentry;
 		}
 
-		if(q.block_type != gfs2_inode_dir) {
+		if(q != gfs2_inode_dir) {
 			log_err( _("Found '..' entry in directory %llu (0x%llx) "
 				"pointing to something that's not a directory"),
 				(unsigned long long)ip->i_di.di_num.no_addr,
 				(unsigned long long)ip->i_di.di_num.no_addr);
-			errors_found++;
-			if(query(&opts, _("Clear bad '..' directory entry? (y/n) "))) {
-				errors_corrected++;
-				entry_ip = fsck_load_inode(sbp, de->de_inum.no_addr);
-				check_inode_eattr(entry_ip, update,
-						  &clear_eattrs);
-				fsck_inode_put(entry_ip, not_updated);
-
-				dirent2_del(ip, bh, prev_de, dent);
-				*update = 1;
-				return 1;
-			} else {
+			if(!query( _("Clear bad '..' directory entry? (y/n) "))) {
 				log_err( _("Bad '..' directory entry remains\n"));
-				increment_link(sbp, de->de_inum.no_addr);
-				(*count)++;
-				ds->entry_count++;
-				return 0;
+				goto dentry_is_valid;
 			}
+			if (ip->i_di.di_num.no_addr == entryblock)
+				entry_ip = ip;
+			else
+				entry_ip = fsck_load_inode(sbp, entryblock);
+			check_inode_eattr(entry_ip, &clear_eattrs);
+			if (entry_ip != ip)
+				fsck_inode_put(&entry_ip);
+
+			goto nuke_dentry;
 		}
-		/* GFS2 does not rely on '..' being in a
-		 * certain location */
+		/* GFS2 does not rely on '..' being in a certain location */
 
 		/* Add the address this entry is pointing to
 		 * to this inode's dotdot_parent in
@@ -504,56 +461,47 @@ static int check_dentry(struct gfs2_inode *ip, struct gfs2_dirent *dent,
 		}
 
 		ds->dotdotdir = 1;
-		increment_link(sbp, de->de_inum.no_addr);
-		*update = (opts.no ? not_updated : updated);
-		(*count)++;
-		ds->entry_count++;
-		return 0;
+		goto dentry_is_valid;
 	}
 
-	/* After this point we're only concerned with
-	 * directories */
-	if(q.block_type != gfs2_inode_dir) {
-		log_debug( _("Found non-dir inode dentry\n"));
-		increment_link(sbp, de->de_inum.no_addr);
-		*update = (opts.no ? not_updated : updated);
-		(*count)++;
-		ds->entry_count++;
-		return 0;
+	/* After this point we're only concerned with directories */
+	if(q != gfs2_inode_dir) {
+		log_debug( _("Found non-dir inode dentry pointing to %lld "
+			     "(0x%llx)\n"),
+			   (unsigned long long)entryblock,
+			   (unsigned long long)entryblock);
+		goto dentry_is_valid;
 	}
 
-	log_debug( _("Found plain directory dentry\n"));
+	/*log_debug( _("Found plain directory dentry\n"));*/
 	error = set_parent_dir(sbp, entryblock, ip->i_di.di_num.no_addr);
 	if(error > 0) {
 		log_err( _("%s: Hard link to block %" PRIu64" (0x%" PRIx64
-				") detected.\n"), filename, entryblock, entryblock);
+			   ") detected.\n"), tmp_name, entryblock, entryblock);
 
-		errors_found++;
-		if(query(&opts, _("Clear hard link to directory? (y/n) "))) {
-			errors_corrected++;
-			*update = 1;
-
-			dirent2_del(ip, bh, prev_de, dent);
-			log_warn( _("Directory entry %s cleared\n"), filename);
-
-			return 1;
-		} else {
+		if(query( _("Clear hard link to directory? (y/n) ")))
+			goto nuke_dentry;
+		else {
 			log_err( _("Hard link to directory remains\n"));
-			(*count)++;
-			ds->entry_count++;
-			return 0;
+			goto dentry_is_valid;
 		}
-	}
-	else if (error < 0) {
+	} else if (error < 0) {
 		stack;
 		return -1;
 	}
-	increment_link(sbp, de->de_inum.no_addr);
-	*update = (opts.no ? not_updated : updated);
+dentry_is_valid:
+	/* This directory inode links to this inode via this dentry */
+	increment_link(entryblock, ip->i_di.di_num.no_addr,
+		       _("valid reference"));
 	(*count)++;
 	ds->entry_count++;
 	/* End of checks */
 	return 0;
+
+nuke_dentry:
+	dirent2_del(ip, bh, prev_de, dent);
+	log_err( _("Bad directory entry '%s' cleared.\n"), tmp_name);
+	return 1;
 }
 
 
@@ -575,27 +523,24 @@ static int check_system_dir(struct gfs2_inode *sysinode, const char *dirname,
 {
 	uint64_t iblock = 0;
 	struct dir_status ds = {0};
-	struct gfs2_buffer_head b, *bh = &b;
 	char *filename;
 	int filename_len;
 	char tmp_name[256];
-	enum update_flags update = not_updated;
 	int error = 0;
 
 	log_info( _("Checking system directory inode '%s'\n"), dirname);
 
 	if (sysinode) {
 		iblock = sysinode->i_di.di_num.no_addr;
-		if(gfs2_block_check(sysinode->i_sbd, bl, iblock, &ds.q)) {
-			iblock = sysinode->i_di.di_num.no_addr;
-		}
+		ds.q = block_type(iblock);
 	}
 	pass2_fxns.private = (void *) &ds;
-	if(ds.q.bad_block) {
+	if(ds.q == gfs2_bad_block) {
 		/* First check that the directory's metatree is valid */
-		if(check_metatree(sysinode, &pass2_fxns)) {
+		error = check_metatree(sysinode, &pass2_fxns);
+		if (error < 0) {
 			stack;
-			return -1;
+			return error;
 		}
 	}
 	error = check_dir(sysinode->i_sbd, iblock, &pass2_fxns);
@@ -604,18 +549,18 @@ static int check_system_dir(struct gfs2_inode *sysinode, const char *dirname,
 		return -1;
 	}
 	if (error > 0)
-		gfs2_block_set(sysinode->i_sbd, bl, iblock, gfs2_meta_inval);
+		fsck_blockmap_set(sysinode, iblock, dirname,
+				  gfs2_inode_invalid);
 
-	bh = bhold(sysinode->i_bh);
-	if(check_inode_eattr(sysinode, &update, &pass2_fxns)) {
+	if(check_inode_eattr(sysinode, &pass2_fxns)) {
 		stack;
 		return -1;
 	}
 	if(!ds.dotdir) {
 		log_err( _("No '.' entry found for %s directory.\n"), dirname);
-		errors_found++;
-		if (query(&opts, _("Is it okay to add '.' entry? (y/n) "))) {
-			errors_corrected++;
+		if (query( _("Is it okay to add '.' entry? (y/n) "))) {
+			uint64_t cur_blks = sysinode->i_di.di_blocks;
+
 			sprintf(tmp_name, ".");
 			filename_len = strlen(tmp_name); /* no trailing NULL */
 			if(!(filename = malloc(sizeof(char) * filename_len))) {
@@ -633,11 +578,14 @@ static int check_system_dir(struct gfs2_inode *sysinode, const char *dirname,
 			log_warn( _("Adding '.' entry\n"));
 			dir_add(sysinode, filename, filename_len,
 				&(sysinode->i_di.di_num), DT_DIR);
-			increment_link(sysinode->i_sbd,
-				       sysinode->i_di.di_num.no_addr);
+			if (cur_blks != sysinode->i_di.di_blocks)
+				reprocess_inode(sysinode, dirname);
+			/* This system inode is linked to itself via '.' */
+			increment_link(sysinode->i_di.di_num.no_addr,
+				       sysinode->i_di.di_num.no_addr,
+				       "sysinode \".\"");
 			ds.entry_count++;
 			free(filename);
-			update = 1;
 		} else
 			log_err( _("The directory was not fixed.\n"));
 	}
@@ -647,15 +595,13 @@ static int check_system_dir(struct gfs2_inode *sysinode, const char *dirname,
 			(unsigned long long)sysinode->i_di.di_num.no_addr,
 			(unsigned long long)sysinode->i_di.di_num.no_addr,
 			sysinode->i_di.di_entries, ds.entry_count);
-		errors_found++;
-		if(query(&opts, _("Fix entries for %s inode %llu (0x%llx"
-			 ")? (y/n) "), dirname,
-			 (unsigned long long)sysinode->i_di.di_num.no_addr,
-			 (unsigned long long)sysinode->i_di.di_num.no_addr)) {
-			errors_corrected++;
+		if(query( _("Fix entries for %s inode %llu (0x%llx)? (y/n) "),
+			  dirname,
+			  (unsigned long long)sysinode->i_di.di_num.no_addr,
+			  (unsigned long long)sysinode->i_di.di_num.no_addr)) {
 			sysinode->i_di.di_entries = ds.entry_count;
+			bmodified(sysinode->i_bh);
 			log_warn( _("Entries updated\n"));
-			update = 1;
 		} else {
 			log_err( _("Entries for inode %llu (0x%llx"
 				") left out of sync\n"),
@@ -665,8 +611,6 @@ static int check_system_dir(struct gfs2_inode *sysinode, const char *dirname,
 				sysinode->i_di.di_num.no_addr);
 		}
 	}
-
-	brelse(bh, opts.no ? not_updated : update);
 	return 0;
 }
 
@@ -696,17 +640,14 @@ static inline int is_system_dir(struct gfs2_sbd *sbp, uint64_t block)
  */
 int pass2(struct gfs2_sbd *sbp)
 {
-	uint64_t i;
-	struct gfs2_block_query q;
+	uint64_t dirblk;
+	uint8_t q;
 	struct dir_status ds = {0};
 	struct gfs2_inode *ip;
-	struct gfs2_buffer_head *bh = NULL;
 	char *filename;
 	int filename_len;
 	char tmp_name[256];
 	int error = 0;
-	enum update_flags need_update = NOT_UPDATED;
-	struct dup_blocks *b;
 
 	/* Check all the system directory inodes. */
 	if (check_system_dir(sbp->md.jiinode, "jindex", build_jindex)) {
@@ -727,89 +668,89 @@ int pass2(struct gfs2_sbd *sbp)
 	}
 	log_info( _("Checking directory inodes.\n"));
 	/* Grab each directory inode, and run checks on it */
-	for(i = 0; i < last_fs_block; i++) {
-		need_update = 0;
-		warm_fuzzy_stuff(i);
+	for(dirblk = 0; dirblk < last_fs_block; dirblk++) {
+		warm_fuzzy_stuff(dirblk);
 		if (skip_this_pass || fsck_abort) /* if asked to skip the rest */
 			return FSCK_OK;
 
 		/* Skip the system inodes - they're checked above */
-		if (is_system_dir(sbp, i))
+		if (is_system_dir(sbp, dirblk))
 			continue;
 
-		if(gfs2_block_check(sbp, bl, i, &q)) {
-			log_err( _("Can't get block %"PRIu64 " (0x%" PRIx64
-					") from block list\n"), i, i);
-			return FSCK_ERROR;
-		}
+		q = block_type(dirblk);
 
-		if(q.block_type != gfs2_inode_dir)
+		if(q != gfs2_inode_dir)
 			continue;
 
 		log_debug( _("Checking directory inode at block %"PRIu64" (0x%"
-				  PRIx64 ")\n"), i, i);
+				  PRIx64 ")\n"), dirblk, dirblk);
 
 		memset(&ds, 0, sizeof(ds));
 		pass2_fxns.private = (void *) &ds;
-		if(ds.q.bad_block) {
+		if(ds.q == gfs2_bad_block) {
 			/* First check that the directory's metatree
 			 * is valid */
-			ip = fsck_load_inode(sbp, i);
-			if(check_metatree(ip, &pass2_fxns)) {
-				fsck_inode_put(ip, not_updated);
+			ip = fsck_load_inode(sbp, dirblk);
+			error = check_metatree(ip, &pass2_fxns);
+			fsck_inode_put(&ip);
+			if (error < 0) {
 				stack;
-				return FSCK_ERROR;
+				return error;
 			}
-			fsck_inode_put(ip, not_updated);
 		}
-		error = check_dir(sbp, i, &pass2_fxns);
+		error = check_dir(sbp, dirblk, &pass2_fxns);
 		if(error < 0) {
 			stack;
 			return FSCK_ERROR;
 		}
 		if (error > 0) {
-			struct dir_info *di = NULL;
-			error = find_di(sbp, i, &di);
-			if(error < 0) {
+			struct dir_info *di;
+
+			di = dirtree_find(dirblk);
+			if(!di) {
 				stack;
 				return FSCK_ERROR;
 			}
-			if(error == 0) {
-				/* FIXME: factor */
-				errors_found++;
-				if(query(&opts, _("Remove directory entry for bad"
-						 " inode %"PRIu64" (0x%" PRIx64 ") in %"PRIu64
-						 " (0x%" PRIx64 ")? (y/n)"), i, i, di->treewalk_parent,
-						 di->treewalk_parent)) {
-					errors_corrected++;
-					error = remove_dentry_from_dir(sbp, di->treewalk_parent,
-												   i);
-					if(error < 0) {
-						stack;
-						return FSCK_ERROR;
-					}
-					if(error > 0) {
-						log_warn( _("Unable to find dentry for %"
-								 PRIu64 " (0x%" PRIx64 ") in %" PRIu64
-								 " (0x%" PRIx64 ")\n"), i, i,
-								 di->treewalk_parent, di->treewalk_parent);
-					}
-					log_warn( _("Directory entry removed\n"));
-				} else
-					log_err( _("Directory entry to invalid inode remains.\n"));
-			}
-			gfs2_block_set(sbp, bl, i, gfs2_meta_inval);
+			if(query( _("Remove directory entry for bad"
+				    " inode %"PRIu64" (0x%" PRIx64 ") in %"PRIu64
+				    " (0x%" PRIx64 ")? (y/n)"), dirblk,
+				  dirblk, di->treewalk_parent,
+				  di->treewalk_parent)) {
+				error = remove_dentry_from_dir(sbp, di->treewalk_parent,
+							       dirblk);
+				if(error < 0) {
+					stack;
+					return FSCK_ERROR;
+				}
+				if(error > 0) {
+					log_warn( _("Unable to find dentry for %"
+						    PRIu64 " (0x%" PRIx64 ") in %" PRIu64
+						    " (0x%" PRIx64 ")\n"),
+						  dirblk, dirblk,
+						  di->treewalk_parent,
+						  di->treewalk_parent);
+				}
+				log_warn( _("Directory entry removed\n"));
+			} else
+				log_err( _("Directory entry to invalid inode remains.\n"));
+			log_debug( _("Directory block %lld (0x%llx) "
+				     "is now marked as 'invalid'\n"),
+				   (unsigned long long)dirblk,
+				   (unsigned long long)dirblk);
+			/* Can't use fsck_blockmap_set here because we don't
+			   have an inode in memory. */
+			gfs2_blockmap_set(bl, dirblk, gfs2_inode_invalid);
+			check_n_fix_bitmap(sbp, dirblk, gfs2_inode_invalid);
 		}
-		bh = bread(&sbp->buf_list, i);
-		ip = fsck_inode_get(sbp, bh);
+		ip = fsck_load_inode(sbp, dirblk);
 		if(!ds.dotdir) {
 			log_err(_("No '.' entry found for directory inode at "
-				  "block %"PRIu64" (0x%" PRIx64 ")\n"), i, i);
+				  "block %"PRIu64" (0x%" PRIx64 ")\n"),
+				dirblk, dirblk);
+
+			if (query( _("Is it okay to add '.' entry? (y/n) "))) {
+				uint64_t cur_blks;
 
-			errors_found++;
-			if (query(&opts,
-				  _("Is it okay to add '.' entry? (y/n) "))) {
-				errors_corrected++;
 				sprintf(tmp_name, ".");
 				filename_len = strlen(tmp_name); /* no trailing
 								    NULL */
@@ -827,37 +768,44 @@ int pass2(struct gfs2_sbd *sbp)
 				}
 				memcpy(filename, tmp_name, filename_len);
 
+				cur_blks = ip->i_di.di_blocks;
 				dir_add(ip, filename, filename_len,
 					&(ip->i_di.di_num), DT_DIR);
-				increment_link(ip->i_sbd,
-					       ip->i_di.di_num.no_addr);
+				if (cur_blks != ip->i_di.di_blocks) {
+					char dirname[80];
+
+					sprintf(dirname, _("Directory at %lld "
+							   "(0x%llx)"),
+						(unsigned long long)dirblk,
+						(unsigned long long)dirblk);
+					reprocess_inode(ip, dirname);
+				}
+				/* directory links to itself via '.' */
+				increment_link(ip->i_di.di_num.no_addr,
+					       ip->i_di.di_num.no_addr,
+					       _("\". (itself)\""));
 				ds.entry_count++;
 				free(filename);
 				log_err( _("The directory was fixed.\n"));
-				need_update = UPDATED;
 			} else {
 				log_err( _("The directory was not fixed.\n"));
 			}
 		}
 
-		if(ip->i_di.di_entries != ds.entry_count) {
+		if(!fsck_abort && ip->i_di.di_entries != ds.entry_count) {
 			log_err( _("Entries is %d - should be %d for inode "
 				"block %llu (0x%llx)\n"),
 				ip->i_di.di_entries, ds.entry_count,
 				(unsigned long long)ip->i_di.di_num.no_addr,
 				(unsigned long long)ip->i_di.di_num.no_addr);
-			errors_found++;
-			if (query(&opts,
-				  _("Fix the entry count? (y/n) "))) {
-				errors_corrected++;
+			if (query( _("Fix the entry count? (y/n) "))) {
 				ip->i_di.di_entries = ds.entry_count;
-				need_update = UPDATED;
+				bmodified(ip->i_bh);
 			} else {
 				log_err( _("The entry count was not fixed.\n"));
 			}
 		}
-		fsck_inode_put(ip, need_update); /* does a gfs2_dinode_out,
-						    brelse */
+		fsck_inode_put(&ip); /* does a gfs2_dinode_out, brelse */
 	}
 	/* Now that we've deleted the inodes marked "bad" we can safely
 	   get rid of the duplicate block list.  If we do it any sooner,
@@ -865,12 +813,7 @@ int pass2(struct gfs2_sbd *sbp)
 	   deleting it from both inodes referencing it. Note: The other
 	   returns from this function are premature exits of the program
 	   and gfs2_block_list_destroy should get rid of the list for us. */
-	while (!osi_list_empty(&sbp->dup_blocks.list)) {
-		b = osi_list_entry(sbp->dup_blocks.list.next,
-				   struct dup_blocks, list);
-		osi_list_del(&b->list);
-		free(b);
-	}
+	gfs2_dup_free();
 	return FSCK_OK;
 }
 
diff --git a/gfs2/fsck/pass3.c b/gfs2/fsck/pass3.c
index 9bb7cb4..a15c63f 100644
--- a/gfs2/fsck/pass3.c
+++ b/gfs2/fsck/pass3.c
@@ -12,6 +12,7 @@
 #include "lost_n_found.h"
 #include "link.h"
 #include "metawalk.h"
+#include "util.h"
 
 static int attach_dotdot_to(struct gfs2_sbd *sbp, uint64_t newdotdot,
 			    uint64_t olddotdot, uint64_t block)
@@ -19,6 +20,7 @@ static int attach_dotdot_to(struct gfs2_sbd *sbp, uint64_t newdotdot,
 	char *filename;
 	int filename_len;
 	struct gfs2_inode *ip, *pip;
+	uint64_t cur_blks;
 
 	ip = fsck_load_inode(sbp, block);
 	pip = fsck_load_inode(sbp, newdotdot);
@@ -32,35 +34,45 @@ static int attach_dotdot_to(struct gfs2_sbd *sbp, uint64_t newdotdot,
 	filename_len = strlen("..");
 	if(!(filename = malloc((sizeof(char) * filename_len) + 1))) {
 		log_err( _("Unable to allocate name\n"));
-		fsck_inode_put(ip, not_updated);
-		fsck_inode_put(pip, not_updated);
+		fsck_inode_put(&ip);
+		fsck_inode_put(&pip);
 		stack;
 		return -1;
 	}
 	if(!memset(filename, 0, (sizeof(char) * filename_len) + 1)) {
 		log_err( _("Unable to zero name\n"));
-		fsck_inode_put(ip, not_updated);
-		fsck_inode_put(pip, not_updated);
+		fsck_inode_put(&ip);
+		fsck_inode_put(&pip);
 		stack;
 		return -1;
 	}
 	memcpy(filename, "..", filename_len);
-	if(gfs2_dirent_del(ip, NULL, filename, filename_len))
+	if(gfs2_dirent_del(ip, filename, filename_len))
 		log_warn( _("Unable to remove \"..\" directory entry.\n"));
 	else
-		decrement_link(sbp, olddotdot);
+		decrement_link(olddotdot, block, _("old \"..\""));
+	cur_blks = ip->i_di.di_blocks;
 	dir_add(ip, filename, filename_len, &pip->i_di.di_num, DT_DIR);
-	increment_link(sbp, newdotdot);
-	fsck_inode_put(ip, updated);
-	fsck_inode_put(pip, updated);
+	if (cur_blks != ip->i_di.di_blocks) {
+		char dirname[80];
+
+		sprintf(dirname, _("Directory at %lld (0x%llx)"),
+			(unsigned long long)ip->i_di.di_num.no_addr,
+			(unsigned long long)ip->i_di.di_num.no_addr);
+		reprocess_inode(ip, dirname);
+	}
+	increment_link(newdotdot, block, _("new \"..\""));
+	fsck_inode_put(&ip);
+	fsck_inode_put(&pip);
+	free(filename);
 	return 0;
 }
 
 static struct dir_info *mark_and_return_parent(struct gfs2_sbd *sbp,
-										struct dir_info *di)
+					       struct dir_info *di)
 {
 	struct dir_info *pdi;
-	struct gfs2_block_query q_dotdot, q_treewalk;
+	uint8_t q_dotdot, q_treewalk;
 
 	di->checked = 1;
 
@@ -74,19 +86,8 @@ static struct dir_info *mark_and_return_parent(struct gfs2_sbd *sbp,
 				   PRIu64" (0x%" PRIx64 ")\n"), di->dotdot_parent,
 				   di->dotdot_parent, di->treewalk_parent,
 				   di->treewalk_parent);
-		if(gfs2_block_check(sbp, bl, di->dotdot_parent, &q_dotdot)) {
-			log_err( _("Unable to find block %"PRIu64
-					" (0x%" PRIx64 ") in block map.\n"),
-					di->dotdot_parent, di->dotdot_parent);
-			return NULL;
-		}
-		if(gfs2_block_check(sbp, bl, di->treewalk_parent,
-				    &q_treewalk)) {
-			log_err( _("Unable to find block %"PRIu64
-					" (0x%" PRIx64 ") in block map\n"),
-					di->treewalk_parent, di->treewalk_parent);
-			return NULL;
-		}
+		q_dotdot = block_type(di->dotdot_parent);
+		q_treewalk = block_type(di->treewalk_parent);
 		/* if the dotdot entry isn't a directory, but the
 		 * treewalk is, treewalk is correct - if the treewalk
 		 * entry isn't a directory, but the dotdot is, dotdot
@@ -94,8 +95,8 @@ static struct dir_info *mark_and_return_parent(struct gfs2_sbd *sbp,
 		 * choose? if neither are directories, we have a
 		 * problem - need to move this directory into lost+found
 		 */
-		if(q_dotdot.block_type != gfs2_inode_dir) {
-			if(q_treewalk.block_type != gfs2_inode_dir) {
+		if(q_dotdot != gfs2_inode_dir) {
+			if(q_treewalk != gfs2_inode_dir) {
 				log_err( _("Orphaned directory, move to lost+found\n"));
 				return NULL;
 			}
@@ -107,21 +108,18 @@ static struct dir_info *mark_and_return_parent(struct gfs2_sbd *sbp,
 								 di->dotdot_parent, di->dinode);
 				di->dotdot_parent = di->treewalk_parent;
 			}
-		}
-		else {
-			if(q_treewalk.block_type != gfs2_inode_dir) {
+		} else {
+			if(q_treewalk != gfs2_inode_dir) {
 				int error = 0;
 				log_warn( _(".. parent is valid, but treewalk"
 						 "is bad - reattaching to lost+found"));
 
 				/* FIXME: add a dinode for this entry instead? */
 
-				errors_found++;
-				if(query(&opts, _("Remove directory entry for bad"
-						 " inode %"PRIu64" (0x%" PRIx64 ") in %"PRIu64
-						 " (0x%" PRIx64 ")? (y/n)"), di->dinode, di->dinode,
-						 di->treewalk_parent, di->treewalk_parent)) {
-					errors_corrected++;
+				if(query( _("Remove directory entry for bad"
+					    " inode %"PRIu64" (0x%" PRIx64 ") in %"PRIu64
+					    " (0x%" PRIx64 ")? (y/n)"), di->dinode, di->dinode,
+					  di->treewalk_parent, di->treewalk_parent)) {
 					error = remove_dentry_from_dir(sbp, di->treewalk_parent,
 												   di->dinode);
 					if(error < 0) {
@@ -144,28 +142,24 @@ static struct dir_info *mark_and_return_parent(struct gfs2_sbd *sbp,
 			}
 			else {
 				log_err( _("Both .. and treewalk parents are "
-						"directories, going with treewalk for "
-						"now...\n"));
+					   "directories, going with treewalk "
+					   "for now...\n"));
 				attach_dotdot_to(sbp, di->treewalk_parent,
-								 di->dotdot_parent, di->dinode);
+						 di->dotdot_parent,
+						 di->dinode);
 				di->dotdot_parent = di->treewalk_parent;
 			}
 		}
 	}
 	else {
-		if(gfs2_block_check(sbp, bl, di->dotdot_parent, &q_dotdot)) {
-			log_err( _("Unable to find parent block %"PRIu64
-					" (0x%" PRIx64 ")  in block map\n"),
-					di->dotdot_parent, di->dotdot_parent);
-			return NULL;
-		}
-		if(q_dotdot.block_type != gfs2_inode_dir) {
+		q_dotdot = block_type(di->dotdot_parent);
+		if(q_dotdot != gfs2_inode_dir) {
 			log_err( _("Orphaned directory at block %" PRIu64 " (0x%" PRIx64
 					") moved to lost+found\n"), di->dinode, di->dinode);
 			return NULL;
 		}
 	}
-	find_di(sbp, di->dotdot_parent, &pdi);
+	pdi = dirtree_find(di->dotdot_parent);
 
 	return pdi;
 }
@@ -178,19 +172,18 @@ static struct dir_info *mark_and_return_parent(struct gfs2_sbd *sbp,
  */
 int pass3(struct gfs2_sbd *sbp)
 {
-	osi_list_t *tmp;
+	struct osi_node *tmp;
 	struct dir_info *di, *tdi;
 	struct gfs2_inode *ip;
-	struct gfs2_block_query q;
-	int i;
+	uint8_t q;
 
-	find_di(sbp, sbp->md.rooti->i_di.di_num.no_addr, &di);
-	if(di) {
+	di = dirtree_find(sbp->md.rooti->i_di.di_num.no_addr);
+	if (di) {
 		log_info( _("Marking root inode connected\n"));
 		di->checked = 1;
 	}
-	find_di(sbp, sbp->master_dir->i_di.di_num.no_addr, &di);
-	if(di) {
+	di = dirtree_find(sbp->master_dir->i_di.di_num.no_addr);
+	if (di) {
 		log_info( _("Marking master directory inode connected\n"));
 		di->checked = 1;
 	}
@@ -200,9 +193,8 @@ int pass3(struct gfs2_sbd *sbp)
 	 * find a parent, put in lost+found.
 	 */
 	log_info( _("Checking directory linkage.\n"));
-	for(i = 0; i < FSCK_HASH_SIZE; i++) {
-	osi_list_foreach(tmp, &dir_hash[i]) {
-		di = osi_list_entry(tmp, struct dir_info, list);
+	for (tmp = osi_first(&dirtree); tmp; tmp = osi_next(tmp)) {
+		di = (struct dir_info *)tmp;
 		while(!di->checked) {
 			/* FIXME: Change this so it returns success or
 			 * failure and put the parent inode in a
@@ -211,82 +203,96 @@ int pass3(struct gfs2_sbd *sbp)
 				return FSCK_OK;
 			tdi = mark_and_return_parent(sbp, di);
 
-			/* FIXME: Factor this ? */
-			if(!tdi) {
-				if(gfs2_block_check(sbp, bl, di->dinode, &q)) {
-					stack;
-					return FSCK_ERROR;
-				}
-				if(q.bad_block) {
-					log_err( _("Found unlinked directory containing bad block\n"));
-					errors_found++;
-					if(query(&opts,
-					   _("Clear unlinked directory with bad blocks? (y/n) "))) {
-						errors_corrected++;
-						gfs2_block_set(sbp, bl,
-							       di->dinode,
-							       gfs2_block_free);
-						break;
-					} else
-						log_err( _("Unlinked directory with bad block remains\n"));
-				}
-				if(q.block_type != gfs2_inode_dir &&
-				   q.block_type != gfs2_inode_file &&
-				   q.block_type != gfs2_inode_lnk &&
-				   q.block_type != gfs2_inode_blk &&
-				   q.block_type != gfs2_inode_chr &&
-				   q.block_type != gfs2_inode_fifo &&
-				   q.block_type != gfs2_inode_sock) {
-					log_err( _("Unlinked block marked as inode not an inode\n"));
-					gfs2_block_set(sbp, bl, di->dinode,
-						       gfs2_block_free);
-					log_err( _("Cleared\n"));
+			if (tdi) {
+				log_debug( _("Directory at block %" PRIu64
+					     " (0x%" PRIx64 ") connected\n"),
+					   di->dinode, di->dinode);
+				di = tdi;
+				continue;
+			}
+			q = block_type(di->dinode);
+			if(q == gfs2_bad_block) {
+				log_err( _("Found unlinked directory "
+					   "containing bad block\n"));
+				if(query(_("Clear unlinked directory "
+					   "with bad blocks? (y/n) "))) {
+					log_warn( _("inode %lld (0x%llx) is "
+						    "now marked as free\n"),
+						  (unsigned long long)
+						  di->dinode,
+						  (unsigned long long)
+						  di->dinode);
+					/* Can't use fsck_blockmap_set
+					   because we don't have ip */
+					gfs2_blockmap_set(bl, di->dinode,
+							  gfs2_block_free);
+					check_n_fix_bitmap(sbp, di->dinode,
+							   gfs2_block_free);
+					break;
+				} else
+					log_err( _("Unlinked directory with bad block remains\n"));
+			}
+			if(q != gfs2_inode_dir && q != gfs2_inode_file &&
+			   q != gfs2_inode_lnk && q != gfs2_inode_blk &&
+			   q != gfs2_inode_chr && q != gfs2_inode_fifo &&
+			   q != gfs2_inode_sock) {
+				log_err( _("Unlinked block marked as an inode "
+					   "is not an inode\n"));
+				if(!query(_("Clear the unlinked block?"
+					    " (y/n) "))) {
+					log_err( _("The block was not "
+						   "cleared\n"));
 					break;
 				}
+				log_warn( _("inode %lld (0x%llx) is now "
+					    "marked as free\n"),
+					  (unsigned long long)di->dinode,
+					  (unsigned long long)di->dinode);
+				/* Can't use fsck_blockmap_set
+				   because we don't have ip */
+				gfs2_blockmap_set(bl, di->dinode,
+						  gfs2_block_free);
+				check_n_fix_bitmap(sbp, di->dinode,
+						   gfs2_block_free);
+				log_err( _("The block was cleared\n"));
+				break;
+			}
 
-				log_err( _("Found unlinked directory at block %" PRIu64
-						" (0x%" PRIx64 ")\n"), di->dinode, di->dinode);
-				ip = fsck_load_inode(sbp, di->dinode);
-				/* Don't skip zero size directories
-				 * with eattrs */
-				if(!ip->i_di.di_size && !ip->i_di.di_eattr){
-					log_err( _("Unlinked directory has zero size.\n"));
-					errors_found++;
-					if(query(&opts, _("Remove zero-size unlinked directory? (y/n) "))) {
-						errors_corrected++;
-						gfs2_block_set(sbp, bl,
-							       di->dinode,
-							       gfs2_block_free);
-						fsck_inode_put(ip, not_updated);
-						break;
-					} else {
-						log_err( _("Zero-size unlinked directory remains\n"));
-					}
-				}
-				errors_found++;
-				if(query(&opts, _("Add unlinked directory to lost+found? (y/n) "))) {
-					errors_corrected++;
-					if(add_inode_to_lf(ip)) {
-						fsck_inode_put(ip, not_updated);
-						stack;
-						return FSCK_ERROR;
-					}
-					log_warn( _("Directory relinked to lost+found\n"));
-					fsck_inode_put(ip, updated);
+			log_err( _("Found unlinked directory at block %" PRIu64
+				   " (0x%" PRIx64 ")\n"), di->dinode,
+				 di->dinode);
+			ip = fsck_load_inode(sbp, di->dinode);
+			/* Don't skip zero size directories with eattrs */
+			if(!ip->i_di.di_size && !ip->i_di.di_eattr){
+				log_err( _("Unlinked directory has zero "
+					   "size.\n"));
+				if(query( _("Remove zero-size unlinked "
+					    "directory? (y/n) "))) {
+					fsck_blockmap_set(ip, di->dinode,
+						_("zero-sized unlinked inode"),
+							  gfs2_block_free);
+					fsck_inode_put(&ip);
+					break;
 				} else {
-					log_err( _("Unlinked directory remains unlinked\n"));
-					fsck_inode_put(ip, not_updated);
+					log_err( _("Zero-size unlinked "
+						   "directory remains\n"));
 				}
-				break;
 			}
-			else {
-				log_debug( _("Directory at block %" PRIu64 " (0x%" 
-						  PRIx64 ") connected\n"), di->dinode, di->dinode);
+			if(query( _("Add unlinked directory to "
+				    "lost+found? (y/n) "))) {
+				if(add_inode_to_lf(ip)) {
+					fsck_inode_put(&ip);
+					stack;
+					return FSCK_ERROR;
+				}
+				log_warn( _("Directory relinked to lost+found\n"));
+			} else {
+				log_err( _("Unlinked directory remains unlinked\n"));
 			}
-			di = tdi;
+			fsck_inode_put(&ip);
+			break;
 		}
 	}
-	}
 	if(lf_dip)
 		log_debug( _("At end of pass3, lost+found entries is %u\n"),
 				  lf_dip->i_di.di_entries);
diff --git a/gfs2/fsck/pass4.c b/gfs2/fsck/pass4.c
index 7ece88a..d79f8e5 100644
--- a/gfs2/fsck/pass4.c
+++ b/gfs2/fsck/pass4.c
@@ -9,6 +9,7 @@
 #include "lost_n_found.h"
 #include "inode_hash.h"
 #include "metawalk.h"
+#include "util.h"
 
 struct metawalk_fxns pass4_fxns_delete = {
 	.private = NULL,
@@ -20,15 +21,16 @@ struct metawalk_fxns pass4_fxns_delete = {
 
 /* Updates the link count of an inode to what the fsck has seen for
  * link count */
-static int fix_inode_count(struct gfs2_sbd *sbp, struct inode_info *ii,
-					struct gfs2_inode *ip)
+static int fix_link_count(struct inode_info *ii, struct gfs2_inode *ip)
 {
-	log_info( _("Fixing inode count for %llu (0x%llx) \n"),
+	log_info( _("Fixing inode link count (%d->%d) for %llu (0x%llx) \n"),
+		  ip->i_di.di_nlink, ii->counted_links,
 		 (unsigned long long)ip->i_di.di_num.no_addr,
 		 (unsigned long long)ip->i_di.di_num.no_addr);
 	if(ip->i_di.di_nlink == ii->counted_links)
 		return 0;
 	ip->i_di.di_nlink = ii->counted_links;
+	bmodified(ip->i_bh);
 
 	log_debug( _("Changing inode %llu (0x%llx) to have %u links\n"),
 		  (unsigned long long)ip->i_di.di_num.no_addr,
@@ -37,77 +39,71 @@ static int fix_inode_count(struct gfs2_sbd *sbp, struct inode_info *ii,
 	return 0;
 }
 
-static int scan_inode_list(struct gfs2_sbd *sbp, osi_list_t *list) {
-	osi_list_t *tmp;
+static int scan_inode_list(struct gfs2_sbd *sbp) {
+	struct osi_node *tmp;
 	struct inode_info *ii;
 	struct gfs2_inode *ip;
 	int lf_addition = 0;
-	struct gfs2_block_query q;
-	enum update_flags f;
+	uint8_t q;
 
 	/* FIXME: should probably factor this out into a generic
 	 * scanning fxn */
-	osi_list_foreach(tmp, list) {
+	for (tmp = osi_first(&inodetree); tmp; tmp = osi_next(tmp)) {
 		if (skip_this_pass || fsck_abort) /* if asked to skip the rest */
 			return 0;
-		f = not_updated;
-		if(!(ii = osi_list_entry(tmp, struct inode_info, list))) {
-			log_crit( _("osi_list_foreach broken in scan_info_list!!\n"));
+		if(!(ii = (struct inode_info *)tmp)) {
+			log_crit( _("osi_tree broken in scan_info_list!!\n"));
 			exit(FSCK_ERROR);
 		}
-		log_debug( _("Checking reference count on inode at block %" PRIu64
-				  " (0x%" PRIx64 ")\n"), ii->inode, ii->inode);
 		if(ii->counted_links == 0) {
 			log_err( _("Found unlinked inode at %" PRIu64 " (0x%" PRIx64 ")\n"),
 					ii->inode, ii->inode);
-			if(gfs2_block_check(sbp, bl, ii->inode, &q)) {
-				stack;
-				return -1;
-			}
-			if(q.bad_block) {
+			q = block_type(ii->inode);
+			if(q == gfs2_bad_block) {
 				log_err( _("Unlinked inode %llu (0x%llx) contains"
 					"bad blocks\n"),
 					(unsigned long long)ii->inode,
 					(unsigned long long)ii->inode);
-				errors_found++;
-				if(query(&opts,
-						 _("Delete unlinked inode with bad blocks? (y/n) "))) {
-					errors_corrected++;
+				if(query(  _("Delete unlinked inode with bad "
+					     "blocks? (y/n) "))) {
 					ip = fsck_load_inode(sbp, ii->inode);
-					check_inode_eattr(ip, &f,
+					check_inode_eattr(ip,
 							  &pass4_fxns_delete);
 					check_metatree(ip, &pass4_fxns_delete);
-					fsck_inode_put(ip, updated);
-					gfs2_block_set(sbp, bl, ii->inode,
-						       gfs2_block_free);
+					fsck_blockmap_set(ip, ii->inode,
+							  _("bad unlinked"),
+							  gfs2_block_free);
+					fsck_inode_put(&ip);
 					continue;
 				} else
 					log_err( _("Unlinked inode with bad blocks not cleared\n"));
 			}
-			if(q.block_type != gfs2_inode_dir &&
-			   q.block_type != gfs2_inode_file &&
-			   q.block_type != gfs2_inode_lnk &&
-			   q.block_type != gfs2_inode_blk &&
-			   q.block_type != gfs2_inode_chr &&
-			   q.block_type != gfs2_inode_fifo &&
-			   q.block_type != gfs2_inode_sock) {
-				log_err( _("Unlinked block marked as inode is "
+			if(q != gfs2_inode_dir &&
+			   q != gfs2_inode_file &&
+			   q != gfs2_inode_lnk &&
+			   q != gfs2_inode_blk &&
+			   q != gfs2_inode_chr &&
+			   q != gfs2_inode_fifo &&
+			   q != gfs2_inode_sock) {
+				log_err( _("Unlinked block %lld (0x%llx) "
+					   "marked as inode is "
 					   "not an inode (%d)\n"),
-					 q.block_type);
+					 (unsigned long long)ii->inode,
+					 (unsigned long long)ii->inode, q);
 				ip = fsck_load_inode(sbp, ii->inode);
-				if(query(&opts, _("Delete unlinked inode "
-						  "? (y/n) "))) {
-					check_inode_eattr(ip, &f,
+				if(query(_("Delete unlinked inode? (y/n) "))) {
+					check_inode_eattr(ip,
 							  &pass4_fxns_delete);
 					check_metatree(ip, &pass4_fxns_delete);
-					fsck_inode_put(ip, updated);
-					gfs2_block_set(sbp, bl, ii->inode,
-						       gfs2_block_free);
+					fsck_blockmap_set(ip, ii->inode,
+						  _("invalid unlinked"),
+							  gfs2_block_free);
+					fsck_inode_put(&ip);
 					log_err( _("The inode was deleted\n"));
 				} else {
 					log_err( _("The inode was not "
 						   "deleted\n"));
-					fsck_inode_put(ip, not_updated);
+					fsck_inode_put(&ip);
 				}
 				continue;
 			}
@@ -118,32 +114,29 @@ static int scan_inode_list(struct gfs2_sbd *sbp, osi_list_t *list) {
 			 * them. */
 			if(!ip->i_di.di_size && !ip->i_di.di_eattr){
 				log_err( _("Unlinked inode has zero size\n"));
-				errors_found++;
-				if(query(&opts, _("Clear zero-size unlinked inode? (y/n) "))) {
-					errors_corrected++;
-					gfs2_block_set(sbp, bl, ii->inode,
-						       gfs2_block_free);
-					fsck_inode_put(ip, not_updated);
+				if(query(_("Clear zero-size unlinked inode? "
+					   "(y/n) "))) {
+					fsck_blockmap_set(ip, ii->inode,
+						_("unlinked zero-length"),
+							  gfs2_block_free);
+					fsck_inode_put(&ip);
 					continue;
 				}
 
 			}
-			errors_found++;
-			if(query(&opts, _("Add unlinked inode to lost+found? (y/n)"))) {
-				errors_corrected++;
-				f = updated;
+			if(query( _("Add unlinked inode to lost+found? "
+				    "(y/n)"))) {
 				if(add_inode_to_lf(ip)) {
 					stack;
-					fsck_inode_put(ip, not_updated);
+					fsck_inode_put(&ip);
 					return -1;
-				}
-				else {
-					fix_inode_count(sbp, ii, ip);
+				} else {
+					fix_link_count(ii, ip);
 					lf_addition = 1;
 				}
 			} else
 				log_err( _("Unlinked inode left unlinked\n"));
-			fsck_inode_put(ip, f);
+			fsck_inode_put(&ip);
 		} /* if(ii->counted_links == 0) */
 		else if(ii->link_count != ii->counted_links) {
 			log_err( _("Link count inconsistent for inode %" PRIu64
@@ -151,15 +144,17 @@ static int scan_inode_list(struct gfs2_sbd *sbp, osi_list_t *list) {
 					ii->inode, ii->link_count, ii->counted_links);
 			/* Read in the inode, adjust the link count,
 			 * and write it back out */
-			errors_found++;
-			if(query(&opts, _("Update link count for inode %"
-				 PRIu64 " (0x%" PRIx64 ") ? (y/n) "), ii->inode, ii->inode)) {
-				errors_corrected++;
+			if(query( _("Update link count for inode %" PRIu64
+				    " (0x%" PRIx64 ") ? (y/n) "),
+				  ii->inode, ii->inode)) {
 				ip = fsck_load_inode(sbp, ii->inode); /* bread, inode_get */
-				fix_inode_count(sbp, ii, ip);
-				fsck_inode_put(ip, updated); /* out, brelse, free */
-				log_warn( _("Link count updated for inode %"
-						 PRIu64 " (0x%" PRIx64 ") \n"), ii->inode, ii->inode);
+				fix_link_count(ii, ip);
+				ii->link_count = ii->counted_links;
+				fsck_inode_put(&ip); /* out, brelse, free */
+				log_warn( _("Link count updated to %d for "
+					    "inode %" PRIu64 " (0x%"
+					    PRIx64 ") \n"), ii->link_count,
+					  ii->inode, ii->inode);
 			} else {
 				log_err( _("Link count for inode %" PRIu64 " (0x%" PRIx64
 						") still incorrect\n"), ii->inode, ii->inode);
@@ -170,12 +165,11 @@ static int scan_inode_list(struct gfs2_sbd *sbp, osi_list_t *list) {
 	} /* osi_list_foreach(tmp, list) */
 
 	if (lf_addition) {
-		if(!(ii = inode_hash_search(inode_hash,
-									lf_dip->i_di.di_num.no_addr))) {
+		if(!(ii = inodetree_find(lf_dip->i_di.di_num.no_addr))) {
 			log_crit( _("Unable to find lost+found inode in inode_hash!!\n"));
 			return -1;
 		} else {
-			fix_inode_count(sbp, ii, lf_dip);
+			fix_link_count(ii, lf_dip);
 		}
 	}
 
@@ -193,20 +187,13 @@ static int scan_inode_list(struct gfs2_sbd *sbp, osi_list_t *list) {
  */
 int pass4(struct gfs2_sbd *sbp)
 {
-	uint32_t i;
-	osi_list_t *list;
 	if(lf_dip)
 		log_debug( _("At beginning of pass4, lost+found entries is %u\n"),
 				  lf_dip->i_di.di_entries);
 	log_info( _("Checking inode reference counts.\n"));
-	for (i = 0; i < FSCK_HASH_SIZE; i++) {
-		if (skip_this_pass || fsck_abort) /* if asked to skip the rest */
-			return FSCK_OK;
-		list = &inode_hash[i];
-		if(scan_inode_list(sbp, list)) {
-			stack;
-			return FSCK_ERROR;
-		}
+	if(scan_inode_list(sbp)) {
+		stack;
+		return FSCK_ERROR;
 	}
 
 	if(lf_dip)
diff --git a/gfs2/fsck/pass5.c b/gfs2/fsck/pass5.c
index 53aef44..645d81c 100644
--- a/gfs2/fsck/pass5.c
+++ b/gfs2/fsck/pass5.c
@@ -10,15 +10,12 @@
 #include "fs_bits.h"
 #include "util.h"
 
-static int convert_mark(struct gfs2_block_query *q, uint32_t *count)
+static int convert_mark(uint8_t q, uint32_t *count)
 {
-	if (q->eattr_block) {
-		count[2]++;
-		return GFS2_BLKST_USED;
-	}
-	switch(q->block_type) {
+	switch(q) {
 
 	case gfs2_meta_inval:
+	case gfs2_inode_invalid:
 		/* Convert invalid metadata to free blocks */
 	case gfs2_block_free:
 		count[0]++;
@@ -40,14 +37,13 @@ static int convert_mark(struct gfs2_block_query *q, uint32_t *count)
 
 	case gfs2_indir_blk:
 	case gfs2_leaf_blk:
-	case gfs2_journal_blk:
-	case gfs2_meta_other:
+	case gfs2_meta_rgrp:
 	case gfs2_meta_eattr:
 		count[2]++;
 		return GFS2_BLKST_USED;
 
 	default:
-		log_err( _("Invalid state %d found\n"), q->block_type);
+		log_err( _("Invalid block type %d found\n"), q);
 		return -1;
 	}
 	return -1;
@@ -59,9 +55,8 @@ static int check_block_status(struct gfs2_sbd *sbp, char *buffer, unsigned int b
 	unsigned char *byte, *end;
 	unsigned int bit;
 	unsigned char rg_status, block_status;
-	struct gfs2_block_query q;
+	uint8_t q;
 	uint64_t block;
-	static int free_unlinked = -1;
 
 	/* FIXME verify cast */
 	byte = (unsigned char *) buffer;
@@ -74,9 +69,9 @@ static int check_block_status(struct gfs2_sbd *sbp, char *buffer, unsigned int b
 		warm_fuzzy_stuff(block);
 		if (skip_this_pass || fsck_abort) /* if asked to skip the rest */
 			return 0;
-		gfs2_block_check(sbp, bl, block, &q);
+		q = block_type(block);
 
-		block_status = convert_mark(&q, count);
+		block_status = convert_mark(q, count);
 
 		/* If one node opens a file and another node deletes it, we
 		   may be left with a block that appears to be "unlinked" in
@@ -85,33 +80,22 @@ static int check_block_status(struct gfs2_sbd *sbp, char *buffer, unsigned int b
 		   So we ignore it. */
 		if (rg_status == GFS2_BLKST_UNLINKED &&
 		    block_status == GFS2_BLKST_FREE) {
-			errors_found++;
-			if (free_unlinked == -1) {
-				log_err( _("Unlinked inode block found at "
-					   "block %llu (0x%llx).\n"),
-					 (unsigned long long)block,
-					 (unsigned long long)block);
-				if(query(&opts, _("Do you want me to fix the "
-						  "bitmap for all unlinked "
-						  "blocks? (y/n) ")))
-					free_unlinked = 1;
-				else
-					free_unlinked = 0;
-			}
-			if (free_unlinked) {
+			log_err( _("Unlinked inode block found at "
+				   "block %llu (0x%llx).\n"),
+				 (unsigned long long)block,
+				 (unsigned long long)block);
+			if(query(_("Do you want to fix the bitmap? (y/n) "))) {
 				if(gfs2_set_bitmap(sbp, block, block_status))
 					log_err(_("Unlinked block %llu "
 						  "(0x%llx) bitmap not fixed."
 						  "\n"),
 						(unsigned long long)block,
 						(unsigned long long)block);
-				else {
+				else
 					log_err(_("Unlinked block %llu "
 						  "(0x%llx) bitmap fixed.\n"),
 						(unsigned long long)block,
 						(unsigned long long)block);
-					errors_corrected++;
-				}
 			} else {
 				log_info( _("Unlinked block found at block %"
 					    PRIu64" (0x%" PRIx64 "), left "
@@ -126,13 +110,12 @@ static int check_block_status(struct gfs2_sbd *sbp, char *buffer, unsigned int b
 			log_err( _("Ondisk status is %u (%s) but FSCK thinks it should be "),
 					rg_status, blockstatus[rg_status]);
 			log_err("%u (%s)\n", block_status, blockstatus[block_status]);
-			log_err( _("Metadata type is %u (%s)\n"), q.block_type,
-					block_type_string(&q));
+			log_err( _("Metadata type is %u (%s)\n"), q,
+					block_type_string(q));
 
-			errors_found++;
-			if(query(&opts, _("Fix bitmap for block %"
-					 PRIu64" (0x%" PRIx64 ") ? (y/n) "), block, block)) {
-				errors_corrected++;
+			if(query(_("Fix bitmap for block %" PRIu64
+				   " (0x%" PRIx64 ") ? (y/n) "),
+				 block, block)) {
 				if(gfs2_set_bitmap(sbp, block, block_status))
 					log_err( _("Failed.\n"));
 				else
@@ -152,8 +135,8 @@ static int check_block_status(struct gfs2_sbd *sbp, char *buffer, unsigned int b
 	return 0;
 }
 
-static enum update_flags update_rgrp(struct gfs2_sbd *sbp, struct rgrp_list *rgp,
-							  uint32_t *count)
+static void update_rgrp(struct gfs2_sbd *sbp, struct rgrp_list *rgp,
+			uint32_t *count)
 {
 	uint32_t i;
 	struct gfs2_bitmap *bits;
@@ -167,7 +150,7 @@ static enum update_flags update_rgrp(struct gfs2_sbd *sbp, struct rgrp_list *rgp
 		check_block_status(sbp, rgp->bh[i]->b_data + bits->bi_offset,
 						   bits->bi_len, &rg_block, rgp->ri.ri_data0, count);
 		if (skip_this_pass || fsck_abort) /* if asked to skip the rest */
-			return 0;
+			return;
 	}
 
 	/* actually adjust counters and write out to disk */
@@ -194,17 +177,13 @@ static enum update_flags update_rgrp(struct gfs2_sbd *sbp, struct rgrp_list *rgp
 		exit(FSCK_ERROR);
 	}
 	if(update) {
-		errors_found++;
-		if(query(&opts, _("Update resource group counts? (y/n) "))) {
-			errors_corrected++;
+		if(query( _("Update resource group counts? (y/n) "))) {
 			log_warn( _("Resource group counts updated\n"));
 			/* write out the rgrp */
-			gfs2_rgrp_out(&rgp->rg, rgp->bh[0]->b_data);
-			return updated;
+			gfs2_rgrp_out(&rgp->rg, rgp->bh[0]);
 		} else
 			log_err( _("Resource group counts left inconsistent\n"));
 	}
-	return not_updated;
 }
 
 /**
@@ -222,22 +201,15 @@ int pass5(struct gfs2_sbd *sbp)
 
 	/* Reconcile RG bitmaps with fsck bitmap */
 	for(tmp = sbp->rglist.next; tmp != &sbp->rglist; tmp = tmp->next){
-		enum update_flags f;
-
 		if (skip_this_pass || fsck_abort) /* if asked to skip the rest */
 			return FSCK_OK;
 		log_info( _("Verifying Resource Group #%" PRIu64 "\n"), rg_count);
 		memset(count, 0, sizeof(count));
 		rgp = osi_list_entry(tmp, struct rgrp_list, list);
 
-		if(gfs2_rgrp_read(sbp, rgp)){
-			stack;
-			return FSCK_ERROR;
-		}
 		rg_count++;
 		/* Compare the bitmaps and report the differences */
-		f = update_rgrp(sbp, rgp, count);
-		gfs2_rgrp_relse(rgp, f);
+		update_rgrp(sbp, rgp, count);
 	}
 	/* Fix up superblock info based on this - don't think there's
 	 * anything to do here... */
diff --git a/gfs2/fsck/rgrepair.c b/gfs2/fsck/rgrepair.c
index d0b8f33..0b5c576 100644
--- a/gfs2/fsck/rgrepair.c
+++ b/gfs2/fsck/rgrepair.c
@@ -19,11 +19,11 @@ struct special_blocks false_rgrps;
 
 #define ri_compare(rg, ondisk, expected, field, fmt)	\
 	if (ondisk.field != expected.field) { \
-		log_warn("rindex #%d " #field " discrepancy: index 0x%" fmt \
-			 " != expected: 0x%" fmt "\n",			\
-			 rg + 1, ondisk.field, expected.field);		\
-		ondisk.field = expected.field; \
-		rindex_modified = TRUE; \
+		log_warn( _("rindex #%d " #field " discrepancy: index 0x%" \
+			    fmt	" != expected: 0x%" fmt "\n"),		\
+			  rg + 1, ondisk.field, expected.field);	\
+		ondisk.field = expected.field;				\
+		rindex_modified = TRUE;					\
 	}
 
 /*
@@ -51,17 +51,16 @@ static void find_journaled_rgs(struct gfs2_sbd *sdp)
 		ip = sdp->md.journal[j];
 		jblocks = ip->i_di.di_size / sdp->sd_sb.sb_bsize;
 		for (b = 0; b < jblocks; b++) {
-			block_map(ip, b, &new, &dblock, &extlen, 0,
-				  not_updated);
+			block_map(ip, b, &new, &dblock, &extlen, 0);
 			if (!dblock)
 				break;
-			bh = bread(&sdp->buf_list, dblock);
+			bh = bread(sdp, dblock);
 			if (!gfs2_check_meta(bh, GFS2_METATYPE_RG)) {
 				log_debug( _("False RG found at block "
 					  "0x%" PRIx64 "\n"), dblock);
 				gfs2_special_set(&false_rgrps, dblock);
 			}
-			brelse(bh, not_updated);
+			brelse(bh);
 		}
 	}
 }
@@ -129,7 +128,7 @@ static int gfs2_rindex_rebuild(struct gfs2_sbd *sdp, osi_list_t *ret_list,
 	for (blk = sdp->sb_addr + 1;
 	     blk < sdp->device.length && number_of_rgs < 6;
 	     blk++) {
-		bh = bread(&sdp->nvbuf_list, blk);
+		bh = bread(sdp, blk);
 		if (((blk == sdp->sb_addr + 1) ||
 		    (!gfs2_check_meta(bh, GFS2_METATYPE_RG))) &&
 		    !is_false_rg(blk)) {
@@ -163,7 +162,7 @@ static int gfs2_rindex_rebuild(struct gfs2_sbd *sdp, osi_list_t *ret_list,
 			number_of_rgs++;
 			blk += 250; /* skip ahead for performance */
 		}
-		brelse(bh, not_updated);
+		brelse(bh);
 	}
 	number_of_rgs = 0;
 	gfs2_special_free(&false_rgrps);
@@ -207,9 +206,9 @@ static int gfs2_rindex_rebuild(struct gfs2_sbd *sdp, osi_list_t *ret_list,
 	for (blk = sdp->sb_addr + 1; blk <= sdp->device.length;
 	     blk += block_bump) {
 		log_debug( _("Block 0x%" PRIx64 "\n"), blk);
-		bh = bread(&sdp->nvbuf_list, blk);
+		bh = bread(sdp, blk);
 		rg_was_fnd = (!gfs2_check_meta(bh, GFS2_METATYPE_RG));
-		brelse(bh, not_updated);
+		brelse(bh);
 		/* Allocate a new RG and index. */
 		calc_rgd = malloc(sizeof(struct rgrp_list));
 		if (!calc_rgd) {
@@ -241,10 +240,10 @@ static int gfs2_rindex_rebuild(struct gfs2_sbd *sdp, osi_list_t *ret_list,
 		for (fwd_block = blk + 1;
 		     fwd_block < sdp->device.length; 
 		     fwd_block++) {
-			bh = bread(&sdp->nvbuf_list, fwd_block);
+			bh = bread(sdp, fwd_block);
 			bitmap_was_fnd =
 				(!gfs2_check_meta(bh, GFS2_METATYPE_RB));
-			brelse(bh, not_updated);
+			brelse(bh);
 			if (bitmap_was_fnd) /* if a bitmap */
 				calc_rgd->ri.ri_length++;
 			else
@@ -252,13 +251,6 @@ static int gfs2_rindex_rebuild(struct gfs2_sbd *sdp, osi_list_t *ret_list,
 		} /* for subsequent bitmaps */
 		
 		gfs2_compute_bitstructs(sdp, calc_rgd);
-		log_debug( _("Memory allocated for rg at 0x%llx, bh: %p\n"),
-			  (unsigned long long)calc_rgd->ri.ri_addr,
-			  calc_rgd->bh);
-		if (!calc_rgd->bh) {
-			log_crit( _("Can't allocate memory for bitmap repair.\n"));
-			return -1;
-		}
 		calc_rgd->ri.ri_data0 = calc_rgd->ri.ri_addr +
 			calc_rgd->ri.ri_length;
 		if (prev_rgd) {
@@ -390,28 +382,25 @@ static int rewrite_rg_block(struct gfs2_sbd *sdp, struct rgrp_list *rg,
 		" GFS2_METATYPE_RB nor GFS2_METATYPE_RG.\n"),
 		rg->bh[x]->b_blocknr, rg->bh[x]->b_blocknr,
 		(int)x+1, (int)rg->ri.ri_length);
-	errors_found++;
-	if (query(&opts, "Fix the RG? (y/n)")) {
-
-		errors_corrected++;
+	if (query( _("Fix the Resource Group? (y/n)"))) {
 		log_err( _("Attempting to repair the RG.\n"));
-		rg->bh[x] = bread(&sdp->nvbuf_list, rg->ri.ri_addr + x);
+		rg->bh[x] = bread(sdp, rg->ri.ri_addr + x);
 		if (x) {
 			struct gfs2_meta_header mh;
 
 			mh.mh_magic = GFS2_MAGIC;
 			mh.mh_type = GFS2_METATYPE_RB;
 			mh.mh_format = GFS2_FORMAT_RB;
-			gfs2_meta_header_out(&mh, rg->bh[x]->b_data);
+			gfs2_meta_header_out(&mh, rg->bh[x]);
 		} else {
 			memset(&rg->rg, 0, sizeof(struct gfs2_rgrp));
 			rg->rg.rg_header.mh_magic = GFS2_MAGIC;
 			rg->rg.rg_header.mh_type = GFS2_METATYPE_RG;
 			rg->rg.rg_header.mh_format = GFS2_FORMAT_RG;
 			rg->rg.rg_free = rg->ri.ri_data;
-			gfs2_rgrp_out(&rg->rg, rg->bh[x]->b_data);
+			gfs2_rgrp_out(&rg->rg, rg->bh[x]);
 		}
-		brelse(rg->bh[x], updated);
+		brelse(rg->bh[x]);
 		return 0;
 	}
 	return 1;
@@ -427,7 +416,7 @@ static int rewrite_rg_block(struct gfs2_sbd *sdp, struct rgrp_list *rg,
  */
 int rg_repair(struct gfs2_sbd *sdp, int trust_lvl, int *rg_count)
 {
-	int error, descrepencies;
+	int error, discrepancies;
 	osi_list_t expected_rglist;
 	int calc_rg_count = 0, rgcount_from_index, rg;
 	osi_list_t *exp, *act; /* expected, actual */
@@ -440,7 +429,7 @@ int rg_repair(struct gfs2_sbd *sdp, int trust_lvl, int *rg_count)
 		error = gfs2_rindex_calculate(sdp, &expected_rglist,
 					       &calc_rg_count);
 		if (error) { /* If calculated RGs don't match the fs */
-			gfs2_rgrp_free(&expected_rglist, not_updated);
+			gfs2_rgrp_free(&expected_rglist);
 			return -1;
 		}
 	}
@@ -449,7 +438,7 @@ int rg_repair(struct gfs2_sbd *sdp, int trust_lvl, int *rg_count)
 					     &calc_rg_count);
 		if (error) {
 			log_crit( _("Error rebuilding rg list.\n"));
-			gfs2_rgrp_free(&expected_rglist, not_updated);
+			gfs2_rgrp_free(&expected_rglist);
 			return -1;
 		}
 		sdp->rgrps = calc_rg_count;
@@ -459,8 +448,8 @@ int rg_repair(struct gfs2_sbd *sdp, int trust_lvl, int *rg_count)
 	rindex_read(sdp, 0, &rgcount_from_index);
 	if (sdp->md.riinode->i_di.di_size % sizeof(struct gfs2_rindex)) {
 		log_warn( _("WARNING: rindex file is corrupt.\n"));
-		gfs2_rgrp_free(&expected_rglist, not_updated);
-		gfs2_rgrp_free(&sdp->rglist, not_updated);
+		gfs2_rgrp_free(&expected_rglist);
+		gfs2_rgrp_free(&sdp->rglist);
 		return -1;
 	}
 	log_warn( _("L%d: number of rgs expected     = %lld.\n"), trust_lvl + 1,
@@ -468,8 +457,8 @@ int rg_repair(struct gfs2_sbd *sdp, int trust_lvl, int *rg_count)
 	if (calc_rg_count != sdp->rgrps) {
 		log_warn( _("L%d: They don't match; either (1) the fs was extended, (2) an odd\n"), trust_lvl + 1);
 		log_warn( _("L%d: rg size was used, or (3) we have a corrupt rg index.\n"), trust_lvl + 1);
-		gfs2_rgrp_free(&expected_rglist, not_updated);
-		gfs2_rgrp_free(&sdp->rglist, not_updated);
+		gfs2_rgrp_free(&expected_rglist);
+		gfs2_rgrp_free(&sdp->rglist);
 		return -1;
 	}
 	/* ------------------------------------------------------------- */
@@ -479,7 +468,7 @@ int rg_repair(struct gfs2_sbd *sdp, int trust_lvl, int *rg_count)
 	/* we have a large number that are completely wrong, we should   */
 	/* abandon this method of recovery and try a better one.         */
 	/* ------------------------------------------------------------- */
-	descrepencies = 0;
+	discrepancies = 0;
 	for (rg = 0, act = sdp->rglist.next, exp = expected_rglist.next;
 	     act != &sdp->rglist && exp != &expected_rglist;
 	     act = act->next, exp = exp->next, rg++) {
@@ -492,16 +481,16 @@ int rg_repair(struct gfs2_sbd *sdp, int trust_lvl, int *rg_count)
 		    !ri_equal(actual->ri, expected->ri, ri_data0) ||
 		    !ri_equal(actual->ri, expected->ri, ri_data) ||
 		    !ri_equal(actual->ri, expected->ri, ri_bitbytes)) {
-			descrepencies++;
+			discrepancies++;
 		}
 	}
-	if (trust_lvl < distrust && descrepencies > (trust_lvl * 8)) {
+	if (trust_lvl < distrust && discrepancies > (trust_lvl * 8)) {
 		log_warn( _("Level %d didn't work.  Too many descepencies.\n"),
 			 trust_lvl + 1);
 		log_warn( _("%d out of %d RGs did not match what was expected.\n"),
-			 descrepencies, rg);
-		gfs2_rgrp_free(&expected_rglist, not_updated);
-		gfs2_rgrp_free(&sdp->rglist, not_updated);
+			 discrepancies, rg);
+		gfs2_rgrp_free(&expected_rglist);
+		gfs2_rgrp_free(&sdp->rglist);
 		return -1;
 	}
 	/* ------------------------------------------------------------- */
@@ -523,9 +512,7 @@ int rg_repair(struct gfs2_sbd *sdp, int trust_lvl, int *rg_count)
 		ri_compare(rg, actual->ri, expected->ri, ri_bitbytes, PRIx32);
 		/* If we modified the index, write it back to disk. */
 		if (rindex_modified) {
-			errors_found++;
-			if (query(&opts, _("Fix the index? (y/n)"))) {
-				errors_corrected++;
+			if (query( _("Fix the index? (y/n)"))) {
 				gfs2_rindex_out(&expected->ri, (char *)&buf);
 				gfs2_writei(sdp->md.riinode, (char *)&buf,
 					    rg * sizeof(struct gfs2_rindex),
@@ -540,8 +527,6 @@ int rg_repair(struct gfs2_sbd *sdp, int trust_lvl, int *rg_count)
 				/* Therefore, gfs2_compute_bitstructs might  */
 				/* have malloced the wrong length for bitmap */
 				/* buffers.  So we have to redo it.          */
-				if (actual->bh)
-					free(actual->bh);
 				if (actual->bits)
 					free(actual->bits);
 			}
@@ -573,16 +558,15 @@ int rg_repair(struct gfs2_sbd *sdp, int trust_lvl, int *rg_count)
 					break;
 				prev_err = errblock;
 				rewrite_rg_block(sdp, rgd, errblock);
-			}
-			else {
-				gfs2_rgrp_relse(rgd, not_updated);
+			} else {
+				gfs2_rgrp_relse(rgd);
 				break;
 			}
 			i++;
 		} while (i < rgd->ri.ri_length);
 	}
 	*rg_count = rg;
-	gfs2_rgrp_free(&expected_rglist, not_updated);
-	gfs2_rgrp_free(&sdp->rglist, not_updated);
+	gfs2_rgrp_free(&expected_rglist);
+	gfs2_rgrp_free(&sdp->rglist);
 	return 0;
 }
diff --git a/gfs2/fsck/util.c b/gfs2/fsck/util.c
index eac0e1b..6fe05ab 100644
--- a/gfs2/fsck/util.c
+++ b/gfs2/fsck/util.c
@@ -5,12 +5,53 @@
 #include <sys/time.h>
 #include <stdio.h>
 #include <libintl.h>
+#include <ctype.h>
 #define _(String) gettext(String)
 
 #include "libgfs2.h"
 #include "fs_bits.h"
 #include "util.h"
 
+const char *reftypes[3] = {"data", "metadata", "extended attribute"};
+
+void big_file_comfort(struct gfs2_inode *ip, uint64_t blks_checked)
+{
+	static struct timeval tv;
+	static uint32_t seconds = 0;
+	static uint64_t percent, fsize, chksize;
+	uint64_t one_percent = 0;
+	int i, cs;
+	const char *human_abbrev = " KMGTPE";
+
+	one_percent = ip->i_di.di_blocks / 100;
+	if (blks_checked - last_reported_fblock < one_percent)
+		return;
+
+	last_reported_block = blks_checked;
+	gettimeofday(&tv, NULL);
+	if (!seconds)
+		seconds = tv.tv_sec;
+	if (tv.tv_sec == seconds)
+		return;
+
+	fsize = ip->i_di.di_size;
+	for (i = 0; i < 6 && fsize > 1024; i++)
+		fsize /= 1024;
+	chksize = blks_checked * ip->i_sbd->bsize;
+	for (cs = 0; cs < 6 && chksize > 1024; cs++)
+		chksize /= 1024;
+	seconds = tv.tv_sec;
+	percent = (blks_checked * 100) / ip->i_di.di_blocks;
+	log_notice( _("\rChecking %lld%c of %lld%c of file at %lld (0x%llx)"
+		      "- %llu percent complete.                   \r"),
+		    (long long)chksize, human_abbrev[cs],
+		    (unsigned long long)fsize, human_abbrev[i],
+		    (unsigned long long)ip->i_di.di_num.no_addr,
+		    (unsigned long long)ip->i_di.di_num.no_addr,
+		    (unsigned long long)percent);
+	fflush(stdout);
+}
+
 /* Put out a warm, fuzzy message every second so the user     */
 /* doesn't think we hung.  (This may take a long time).       */
 void warm_fuzzy_stuff(uint64_t block)
@@ -18,7 +59,7 @@ void warm_fuzzy_stuff(uint64_t block)
 	static uint64_t one_percent = 0;
 	static struct timeval tv;
 	static uint32_t seconds = 0;
-	
+
 	if (!one_percent)
 		one_percent = last_fs_block / 100;
 	if (block - last_reported_block >= one_percent) {
@@ -33,19 +74,300 @@ void warm_fuzzy_stuff(uint64_t block)
 			if (last_fs_block) {
 				percent = (block * 100) / last_fs_block;
 				log_notice( _("\r%" PRIu64 " percent complete.\r"), percent);
+				fflush(stdout);
+			}
+		}
+	}
+}
+
+/* fsck_query: Same as gfs2_query except it adjusts errors_found and
+   errors_corrected. */
+int fsck_query(const char *format, ...)
+{
+	va_list args;
+	const char *transform;
+	char response;
+	int ret = 0;
+
+	errors_found++;
+	fsck_abort = 0;
+	if(opts.yes) {
+		errors_corrected++;
+		return 1;
+	}
+	if(opts.no)
+		return 0;
+
+	opts.query = TRUE;
+	while (1) {
+		va_start(args, format);
+		transform = _(format);
+		vprintf(transform, args);
+		va_end(args);
+
+		/* Make sure query is printed out */
+		fflush(NULL);
+		response = gfs2_getch();
+
+		printf("\n");
+		fflush(NULL);
+		if (response == 0x3) { /* if interrupted, by ctrl-c */
+			response = generic_interrupt("Question", "response",
+						     NULL,
+						     "Do you want to abort " \
+						     "or continue (a/c)?",
+						     "ac");
+			if (response == 'a') {
+				ret = 0;
+				fsck_abort = 1;
+				break;
+			}
+			printf("Continuing.\n");
+		} else if(tolower(response) == 'y') {
+			errors_corrected++;
+                        ret = 1;
+                        break;
+		} else if (tolower(response) == 'n') {
+			ret = 0;
+			break;
+		} else {
+			printf("Bad response %d, please type 'y' or 'n'.\n",
+			       response);
+		}
+	}
+
+	opts.query = FALSE;
+	return ret;
+}
+
+/*
+ * gfs2_dup_set - Flag a block as a duplicate
+ * We keep the references in a red/black tree.  We can't keep track of every
+ * single inode in the file system, so the first time this function is called
+ * will actually be for the second reference to the duplicated block.
+ * Returns the tree node for the block (NULL if absent and create is 0).
+ *
+ * create - will be set if the call is supposed to create the reference. */
+static struct duptree *gfs2_dup_set(uint64_t dblock, int create)
+{
+	struct osi_node **newn = &dup_blocks.osi_node, *parent = NULL;
+	struct duptree *data;
+
+	/* Figure out where to put new node */
+	while (*newn) {
+		struct duptree *cur = (struct duptree *)*newn;
+
+		parent = *newn;
+		if (dblock < cur->block)
+			newn = &((*newn)->osi_left);
+		else if (dblock > cur->block)
+			newn = &((*newn)->osi_right);
+		else
+			return cur;
+	}
+
+	if (!create)
+		return NULL;
+	data = malloc(sizeof(struct duptree));
+	dups_found++;
+	memset(data, 0, sizeof(struct duptree));
+	/* Add new node and rebalance tree. */
+	data->block = dblock;
+	data->refs = 1; /* reference 1 is actually the reference we need to
+			   discover in pass1b. */
+	data->first_ref_found = 0;
+	osi_list_init(&data->ref_inode_list);
+	osi_list_init(&data->ref_invinode_list);
+	osi_link_node(&data->node, parent, newn);
+	osi_insert_color(&data->node, &dup_blocks);
+
+	return data;
+}
+
+/*
+ * add_duplicate_ref - Add a duplicate reference to the duplicates tree list
+ * A new element of the tree will be created as needed
+ * When the first reference is discovered in pass1, it realizes it's a
+ * duplicate but it has already forgotten where the first reference was.
+ * So we need to recreate the duplicate reference structure if it's not there.
+ * Later, in pass1b, it has to go back through the file system
+ * and figure out those original references in order to resolve them.
+ */
+int add_duplicate_ref(struct gfs2_inode *ip, uint64_t block,
+		      enum dup_ref_type reftype, int first, int inode_valid)
+{
+	osi_list_t *ref;
+	struct inode_with_dups *id, *found_id;
+	struct duptree *dt;
+
+	if (gfs2_check_range(ip->i_sbd, block) != 0)
+		return 0;
+	/* If this is not the first reference (i.e. all calls from pass1) we
+	   need to create the duplicate reference. If this is pass1b, we want
+	   to ignore references that aren't found. */
+	dt = gfs2_dup_set(block, !first);
+	if (!dt)        /* If this isn't a duplicate */
+		return 0;
+
+	/* If we found the duplicate reference but we've already discovered
+	   the first reference (in pass1b) and the other references in pass1,
+	   we don't need to count it, so just return. */
+	if (dt->first_ref_found)
+		return 0;
+
+	/* The first time this is called from pass1 is actually the second
+	   reference.  When we go back in pass1b looking for the original
+	   reference, we don't want to increment the reference count because
+	   it's already accounted for. */
+	if (first) {
+		if (!dt->first_ref_found) {
+			dt->first_ref_found = 1;
+			dups_found_first++; /* We found another first ref. */
+		}
+	} else {
+		dt->refs++;
+	}
+
+	/* Check for a previous reference to this duplicate on the "invalid
+	   inode" reference list. */
+	found_id = NULL;
+	osi_list_foreach(ref, &dt->ref_invinode_list) {
+		id = osi_list_entry(ref, struct inode_with_dups, list);
+
+		if (id->block_no == ip->i_di.di_num.no_addr) {
+			found_id = id;
+			break;
+		}
+	}
+	if (found_id == NULL) {
+		osi_list_foreach(ref, &dt->ref_inode_list) {
+			id = osi_list_entry(ref, struct inode_with_dups, list);
+
+			if (id->block_no == ip->i_di.di_num.no_addr) {
+				found_id = id;
+				break;
 			}
 		}
 	}
+	if (found_id == NULL) {
+		/* Inode is on neither reference list: allocate a new entry. */
+		uint8_t q;
+
+		if(!(found_id = malloc(sizeof(*found_id)))) {
+			log_crit( _("Unable to allocate "
+				    "inode_with_dups structure\n"));
+			return -1;
+		}
+		if(!(memset(found_id, 0, sizeof(*found_id)))) {
+			log_crit( _("Unable to zero inode_with_dups "
+				    "structure\n"));
+			return -1;
+		}
+		found_id->block_no = ip->i_di.di_num.no_addr;
+		q = block_type(ip->i_di.di_num.no_addr);
+		/* If it's an invalid dinode, put it first on the invalid
+		   inode reference list otherwise put it on the normal list. */
+		if (!inode_valid || q == gfs2_inode_invalid)
+			osi_list_add_prev(&found_id->list,
+					  &dt->ref_invinode_list);
+		else
+			osi_list_add_prev(&found_id->list,
+					  &dt->ref_inode_list);
+	}
+	found_id->reftypecount[reftype]++;
+	found_id->dup_count++;
+	log_info( _("Found %d reference(s) to block %llu"
+		    " (0x%llx) as %s in inode #%llu (0x%llx)\n"),
+		  found_id->dup_count, (unsigned long long)block,
+		  (unsigned long long)block, reftypes[reftype],
+		  (unsigned long long)ip->i_di.di_num.no_addr,
+		  (unsigned long long)ip->i_di.di_num.no_addr);
+	if (first)
+		log_info( _("This is the original reference.\n"));
+	else
+		log_info( _("This brings the total to: %d\n"), dt->refs);
+	return 0;
+}
+
+struct dir_info *dirtree_insert(uint64_t dblock)
+{
+	struct osi_node **newn = &dirtree.osi_node, *parent = NULL;
+	struct dir_info *data;
+
+	/* Figure out where to put new node */
+	while (*newn) {
+		struct dir_info *cur = (struct dir_info *)*newn;
+
+		parent = *newn;
+		if (dblock < cur->dinode)
+			newn = &((*newn)->osi_left);
+		else if (dblock > cur->dinode)
+			newn = &((*newn)->osi_right);
+		else
+			return cur;
+	}
+
+	data = malloc(sizeof(struct dir_info));
+	if (!data) {
+		log_crit( _("Unable to allocate dir_info structure\n"));
+		return NULL;
+	}
+	if (!memset(data, 0, sizeof(struct dir_info))) {
+		log_crit( _("Error while zeroing dir_info structure\n"));
+		return NULL;
+	}
+	/* Add new node and rebalance tree. */
+	data->dinode = dblock;
+	osi_link_node(&data->node, parent, newn);
+	osi_insert_color(&data->node, &dirtree);
+
+	return data;
+}
+
+struct dir_info *dirtree_find(uint64_t block)
+{
+	struct osi_node *node = dirtree.osi_node;
+
+	while (node) {
+		struct dir_info *data = (struct dir_info *)node;
+
+		if (block < data->dinode)
+			node = node->osi_left;
+		else if (block > data->dinode)
+			node = node->osi_right;
+		else
+			return data;
+	}
+	return NULL;
+}
+
+void dup_delete(struct duptree *b)
+{
+	struct inode_with_dups *id;
+	osi_list_t *tmp;
+
+	while (!osi_list_empty(&b->ref_invinode_list)) {
+		tmp = (&b->ref_invinode_list)->next;
+		id = osi_list_entry(tmp, struct inode_with_dups, list);
+		if (id->name)
+			free(id->name);
+		osi_list_del(&id->list);
+		free(id);
+	}
+	while (!osi_list_empty(&b->ref_inode_list)) {
+		tmp = (&b->ref_inode_list)->next;
+		id = osi_list_entry(tmp, struct inode_with_dups, list);
+		if (id->name)
+			free(id->name);
+		osi_list_del(&id->list);
+		free(id);
+	}
+	osi_erase(&b->node, &dup_blocks);
+	free(b);
 }
 
-const char *block_type_string(struct gfs2_block_query *q)
+void dirtree_delete(struct dir_info *b)
 {
-	const char *blktyp[] = {"free", "used", "indirect data", "inode",
-							"file", "symlink", "block dev", "char dev",
-							"fifo", "socket", "dir leaf", "journ data",
-							"other meta", "eattribute", "unused",
-							"invalid"};
-	if (q->block_type < 16)
-		return (blktyp[q->block_type]);
-	return blktyp[15];
+	osi_erase(&b->node, &dirtree);
+	free(b);
 }
diff --git a/gfs2/fsck/util.h b/gfs2/fsck/util.h
index f84ab41..d2c81db 100644
--- a/gfs2/fsck/util.h
+++ b/gfs2/fsck/util.h
@@ -1,13 +1,32 @@
 #ifndef __UTIL_H__
 #define __UTIL_H__
 
+#include "fsck.h"
 #include "libgfs2.h"
 
 #define fsck_lseek(fd, off) \
   ((lseek((fd), (off), SEEK_SET) == (off)) ? 0 : -1)
 
+#define INODE_VALID 1
+#define INODE_INVALID 0
+
 struct di_info *search_list(osi_list_t *list, uint64_t addr);
+void big_file_comfort(struct gfs2_inode *ip, uint64_t blks_checked);
 void warm_fuzzy_stuff(uint64_t block);
-const char *block_type_string(struct gfs2_block_query *q);
+int add_duplicate_ref(struct gfs2_inode *ip, uint64_t block,
+		      enum dup_ref_type reftype, int first, int inode_valid);
+extern const char *reftypes[3];
+
+static inline uint8_t block_type(uint64_t bblock)
+{
+	static unsigned char *byte;
+	static uint64_t b;
+	static uint8_t btype;
+
+	byte = bl->map + BLOCKMAP_SIZE4(bblock);
+	b = BLOCKMAP_BYTE_OFFSET4(bblock);
+	btype = (*byte & (BLOCKMAP_MASK4 << b )) >> b;
+	return btype;
+}
 
 #endif /* __UTIL_H__ */
diff --git a/gfs2/include/osi_tree.h b/gfs2/include/osi_tree.h
new file mode 100644
index 0000000..f0d0768
--- /dev/null
+++ b/gfs2/include/osi_tree.h
@@ -0,0 +1,395 @@
+#ifndef __OSI_RBTREE_DOT_H__
+#define __OSI_RBTREE_DOT_H__
+
+#include <stdlib.h>
+#include <stddef.h>
+#include <assert.h>
+
+/* Adapted from the kernel's rbtree.c */
+struct osi_node {
+	unsigned long  osi_parent_color;
+#define	OSI_RED		0
+#define	OSI_BLACK	1
+	struct osi_node *osi_left;
+	struct osi_node *osi_right;
+	struct osi_node *osi_parent;
+};
+
+#define osi_parent(r)   ((struct osi_node *)((r)->osi_parent_color & ~3))
+#define osi_color(r)   ((r)->osi_parent_color & 1)
+#define osi_is_red(r)   (!osi_color(r))
+#define osi_is_black(r) osi_color(r)
+#define osi_set_red(r)  do { (r)->osi_parent_color &= ~1; } while (0)
+#define osi_set_black(r)  do { (r)->osi_parent_color |= 1; } while (0)
+
+struct osi_root
+{
+	struct osi_node *osi_node;
+};
+
+static inline void osi_set_color(struct osi_node *rb, int color)
+{
+	rb->osi_parent_color = (rb->osi_parent_color & ~1) | color;
+}
+
+static inline void osi_set_parent(struct osi_node *rb, struct osi_node *p)
+{
+        rb->osi_parent_color = (rb->osi_parent_color & 3) | (unsigned long)p;
+}
+
+static inline void osi_link_node(struct osi_node *node,
+				 struct osi_node *parent,
+				 struct osi_node **osi_link)
+{
+	node->osi_parent_color = (unsigned long )parent;
+	node->osi_left = node->osi_right = NULL;
+
+	*osi_link = node;
+}
+
+static inline void __osi_rotate_left(struct osi_node *node,
+				     struct osi_root *root)
+{
+	struct osi_node *right = node->osi_right;
+	struct osi_node *parent = osi_parent(node);
+
+	if ((node->osi_right = right->osi_left))
+		osi_set_parent(right->osi_left, node);
+	right->osi_left = node;
+
+	osi_set_parent(right, parent);
+
+	if (parent) {
+		if (node == parent->osi_left)
+			parent->osi_left = right;
+		else
+			parent->osi_right = right;
+	}
+	else
+		root->osi_node = right;
+	osi_set_parent(node, right);
+}
+
+static inline void __osi_rotate_right(struct osi_node *node,
+				      struct osi_root *root)
+{
+	struct osi_node *left = node->osi_left;
+	struct osi_node *parent = osi_parent(node);
+
+	if ((node->osi_left = left->osi_right))
+		osi_set_parent(left->osi_right, node);
+	left->osi_right = node;
+
+	osi_set_parent(left, parent);
+
+	if (parent) {
+		if (node == parent->osi_right)
+			parent->osi_right = left;
+		else
+			parent->osi_left = left;
+	} else
+		root->osi_node = left;
+	osi_set_parent(node, left);
+}
+
+static inline void osi_insert_color(struct osi_node *node,
+				    struct osi_root *root)
+{
+	struct osi_node *parent, *gparent;
+
+	while ((parent = osi_parent(node)) && osi_is_red(parent)) {
+		gparent = osi_parent(parent);
+
+		if (parent == gparent->osi_left) {
+			{
+				register struct osi_node *uncle = gparent->osi_right;
+				if (uncle && osi_is_red(uncle)) {
+					osi_set_black(uncle);
+					osi_set_black(parent);
+					osi_set_red(gparent);
+					node = gparent;
+					continue;
+				}
+			}
+
+			if (parent->osi_right == node) {
+				register struct osi_node *tmp;
+
+				__osi_rotate_left(parent, root);
+				tmp = parent;
+				parent = node;
+				node = tmp;
+			}
+
+			osi_set_black(parent);
+			osi_set_red(gparent);
+			__osi_rotate_right(gparent, root);
+		} else {
+			{
+				register struct osi_node *uncle = gparent->osi_left;
+				if (uncle && osi_is_red(uncle)) {
+					osi_set_black(uncle);
+					osi_set_black(parent);
+					osi_set_red(gparent);
+					node = gparent;
+					continue;
+				}
+			}
+
+			if (parent->osi_left == node) {
+				register struct osi_node *tmp;
+				__osi_rotate_right(parent, root);
+				tmp = parent;
+				parent = node;
+				node = tmp;
+			}
+
+			osi_set_black(parent);
+			osi_set_red(gparent);
+			__osi_rotate_left(gparent, root);
+		}
+	}
+
+	osi_set_black(root->osi_node);
+}
+
+static inline void __osi_erase_color(struct osi_node *node,
+				     struct osi_node *parent,
+				     struct osi_root *root)
+{
+	struct osi_node *other;
+
+	while ((!node || osi_is_black(node)) && node != root->osi_node) {
+		if (parent->osi_left == node) {
+			other = parent->osi_right;
+			if (osi_is_red(other)) {
+				osi_set_black(other);
+				osi_set_red(parent);
+				__osi_rotate_left(parent, root);
+				other = parent->osi_right;
+			}
+			if ((!other->osi_left || osi_is_black(other->osi_left)) &&
+			    (!other->osi_right || osi_is_black(other->osi_right)))
+			{
+				osi_set_red(other);
+				node = parent;
+				parent = osi_parent(node);
+			} else {
+				if (!other->osi_right || osi_is_black(other->osi_right))
+				{
+					struct osi_node *o_left;
+					if ((o_left = other->osi_left))
+						osi_set_black(o_left);
+					osi_set_red(other);
+					__osi_rotate_right(other, root);
+					other = parent->osi_right;
+				}
+				osi_set_color(other, osi_color(parent));
+				osi_set_black(parent);
+				if (other->osi_right)
+					osi_set_black(other->osi_right);
+				__osi_rotate_left(parent, root);
+				node = root->osi_node;
+				break;
+			}
+		} else {
+			other = parent->osi_left;
+			if (osi_is_red(other)) {
+				osi_set_black(other);
+				osi_set_red(parent);
+				__osi_rotate_right(parent, root);
+				other = parent->osi_left;
+			}
+			if ((!other->osi_left || osi_is_black(other->osi_left)) &&
+			    (!other->osi_right || osi_is_black(other->osi_right)))
+			{
+				osi_set_red(other);
+				node = parent;
+				parent = osi_parent(node);
+			} else {
+				if (!other->osi_left || osi_is_black(other->osi_left))
+				{
+					register struct osi_node *o_right;
+					if ((o_right = other->osi_right))
+						osi_set_black(o_right);
+					osi_set_red(other);
+					__osi_rotate_left(other, root);
+					other = parent->osi_left;
+				}
+				osi_set_color(other, osi_color(parent));
+				osi_set_black(parent);
+				if (other->osi_left)
+					osi_set_black(other->osi_left);
+				__osi_rotate_right(parent, root);
+				node = root->osi_node;
+				break;
+			}
+		}
+	}
+	if (node)
+		osi_set_black(node);
+}
+
+static inline void osi_erase(struct osi_node *node, struct osi_root *root)
+{
+	struct osi_node *child, *parent;
+	int color;
+
+	if (!node->osi_left)
+		child = node->osi_right;
+	else if (!node->osi_right)
+		child = node->osi_left;
+	else {
+		struct osi_node *old = node, *left;
+
+		node = node->osi_right;
+		while ((left = node->osi_left) != NULL)
+			node = left;
+		child = node->osi_right;
+		parent = osi_parent(node);
+		color = osi_color(node);
+
+		if (child)
+			osi_set_parent(child, parent);
+		if (parent == old) {
+			parent->osi_right = child;
+			parent = node;
+		} else
+			parent->osi_left = child;
+
+		node->osi_parent_color = old->osi_parent_color;
+		node->osi_right = old->osi_right;
+		node->osi_left = old->osi_left;
+
+		if (osi_parent(old)) {
+			if (osi_parent(old)->osi_left == old)
+				osi_parent(old)->osi_left = node;
+			else
+				osi_parent(old)->osi_right = node;
+		} else
+			root->osi_node = node;
+
+		osi_set_parent(old->osi_left, node);
+		if (old->osi_right)
+			osi_set_parent(old->osi_right, node);
+		goto color;
+	}
+
+	parent = osi_parent(node);
+	color = osi_color(node);
+
+	if (child)
+		osi_set_parent(child, parent);
+	if (parent)
+	{
+		if (parent->osi_left == node)
+			parent->osi_left = child;
+		else
+			parent->osi_right = child;
+	}
+	else
+		root->osi_node = child;
+
+ color:
+	if (color == OSI_BLACK)
+		__osi_erase_color(child, parent, root);
+}
+
+/*
+ * This function returns the first node (in sort order) of the tree.
+ */
+static inline struct osi_node *osi_first(struct osi_root *root)
+{
+	struct osi_node	*n;
+
+	n = root->osi_node;
+	if (!n)
+		return NULL;
+	while (n->osi_left)
+		n = n->osi_left;
+	return n;
+}
+
+static inline struct osi_node *osi_last(struct osi_root *root)
+{
+	struct osi_node	*n;
+
+	n = root->osi_node;
+	if (!n)
+		return NULL;
+	while (n->osi_right)
+		n = n->osi_right;
+	return n;
+}
+
+static inline struct osi_node *osi_next(struct osi_node *node)
+{
+	struct osi_node *parent;
+
+	/* If we have a right-hand child, go down and then left as far
+	   as we can. */
+	if (node->osi_right) {
+		node = node->osi_right;
+		while (node->osi_left)
+			node=node->osi_left;
+		return node;
+	}
+
+	/* No right-hand children.  Everything down and left is
+	   smaller than us, so any 'next' node must be in the general
+	   direction of our parent. Go up the tree; any time the
+	   ancestor is a right-hand child of its parent, keep going
+	   up. First time it's a left-hand child of its parent, said
+	   parent is our 'next' node. */
+	while ((parent = osi_parent(node)) && node == parent->osi_right)
+		node = parent;
+
+	return parent;
+}
+
+static inline struct osi_node *osi_prev(struct osi_node *node)
+{
+	struct osi_node *parent;
+
+	/* If we have a left-hand child, go down and then right as far
+	   as we can. */
+	if (node->osi_left) {
+		node = node->osi_left;
+		while (node->osi_right)
+			node=node->osi_right;
+		return node;
+	}
+
+	/* No left-hand children. Go up till we find an ancestor which
+	   is a right-hand child of its parent */
+	while ((parent = osi_parent(node)) && node == parent->osi_left)
+		node = parent;
+
+	return parent;
+}
+
+static inline void osi_replace_node(struct osi_node *victim,
+				    struct osi_node *new,
+				    struct osi_root *root)
+{
+	struct osi_node *parent = osi_parent(victim);
+
+	/* Set the surrounding nodes to point to the replacement */
+	if (parent) {
+		if (victim == parent->osi_left)
+			parent->osi_left = new;
+		else
+			parent->osi_right = new;
+	} else {
+		root->osi_node = new;
+	}
+	if (victim->osi_left)
+		osi_set_parent(victim->osi_left, new);
+	if (victim->osi_right)
+		osi_set_parent(victim->osi_right, new);
+
+	/* Copy the pointers/colour from the victim to the replacement */
+	*new = *victim;
+}
+
+#endif
diff --git a/gfs2/init.d/gfs2.in b/gfs2/init.d/gfs2.in
index b28a99a..e957a36 100644
--- a/gfs2/init.d/gfs2.in
+++ b/gfs2/init.d/gfs2.in
@@ -18,14 +18,28 @@
 # set secure PATH
 PATH="/bin:/usr/bin:/sbin:/usr/sbin:@SBINDIR@"
 
+### generic wrapper functions
+
 success()
 {
-    echo -ne "[  OK  ]\r"
+	echo -ne "[  OK  ]\r"
 }
 
 failure()
 {
-    echo -ne "[FAILED]\r"
+	echo -ne "[FAILED]\r"
+}
+
+ok() {
+	success
+	echo
+}
+
+nok() {
+	echo -e "$errmsg"
+	failure
+	echo
+	exit 1
 }
 
 # rpm based distros
@@ -43,106 +57,75 @@ if [ -d /etc/default ]; then
 	[ -z "$LOCK_FILE" ] && LOCK_FILE="/var/lock/gfs2"
 fi
 
+# proc is required for both status and stop.
+# start could live without, but better be consistent with the behavior
+if [ ! -f /proc/mounts ]; then
+	echo "GFS2: /proc is not available, unable to proceed"
+	exit 1
+fi
+
 #
 # This script's behavior is modeled closely after the netfs script.  
 #
 GFS2FSTAB=$(LC_ALL=C awk '!/^#/ && $3 == "gfs2" && $4 !~ /noauto/ { print $2 }' /etc/fstab)
 GFS2MTAB=$(LC_ALL=C awk '!/^#/ && $3 == "gfs2" && $2 != "/" { print $2 }' /proc/mounts)
 
+if [ -z "$GFS2FSTAB" ]; then
+	echo "GFS2: no entries found in /etc/fstab"
+	exit 6
+fi
+
 # See how we were called.
 case "$1" in
-  start)
-        if [ -n "$GFS2FSTAB" ] 
-	then
-		echo -n "Mounting GFS2 filesystems: "
-		mount -a -t gfs2
-		rtrn=$?
-		if [ $rtrn = 0 ]; then
-			touch $LOCK_FILE
-			success
-			echo
-		else
-			failure
-			echo
-		fi
-	fi
+start)
+	[ -z "$GFS2FSTAB" ] && exit 0
+	echo -n "Mounting GFS2 filesystems: "
+	errmsg="$(mount -a -t gfs2 2>&1)" || nok
+	touch $LOCK_FILE
+	ok
+;;
+stop)
+	[ -z "$GFS2MTAB" ] && exit 0
+	echo -n "Unmounting GFS2 filesystems: "
+	errmsg="$(umount -a -t gfs2 2>&1)" || nok
+	modprobe -r gfs2 > /dev/null 2>&1 || true
+	rm -f $LOCK_FILE
+	ok
 	;;
 
-  stop)
-  	if [ -n "$GFS2MTAB" ] 
-	then
-		sig=
-		retry=6
-		remaining=`LC_ALL=C awk '!/^#/ && $3 == "gfs2" && $2 != "/" {print $2}' /proc/mounts`
-		while [ -n "$remaining" -a "$retry" -gt 0 ]
-		do
-			echo -n "Unmounting GFS2 filesystems: "
-			umount -a -t gfs2
-			rtrn=$?
-			if [ $rtrn = 0 ]; then
-				success
-				echo
-			else
-				failure
-				echo
-			fi
-			
-			if [ $retry -eq 0 ] 
-			then
-				echo -n "Unmounting GFS2 filesystems (lazy): "
-				umount -l -a -t gfs2
-				rtrn=$?
-				if [ $rtrn = 0 ]; then
-					success
-					echo
-				else
-					failure
-					echo
-				fi
-				break
-			fi
-
-			sleep 2
-			remaining=`LC_ALL=C awk '!/^#/ && $3 == "gfs2" && $2 != "/" {print $2}' /proc/mounts`
-			[ -z "$remaining" ] && break
-			fuser -k -m $sig $remaining > /dev/null 2>&1
-			sleep 10
-			retry=$(($retry - 1))
-			sig=-9
-		done
+status)
+	if [ -z "$GFS2MTAB" ] && [ -f $LOCK_FILE ]; then
+		echo "GFS2: Found stale lock file $LOCK_FILE"
+		exit 2
 	fi
 
-	modprobe -r gfs2
-	rm -f $LOCK_FILE
-	;;
-
-  status)
-	if [ -f /proc/mounts ]
-	then
-	        [ -n "$GFS2FSTAB" ] && {
-		     echo "Configured GFS2 mountpoints: "
-		     for fs in $GFS2FSTAB; do echo $fs ; done
-		}
-		[ -n "$GFS2MTAB" ] && {
-                      echo "Active GFS2 mountpoints: "
-		      for fs in $GFS2MTAB; do echo $fs ; done
-		}
-	else
-		echo "/proc filesystem unavailable"
+	if [ -n "$GFS2FSTAB" ] && [ -z "$GFS2MTAB" ]; then
+		echo "GFS2: service is not running"
+		exit 3
 	fi
-	;;
 
-  restart)
+	echo "Configured GFS2 mountpoints: "
+	for fs in $GFS2FSTAB; do
+		echo $fs;
+	done
+
+	echo "Active GFS2 mountpoints: "
+	for fs in $GFS2MTAB; do
+		echo $fs;
+	done
+;;
+condrestart|try-restart)
+	$0 status >/dev/null 2>&1 || exit 0
+	$0 restart
+;;
+restart|reload|force-reload)
 	$0 stop
 	$0 start
-	;;
-
-  reload|force-reload)
-        $0 start
-	;;
-  *)
-	echo $"Usage: $0 {start|stop|restart|reload|status}"
-	exit 1
+;;
+*)
+	echo "Usage: $0 {start|stop|restart|reload|force-reload|condrestart|try-restart|status}"
+	exit 2
+;;
 esac
 
 exit 0
diff --git a/gfs2/libgfs2/block_list.c b/gfs2/libgfs2/block_list.c
index 571c737..faedc54 100644
--- a/gfs2/libgfs2/block_list.c
+++ b/gfs2/libgfs2/block_list.c
@@ -8,64 +8,13 @@
 
 #include "libgfs2.h"
 
-/* Must be kept in sync with mark_block enum in libgfs2.h */
-static int mark_to_gbmap[16] = {
-	FREE, BLOCK_IN_USE, DIR_INDIR_BLK, DIR_INODE, FILE_INODE,
-	LNK_INODE, BLK_INODE, CHR_INODE, FIFO_INODE, SOCK_INODE,
-	DIR_LEAF_INODE, JOURNAL_BLK, OTHER_META, EATTR_META,
-	INVALID_META, INVALID_META
-};
-
-#define BITMAP_SIZE(size, cpb) (size / cpb)
-#define BITMAP_SIZE1(size) (size >> 3)
-#define BITMAP_SIZE4(size) (size >> 1)
-
-#define BITMAP_BYTE_OFFSET(x, map) ((x % map->chunks_per_byte) \
-                                    * map->chunksize )
-
-/* BITMAP_BYTE_OFFSET1 is for chunksize==1, which implies chunks_per_byte==8 */
-/* Reducing the math, we get:                                                */
-/* #define BITMAP_BYTE_OFFSET1(x) ((x % 8) * 1)                              */
-/* #define BITMAP_BYTE_OFFSET1(x) (x % 8)                                    */
-/* #define BITMAP_BYTE_OFFSET1(x) (x & 0x0000000000000007)                   */
-#define BITMAP_BYTE_OFFSET1(x) (x & 0x0000000000000007)
-
-/* BITMAP_BYTE_OFFSET4 is for chunksize==4, which implies chunks_per_byte==2 */
-/* Reducing the math, we get:                                                */
-/* #define BITMAP_BYTE_OFFSET4(x) ((x % 2) * 4)                              */
-/* #define BITMAP_BYTE_OFFSET4(x) ((x & 0x0000000000000001) * 4)             */
-/* #define BITMAP_BYTE_OFFSET4(x) ((x & 0x0000000000000001) << 2)            */
-#define BITMAP_BYTE_OFFSET4(x) ((x & 0x0000000000000001) << 2)
-
-#define BITMAP_MASK(chunksize) ((2 << (chunksize - 1)) - 1)
-/* BITMAP_MASK1 is  for chunksize==1                                         */
-/* Reducing the math, we get:                                                */
-/* #define BITMAP_MASK1(chunksize) ((2 << (1 - 1)) - 1)                      */
-/* #define BITMAP_MASK1(chunksize) ((2 << 0) - 1)                            */
-/* #define BITMAP_MASK1(chunksize) ((2) - 1)                                 */
-#define BITMAP_MASK1(chunksize) (1)
-
-/* BITMAP_MASK4 is  for chunksize==4                                         */
-/* #define BITMAP_MASK(chunksize) ((2 << (4 - 1)) - 1)                       */
-/* #define BITMAP_MASK(chunksize) ((2 << 3) - 1)                             */
-/* #define BITMAP_MASK(chunksize) (0x10 - 1)                                 */
-#define BITMAP_MASK4(chunksize) (0xf)
-
-static int gfs2_bitmap_create(struct gfs2_bmap *bmap, uint64_t size,
-					   uint8_t chunksize)
+static int gfs2_blockmap_create(struct gfs2_bmap *bmap, uint64_t size)
 {
-	if((((chunksize >> 1) << 1) != chunksize) && chunksize != 1)
-		return -1;
-	if(chunksize > 8)
-		return -1;
-	bmap->chunksize = chunksize;
-	bmap->chunks_per_byte = 8 / chunksize;
-
 	bmap->size = size;
 
-	/* Have to add 1 to BITMAP_SIZE since it's 0-based and mallocs
+	/* Have to add 1 to BLOCKMAP_SIZE since it's 0-based and mallocs
 	 * must be 1-based */
-	bmap->mapsize = BITMAP_SIZE(size, bmap->chunks_per_byte)+1;
+	bmap->mapsize = BLOCKMAP_SIZE4(size);
 
 	if(!(bmap->map = malloc(sizeof(char) * bmap->mapsize)))
 		return -ENOMEM;
@@ -77,95 +26,29 @@ static int gfs2_bitmap_create(struct gfs2_bmap *bmap, uint64_t size,
 	return 0;
 }
 
-static int gfs2_bitmap_set(struct gfs2_bmap *bmap, uint64_t offset, uint8_t val)
-{
-	static char *byte;
-	static uint64_t b;
-
-	if(offset < bmap->size) {
-		if (bmap->chunksize == 1) {
-			byte = bmap->map + BITMAP_SIZE1(offset);
-			b = BITMAP_BYTE_OFFSET1(offset);
-			*byte |= (val & BITMAP_MASK1(bmap->chunksize));
-		} else {
-			byte = bmap->map + BITMAP_SIZE4(offset);
-			b = BITMAP_BYTE_OFFSET4(offset);
-			*byte |= (val & BITMAP_MASK4(bmap->chunksize)) << b;
-		}
-		return 0;
-	}
-	return -1;
-}
-
-static int gfs2_bitmap_get(struct gfs2_bmap *bmap, uint64_t bit, uint8_t *val)
-{
-	static char *byte;
-	static uint64_t b;
-
-	if(bit < bmap->size) {
-		if (bmap->chunksize == 1) {
-			byte = bmap->map + BITMAP_SIZE1(bit);
-			b = BITMAP_BYTE_OFFSET1(bit);
-			*val = (*byte & (BITMAP_MASK1(bmap->chunksize) << b )) >> b;
-		} else {
-			byte = bmap->map + BITMAP_SIZE4(bit);
-			b = BITMAP_BYTE_OFFSET4(bit);
-			*val = (*byte & (BITMAP_MASK4(bmap->chunksize) << b )) >> b;
-		}
-		return 0;
-	}
-	return -1;
-}
-
-static int gfs2_bitmap_clear(struct gfs2_bmap *bmap, uint64_t offset)
-{
-	static char *byte;
-	static uint64_t b;
-
-	if(offset < bmap->size) {
-		if (bmap->chunksize == 1) {
-			byte = bmap->map + BITMAP_SIZE1(offset);
-			b = BITMAP_BYTE_OFFSET1(offset);
-			*byte &= ~(BITMAP_MASK1(bmap->chunksize) << b);
-		} else {
-			byte = bmap->map + BITMAP_SIZE4(offset);
-			b = BITMAP_BYTE_OFFSET4(offset);
-			*byte &= ~(BITMAP_MASK4(bmap->chunksize) << b);
-		}
-		return 0;
-	}
-	return -1;
-
-}
-
-static void gfs2_bitmap_destroy(struct gfs2_bmap *bmap)
+static void gfs2_blockmap_destroy(struct gfs2_bmap *bmap)
 {
 	if(bmap->map)
 		free(bmap->map);
 	bmap->size = 0;
 	bmap->mapsize = 0;
-	bmap->chunksize = 0;
-	bmap->chunks_per_byte = 0;
 }
 
-struct gfs2_block_list *gfs2_block_list_create(struct gfs2_sbd *sdp,
-					       uint64_t size,
-					       uint64_t *addl_mem_needed)
+struct gfs2_bmap *gfs2_bmap_create(struct gfs2_sbd *sdp, uint64_t size,
+				   uint64_t *addl_mem_needed)
 {
-	struct gfs2_block_list *il;
+	struct gfs2_bmap *il;
 
 	*addl_mem_needed = 0L;
 	il = malloc(sizeof(*il));
 	if (!il || !memset(il, 0, sizeof(*il)))
 		return NULL;
 
-	if(gfs2_bitmap_create(&il->list.gbmap.group_map, size, 4)) {
-		*addl_mem_needed = il->list.gbmap.group_map.mapsize;
+	if(gfs2_blockmap_create(il, size)) {
+		*addl_mem_needed = il->mapsize;
 		free(il);
 		il = NULL;
 	}
-	osi_list_init(&sdp->bad_blocks.list);
-	osi_list_init(&sdp->dup_blocks.list);
 	osi_list_init(&sdp->eattr_blocks.list);
 	return il;
 }
@@ -182,19 +65,6 @@ void gfs2_special_free(struct special_blocks *blist)
 	}
 }
 
-static void gfs2_dup_free(struct dup_blocks *blist)
-{
-	struct dup_blocks *f;
-
-	while(!osi_list_empty(&blist->list)) {
-		f = osi_list_entry(blist->list.next, struct dup_blocks, list);
-		while (!osi_list_empty(&f->ref_inode_list))
-			osi_list_del(&f->ref_inode_list);
-		osi_list_del(&f->list);
-		free(f);
-	}
-}
-
 struct special_blocks *blockfind(struct special_blocks *blist, uint64_t num)
 {
 	osi_list_t *head = &blist->list;
@@ -209,52 +79,26 @@ struct special_blocks *blockfind(struct special_blocks *blist, uint64_t num)
 	return NULL;
 }
 
-static struct dup_blocks *dupfind(struct dup_blocks *blist, uint64_t num)
-{
-	osi_list_t *head = &blist->list;
-	osi_list_t *tmp;
-	struct dup_blocks *b;
-
-	for (tmp = head->next; tmp != head; tmp = tmp->next) {
-		b = osi_list_entry(tmp, struct dup_blocks, list);
-		if (b->block_no == num)
-			return b;
-	}
-	return NULL;
-}
-
-void gfs2_special_set(struct special_blocks *blocklist, uint64_t block)
+void gfs2_special_add(struct special_blocks *blocklist, uint64_t block)
 {
 	struct special_blocks *b;
 
-	if (blockfind(blocklist, block))
-		return;
 	b = malloc(sizeof(struct special_blocks));
 	if (b) {
 		memset(b, 0, sizeof(*b));
 		b->block = block;
 		osi_list_add(&b->list, &blocklist->list);
 	}
-	return;
 }
 
-static void gfs2_dup_set(struct dup_blocks *blocklist, uint64_t block)
+void gfs2_special_set(struct special_blocks *blocklist, uint64_t block)
 {
-	struct dup_blocks *b;
-
-	if (dupfind(blocklist, block))
+	if (blockfind(blocklist, block))
 		return;
-	b = malloc(sizeof(struct dup_blocks));
-	if (b) {
-		memset(b, 0, sizeof(*b));
-		b->block_no = block;
-		osi_list_init(&b->ref_inode_list);
-		osi_list_add(&b->list, &blocklist->list);
-	}
-	return;
+	gfs2_special_add(blocklist, block);
 }
 
-static void gfs2_special_clear(struct special_blocks *blocklist, uint64_t block)
+void gfs2_special_clear(struct special_blocks *blocklist, uint64_t block)
 {
 	struct special_blocks *b;
 
@@ -265,111 +109,29 @@ static void gfs2_special_clear(struct special_blocks *blocklist, uint64_t block)
 	}
 }
 
-static void gfs2_dup_clear(struct dup_blocks *blocklist, uint64_t block)
+int gfs2_blockmap_set(struct gfs2_bmap *bmap, uint64_t bblock,
+		      enum gfs2_mark_block mark)
 {
-	struct dup_blocks *b;
-
-	b = dupfind(blocklist, block);
-	if (b) {
-		osi_list_del(&b->list);
-		free(b);
-	}
-}
-
-int gfs2_block_mark(struct gfs2_sbd *sdp, struct gfs2_block_list *il,
-		    uint64_t block, enum gfs2_mark_block mark)
-{
-	int err = 0;
-
-	if(mark == gfs2_bad_block)
-		gfs2_special_set(&sdp->bad_blocks, block);
-	else if(mark == gfs2_dup_block)
-		gfs2_dup_set(&sdp->dup_blocks, block);
-	else if(mark == gfs2_eattr_block)
-		gfs2_special_set(&sdp->eattr_blocks, block);
-	else
-		err = gfs2_bitmap_set(&il->list.gbmap.group_map, block,
-				      mark_to_gbmap[mark]);
-	return err;
-}
-
-/* gfs2_block_unmark clears ONE mark for the given block */
-int gfs2_block_unmark(struct gfs2_sbd *sdp, struct gfs2_block_list *il,
-		      uint64_t block, enum gfs2_mark_block mark)
-{
-	int err = 0;
-
-	switch (mark) {
-	case gfs2_dup_block:
-		gfs2_dup_clear(&sdp->dup_blocks, block);
-		break;
-	case gfs2_bad_block:
-		gfs2_special_clear(&sdp->bad_blocks, block);
-		break;
-	case gfs2_eattr_block:
-		gfs2_special_clear(&sdp->eattr_blocks, block);
-		break;
-	default:
-		/* FIXME: check types */
-		err = gfs2_bitmap_clear(&il->list.gbmap.group_map, block);
-		break;
-	}
-	return err;
-}
-
-/* gfs2_block_clear clears all the marks for the given block */
-int gfs2_block_clear(struct gfs2_sbd *sdp, struct gfs2_block_list *il,
-		     uint64_t block)
-{
-	int err = 0;
-
-	gfs2_dup_clear(&sdp->dup_blocks, block);
-	gfs2_special_clear(&sdp->bad_blocks, block);
-	gfs2_special_clear(&sdp->eattr_blocks, block);
-	err = gfs2_bitmap_clear(&il->list.gbmap.group_map, block);
-	return err;
-}
-
-int gfs2_block_set(struct gfs2_sbd *sdp, struct gfs2_block_list *il,
-		   uint64_t block, enum gfs2_mark_block mark)
-{
-	int err;
-
-	err = gfs2_block_clear(sdp, il, block); /* clear all block status */
-	if(!err)
-		err = gfs2_block_mark(sdp, il, block, mark);
-	return err;
-}
+	static unsigned char *byte;
+	static uint64_t b;
 
-int gfs2_block_check(struct gfs2_sbd *sdp, struct gfs2_block_list *il,
-		     uint64_t block, struct gfs2_block_query *val)
-{
-	int err = 0;
+	if(bblock > bmap->size)
+		return -1;
 
-	val->bad_block = 0;
-	val->dup_block = 0;
-	val->eattr_block = 0;
-	if (blockfind(&sdp->bad_blocks, block))
-		val->bad_block = 1;
-	if (dupfind(&sdp->dup_blocks, block))
-		val->dup_block = 1;
-	if (blockfind(&sdp->eattr_blocks, block))
-		val->eattr_block = 1;
-	if((err = gfs2_bitmap_get(&il->list.gbmap.group_map, block,
-				  &val->block_type)))
-		return err;
+	byte = bmap->map + BLOCKMAP_SIZE4(bblock);
+	b = BLOCKMAP_BYTE_OFFSET4(bblock);
+	*byte &= ~(BLOCKMAP_MASK4 << b);
+	*byte |= (mark & BLOCKMAP_MASK4) << b;
 	return 0;
 }
 
-void *gfs2_block_list_destroy(struct gfs2_sbd *sdp, struct gfs2_block_list *il)
+void *gfs2_bmap_destroy(struct gfs2_sbd *sdp, struct gfs2_bmap *il)
 {
 	if(il) {
-		gfs2_bitmap_destroy(&il->list.gbmap.group_map);
+		gfs2_blockmap_destroy(il);
 		free(il);
 		il = NULL;
 	}
-	gfs2_special_free(&sdp->bad_blocks);
-	gfs2_dup_free(&sdp->dup_blocks);
 	gfs2_special_free(&sdp->eattr_blocks);
 	return il;
 }
diff --git a/gfs2/libgfs2/buf.c b/gfs2/libgfs2/buf.c
index f0164b4..5a0f718 100644
--- a/gfs2/libgfs2/buf.c
+++ b/gfs2/libgfs2/buf.c
@@ -12,116 +12,18 @@
 
 #include "libgfs2.h"
 
-static __inline__ osi_list_t *
-blkno2head(struct buf_list *bl, uint64_t blkno)
-{
-	return bl->buf_hash +
-		(gfs2_disk_hash((char *)&blkno, sizeof(uint64_t)) & BUF_HASH_MASK);
-}
-
-static int write_buffer(struct buf_list *bl, struct gfs2_buffer_head *bh)
-{
-	struct gfs2_sbd *sdp = bl->sbp;
-
-	osi_list_del(&bh->b_list);
-	osi_list_del(&bh->b_hash);
-	bl->num_bufs--;
-	if (bh->b_changed) {
-		if (lseek(sdp->device_fd, bh->b_blocknr * sdp->bsize,
-			  SEEK_SET) != bh->b_blocknr * sdp->bsize) {
-			return -1;
-		}
-		if (write(sdp->device_fd, bh->b_data, sdp->bsize) !=
-		    sdp->bsize) {
-			return -1;
-		}
-		sdp->writes++;
-	}
-	free(bh);
-	return 0;
-}
-
-void init_buf_list(struct gfs2_sbd *sdp, struct buf_list *bl, uint32_t limit)
-{
-	int i;
-
-	bl->num_bufs = 0;
-	bl->spills = 0;
-	bl->limit = limit;
-	bl->sbp = sdp;
-	osi_list_init(&bl->list);
-	for(i = 0; i < BUF_HASH_SIZE; i++)
-		osi_list_init(&bl->buf_hash[i]);
-}
-
-static int add_buffer(struct buf_list *bl, struct gfs2_buffer_head *bh)
-{
-	osi_list_t *head = blkno2head(bl, bh->b_blocknr);
-
-	osi_list_add(&bh->b_list, &bl->list);
-	osi_list_add(&bh->b_hash, head);
-	bl->num_bufs++;
-
-	if (bl->num_bufs * bl->sbp->bsize > bl->limit) {
-		int found = 0;
-		osi_list_t *tmp, *x;
-
-		for (tmp = bl->list.prev, x = tmp->prev; tmp != &bl->list;
-		     tmp = x, x = x->prev) {
-			bh = osi_list_entry(tmp, struct gfs2_buffer_head,
-					    b_list);
-			if (!bh->b_count) {
-				if (write_buffer(bl, bh))
-					return -1;
-				found++;
-				if (found >= 10)
-					break;
-			}
-		}
-		bl->spills++;
-	}
-	return 0;
-}
-
-struct gfs2_buffer_head *bfind(struct buf_list *bl, uint64_t num)
-{
-	osi_list_t *head = blkno2head(bl, num);
-	osi_list_t *tmp;
-	struct gfs2_buffer_head *bh;
-
-	for (tmp = head->next; tmp != head; tmp = tmp->next) {
-		bh = osi_list_entry(tmp, struct gfs2_buffer_head, b_hash);
-		if (bh->b_blocknr == num) {
-			osi_list_del(&bh->b_list);
-			osi_list_add(&bh->b_list, &bl->list);
-			osi_list_del(&bh->b_hash);
-			osi_list_add(&bh->b_hash, head);
-			bh->b_count++;
-			return bh;
-		}
-	}
-
-	return NULL;
-}
-
-struct gfs2_buffer_head *__bget_generic(struct buf_list *bl, uint64_t num,
-					int find_existing, int read_disk,
+struct gfs2_buffer_head *__bget_generic(struct gfs2_sbd *sdp, uint64_t num,
+					int read_disk,
 					int line, const char *caller)
 {
 	struct gfs2_buffer_head *bh;
-	struct gfs2_sbd *sdp = bl->sbp;
 
-	if (find_existing) {
-		bh = bfind(bl, num);
-		if (bh)
-			return bh;
-	}
 	bh = calloc(1, sizeof(struct gfs2_buffer_head) + sdp->bsize);
 	if (bh == NULL)
 		return NULL;
 
-	bh->b_count = 1;
 	bh->b_blocknr = num;
+	bh->sdp = sdp;
 	bh->b_data = (char *)bh + sizeof(struct gfs2_buffer_head);
 	if (read_disk) {
 		if (lseek(sdp->device_fd, num * sdp->bsize, SEEK_SET) !=
@@ -140,116 +42,48 @@ struct gfs2_buffer_head *__bget_generic(struct buf_list *bl, uint64_t num,
 			exit(-1);
 		}
 	}
-	if (add_buffer(bl, bh)) {
-		fprintf(stderr, "bad write: %s from %s:%d: block "
-			"%llu (0x%llx)\n", strerror(errno),
-			caller, line, (unsigned long long)num,
-			(unsigned long long)num);
-		exit(-1);
-	}
-	bh->b_changed = FALSE;
 
 	return bh;
 }
 
-struct gfs2_buffer_head *__bget(struct buf_list *bl, uint64_t num, int line,
+struct gfs2_buffer_head *__bget(struct gfs2_sbd *sdp, uint64_t num, int line,
 				const char *caller)
 {
-	return __bget_generic(bl, num, TRUE, FALSE, line, caller);
+	return __bget_generic(sdp, num, FALSE, line, caller);
 }
 
-struct gfs2_buffer_head *__bread(struct buf_list *bl, uint64_t num, int line,
+struct gfs2_buffer_head *__bread(struct gfs2_sbd *sdp, uint64_t num, int line,
 				 const char *caller)
 {
-	return __bget_generic(bl, num, TRUE, TRUE, line, caller);
-}
-
-struct gfs2_buffer_head *bhold(struct gfs2_buffer_head *bh)
-{
-	if (!bh->b_count)
-		return NULL;
-	bh->b_count++;
-	return bh;
-}
-
-void brelse(struct gfs2_buffer_head *bh, enum update_flags is_updated)
-{
-    /* We can't just say b_changed = updated because we don't want to     */
-	/* set it FALSE if it's TRUE until we write the changed data to disk. */
-	if (is_updated)
-		bh->b_changed = TRUE;
-	if (!bh->b_count) {
-		fprintf(stderr, "buffer count underflow for block %" PRIu64
-			" (0x%" PRIx64")\n", bh->b_blocknr, bh->b_blocknr);
-		exit(-1);
-	}
-	bh->b_count--;
+	return __bget_generic(sdp, num, TRUE, line, caller);
 }
 
-void __bsync(struct buf_list *bl, int line, const char *caller)
+int bwrite(struct gfs2_buffer_head *bh)
 {
-	struct gfs2_buffer_head *bh;
+	struct gfs2_sbd *sdp = bh->sdp;
 
-	while (!osi_list_empty(&bl->list)) {
-		bh = osi_list_entry(bl->list.prev, struct gfs2_buffer_head,
-							b_list);
-		if (bh->b_count) {
-			fprintf(stderr, "buffer still held for block: %" PRIu64
-				" (0x%" PRIx64")\n", bh->b_blocknr, bh->b_blocknr);
-			exit(-1);
-		}
-		if (write_buffer(bl, bh)) {
-			fprintf(stderr, "bad write: %s from %s:%d: block "
-				"%lld (0x%llx)\n", strerror(errno),
-				caller, line,
-				(unsigned long long)bh->b_blocknr,
-				(unsigned long long)bh->b_blocknr);
-			exit(-1);
-		}
+	if (lseek(sdp->device_fd, bh->b_blocknr * sdp->bsize, SEEK_SET) !=
+	    bh->b_blocknr * sdp->bsize) {
+		return -1;
 	}
+	if (write(sdp->device_fd, bh->b_data, sdp->bsize) != sdp->bsize)
+		return -1;
+	sdp->writes++;
+	bh->b_modified = 0;
+	return 0;
 }
 
-/* commit buffers to disk but do not discard */
-void __bcommit(struct buf_list *bl, int line, const char *caller)
+int brelse(struct gfs2_buffer_head *bh)
 {
-	osi_list_t *tmp, *x;
-	struct gfs2_buffer_head *bh;
-	struct gfs2_sbd *sdp = bl->sbp;
-
-	osi_list_foreach_safe(tmp, &bl->list, x) {
-		bh = osi_list_entry(tmp, struct gfs2_buffer_head, b_list);
-		if (!bh->b_count) {            /* if not reserved for later */
-			if (write_buffer(bl, bh)) { /* write & free */
-				fprintf(stderr, "bad write: %s from %s:%d: "
-					"block %lld (0x%llx)\n",
-					strerror(errno), caller, line,
-					(unsigned long long)bh->b_blocknr,
-					(unsigned long long)bh->b_blocknr);
-				exit(-1);
-			}
-		} else if (bh->b_changed) {     /* if buffer has changed */
-			if (lseek(sdp->device_fd,
-				  bh->b_blocknr * sdp->bsize, SEEK_SET) !=
-			    bh->b_blocknr * sdp->bsize) {
-				fprintf(stderr, "bad seek: %s from %s:%d: "
-					"block %lld (0x%llx)\n",
-					strerror(errno), caller, line,
-					(unsigned long long)bh->b_blocknr,
-					(unsigned long long)bh->b_blocknr);
-				exit(-1);
-			}
-			if (write(sdp->device_fd, bh->b_data, sdp->bsize) !=
-			    sdp->bsize) {
-				fprintf(stderr, "bad write: %s from %s:%d: "
-					"block %lld (0x%llx)\n",
-					strerror(errno), caller, line,
-					(unsigned long long)bh->b_blocknr,
-					(unsigned long long)bh->b_blocknr);
-				exit(-1);
-			}
-			bh->b_changed = FALSE;    /* no longer changed */
-		}
-	}
-	fsync(sdp->device_fd);
+	int error = 0;
+
+	if (bh->b_blocknr == -1)
+		printf("Double free!\n");
+	if (bh->b_modified)
+		error = bwrite(bh);
+	bh->b_blocknr = -1;
+	if (bh->b_altlist.next && !osi_list_empty(&bh->b_altlist))
+		osi_list_del(&bh->b_altlist);
+	free(bh);
+	return error;
 }
-
diff --git a/gfs2/libgfs2/fs_bits.c b/gfs2/libgfs2/fs_bits.c
index ed459af..c420dfc 100644
--- a/gfs2/libgfs2/fs_bits.c
+++ b/gfs2/libgfs2/fs_bits.c
@@ -1,9 +1,22 @@
 #include <inttypes.h>
 #include <stdlib.h>
+#include <string.h>
 #include <unistd.h>
 
 #include "libgfs2.h"
 
+#if BITS_PER_LONG == 32
+#define LBITMASK   (0x55555555UL)
+#define LBITSKIP55 (0x55555555UL)
+#define LBITSKIP00 (0x00000000UL)
+#else
+#define LBITMASK   (0x5555555555555555UL)
+#define LBITSKIP55 (0x5555555555555555UL)
+#define LBITSKIP00 (0x0000000000000000UL)
+#endif
+
+#define ALIGN(x,a) (((x)+(a)-1)&~((a)-1))
+
 /**
  * gfs2_bitfit - Find a free block in the bitmaps
  * @buffer: the buffer that holds the bitmaps
@@ -17,32 +30,55 @@
 uint32_t gfs2_bitfit(unsigned char *buffer, unsigned int buflen,
 		     uint32_t goal, unsigned char old_state)
 {
-	unsigned char *byte, *end, alloc;
-	uint32_t blk = goal;
-	unsigned int bit;
-
-	byte = buffer + (goal / GFS2_NBBY);
-	bit = (goal % GFS2_NBBY) * GFS2_BIT_SIZE;
-	end = buffer + buflen;
-	alloc = (old_state & 1) ? 0 : 0x55;
-
-	while (byte < end){
-		if ((*byte & 0x55) == alloc){
-			blk += (8 - bit) >> 1;
-			bit = 0;
-			byte++;
-			continue;
+	const uint8_t *byte, *start, *end;
+	int bit, startbit;
+	uint32_t g1, g2, misaligned;
+	unsigned long *plong;
+	unsigned long lskipval;
+
+	lskipval = (old_state & GFS2_BLKST_USED) ? LBITSKIP00 : LBITSKIP55;
+	g1 = (goal / GFS2_NBBY);
+	start = buffer + g1;
+	byte = start;
+        end = buffer + buflen;
+	g2 = ALIGN(g1, sizeof(unsigned long));
+	plong = (unsigned long *)(buffer + g2);
+	startbit = bit = (goal % GFS2_NBBY) * GFS2_BIT_SIZE;
+	misaligned = g2 - g1;
+	if (!misaligned)
+		goto ulong_aligned;
+/* parse the bitmap a byte at a time */
+misaligned:
+	while (byte < end) {
+		if (((*byte >> bit) & GFS2_BIT_MASK) == old_state) {
+			return goal +
+				(((byte - start) * GFS2_NBBY) +
+				 ((bit - startbit) >> 1));
 		}
-
-		if (((*byte >> bit) & GFS2_BIT_MASK) == old_state)
-			return blk;
-
 		bit += GFS2_BIT_SIZE;
-		if (bit >= 8){
+		if (bit >= GFS2_NBBY * GFS2_BIT_SIZE) {
 			bit = 0;
 			byte++;
+			misaligned--;
+			if (!misaligned) {
+				plong = (unsigned long *)byte;
+				goto ulong_aligned;
+			}
 		}
-		blk++;
+	}
+	return BFITNOENT;
+
+/* parse the bitmap a unsigned long at a time */
+ulong_aligned:
+	while ((unsigned char *)plong < end) {
+		if (((*plong) & LBITMASK) != lskipval)
+			break;
+		plong++;
+	}
+	if ((unsigned char *)plong < end) {
+		byte = (const uint8_t *)plong;
+		misaligned += sizeof(unsigned long) - 1;
+		goto misaligned;
 	}
 	return BFITNOENT;
 }
@@ -122,8 +158,6 @@ int gfs2_set_bitmap(struct gfs2_sbd *sdp, uint64_t blkno, int state)
 	if(!rgd)
 		return -1;
 
-	if(gfs2_rgrp_read(sdp, rgd))
-		return -1;
 	rgrp_block = (uint32_t)(blkno - rgd->ri.ri_data0);
 	for(buf= 0; buf < rgd->ri.ri_length; buf++){
 		bits = &(rgd->bits[buf]);
@@ -139,12 +173,12 @@ int gfs2_set_bitmap(struct gfs2_sbd *sdp, uint64_t blkno, int state)
 	*byte ^= cur_state << bit;
 	*byte |= state << bit;
 
-	gfs2_rgrp_relse(rgd, updated);
+	bmodified(rgd->bh[buf]);
 	return 0;
 }
 
 /*
- * fs_get_bitmap - get value of FS bitmap
+ * gfs2_get_bitmap - get value of FS bitmap
  * @sdp: super block
  * @blkno: block number relative to file system
  *
@@ -166,40 +200,28 @@ int gfs2_get_bitmap(struct gfs2_sbd *sdp, uint64_t blkno,
 	struct gfs2_bitmap	*bits = NULL;
 	unsigned int  bit;
 	unsigned char *byte;
-	int local_rgd = 0;
 
-	if(gfs2_check_range(sdp, blkno))
-		return -1;
-	if(rgd == NULL) {
-		local_rgd = 1;
+	if (rgd == NULL) {
 		rgd = gfs2_blk2rgrpd(sdp, blkno);
+		if(rgd == NULL)
+			return -1;
 	}
-	if(rgd == NULL)
-		return -1;
-	if(gfs2_rgrp_read(sdp, rgd))
-		return -1;
 
 	rgrp_block = (uint32_t)(blkno - rgd->ri.ri_data0);
 
-	for(i= 0; i < rgd->ri.ri_length; i++){
+	for (i = 0; i < rgd->ri.ri_length; i++) {
 		bits = &(rgd->bits[i]);
-		if(rgrp_block < ((bits->bi_start + bits->bi_len)*GFS2_NBBY)){
+		if(rgrp_block < ((bits->bi_start + bits->bi_len)*GFS2_NBBY))
 			break;
-		}
 	}
 
-	if(i >= rgd->ri.ri_length){
-		gfs2_rgrp_relse(rgd, not_updated);
+	if (i >= rgd->ri.ri_length)
 		return -1;
-	}
-
 	byte = (unsigned char *)(rgd->bh[i]->b_data + bits->bi_offset) +
 		(rgrp_block/GFS2_NBBY - bits->bi_start);
 	bit = (rgrp_block % GFS2_NBBY) * GFS2_BIT_SIZE;
 
 	val = ((*byte >> bit) & GFS2_BIT_MASK);
-	if(local_rgd)
-		gfs2_rgrp_relse(rgd, not_updated);
 
 	return val;
 }
diff --git a/gfs2/libgfs2/fs_geometry.c b/gfs2/libgfs2/fs_geometry.c
index 9c5daf1..a06e8a2 100644
--- a/gfs2/libgfs2/fs_geometry.c
+++ b/gfs2/libgfs2/fs_geometry.c
@@ -112,7 +112,6 @@ void compute_rgrp_layout(struct gfs2_sbd *sdp, int rgsize_specified)
 			rl->length = dev->length -
 				(nrgrp - 1) * (dev->length / nrgrp);
 		}
-		rl->rgf_flags = dev->rgf_flags;
 
 		log_info("%d: start: %" PRIu64 " (0x%"
 			 PRIx64 "), length = %"PRIu64" (0x%"
@@ -183,10 +182,8 @@ void build_rgrps(struct gfs2_sbd *sdp, int do_write)
 	struct rgrp_list *rl;
 	uint32_t rgblocks, bitblocks;
 	struct gfs2_rindex *ri;
-	struct gfs2_rgrp *rg;
 	struct gfs2_meta_header mh;
 	unsigned int x;
-	struct gfs2_buffer_head *bh;
 
 	mh.mh_magic = GFS2_MAGIC;
 	mh.mh_type = GFS2_METATYPE_RB;
@@ -197,7 +194,6 @@ void build_rgrps(struct gfs2_sbd *sdp, int do_write)
 	     tmp = tmp->next) {
 		rl = osi_list_entry(tmp, struct rgrp_list, list);
 		ri = &rl->ri;
-		rg = &rl->rg;
 
 		rgblocks = rl->length;
 		rgblocks2bitblocks(sdp->bsize, &rgblocks, &bitblocks);
@@ -208,20 +204,21 @@ void build_rgrps(struct gfs2_sbd *sdp, int do_write)
 		ri->ri_data = rgblocks;
 		ri->ri_bitbytes = rgblocks / GFS2_NBBY;
 
-		rg->rg_header.mh_magic = GFS2_MAGIC;
-		rg->rg_header.mh_type = GFS2_METATYPE_RG;
-		rg->rg_header.mh_format = GFS2_FORMAT_RG;
-		rg->rg_flags = rl->rgf_flags;
-		rg->rg_free = rgblocks;
+		memset(&rl->rg, 0, sizeof(rl->rg));
+		rl->rg.rg_header.mh_magic = GFS2_MAGIC;
+		rl->rg.rg_header.mh_type = GFS2_METATYPE_RG;
+		rl->rg.rg_header.mh_format = GFS2_FORMAT_RG;
+		rl->rg.rg_free = rgblocks;
+
+		gfs2_compute_bitstructs(sdp, rl);
 
 		if (do_write) {
 			for (x = 0; x < bitblocks; x++) {
-				bh = bget(&sdp->nvbuf_list, rl->start + x);
+				rl->bh[x] = bget(sdp, rl->start + x);
 				if (x)
-					gfs2_meta_header_out(&mh, bh->b_data);
+					gfs2_meta_header_out(&mh, rl->bh[x]);
 				else
-					gfs2_rgrp_out(rg, bh->b_data);
-				brelse(bh, updated);
+					gfs2_rgrp_out(&rl->rg, rl->bh[x]);
 			}
 		}
 
diff --git a/gfs2/libgfs2/fs_ops.c b/gfs2/libgfs2/fs_ops.c
index fc898ce..7c698ef 100644
--- a/gfs2/libgfs2/fs_ops.c
+++ b/gfs2/libgfs2/fs_ops.c
@@ -37,18 +37,79 @@ struct gfs2_inode *inode_get(struct gfs2_sbd *sdp, struct gfs2_buffer_head *bh)
 		fprintf(stderr, "Out of memory in %s\n", __FUNCTION__);
 		exit(-1);
 	}
-	gfs2_dinode_in(&ip->i_di, bh->b_data);
+	gfs2_dinode_in(&ip->i_di, bh);
 	ip->i_bh = bh;
 	ip->i_sbd = sdp;
+	ip->bh_owned = 0; /* caller did the bread so we don't own the bh */
 	return ip;
 }
 
-void inode_put(struct gfs2_inode *ip, enum update_flags is_updated)
+struct gfs2_inode *inode_read(struct gfs2_sbd *sdp, uint64_t di_addr)
 {
-	if (is_updated)
-		gfs2_dinode_out(&ip->i_di, ip->i_bh->b_data);
-	brelse(ip->i_bh, is_updated);
+	struct gfs2_inode *ip;
+
+	ip = calloc(1, sizeof(struct gfs2_inode));
+	if (ip == NULL) {
+		fprintf(stderr, "Out of memory in %s\n", __FUNCTION__);
+		exit(-1);
+	}
+	ip->i_bh = bread(sdp, di_addr);
+	gfs2_dinode_in(&ip->i_di, ip->i_bh);
+	ip->i_sbd = sdp;
+	ip->bh_owned = 1; /* We did the bread so we own the bh */
+	return ip;
+}
+
+struct gfs2_inode *is_system_inode(struct gfs2_sbd *sdp, uint64_t block)
+{
+	int j;
+
+	if (sdp->md.inum && block == sdp->md.inum->i_di.di_num.no_addr)
+		return sdp->md.inum;
+	if (sdp->md.statfs && block == sdp->md.statfs->i_di.di_num.no_addr)
+		return sdp->md.statfs;
+	if (sdp->md.jiinode && block == sdp->md.jiinode->i_di.di_num.no_addr)
+		return sdp->md.jiinode;
+	if (sdp->md.riinode && block == sdp->md.riinode->i_di.di_num.no_addr)
+		return sdp->md.riinode;
+	if (sdp->md.qinode && block == sdp->md.qinode->i_di.di_num.no_addr)
+		return sdp->md.qinode;
+	if (sdp->md.pinode && block == sdp->md.pinode->i_di.di_num.no_addr)
+		return sdp->md.pinode;
+	if (sdp->md.rooti && block == sdp->md.rooti->i_di.di_num.no_addr)
+		return sdp->md.rooti;
+	if (sdp->master_dir && block == sdp->master_dir->i_di.di_num.no_addr)
+		return sdp->master_dir;
+	for (j = 0; j < sdp->md.journals; j++)
+		if (sdp->md.journal && sdp->md.journal[j] &&
+		    block == sdp->md.journal[j]->i_di.di_num.no_addr)
+			return sdp->md.journal[j];
+	return NULL;
+}
+
+void inode_put(struct gfs2_inode **ip_in)
+{
+	struct gfs2_inode *ip = *ip_in;
+	uint64_t block = ip->i_di.di_num.no_addr;
+	struct gfs2_sbd *sdp = ip->i_sbd;
+
+	if (ip->i_bh->b_modified) {
+		gfs2_dinode_out(&ip->i_di, ip->i_bh);
+		if (!ip->bh_owned && is_system_inode(sdp, block))
+			fprintf(stderr, "Warning: Change made to inode "
+				"were discarded.\n");
+		/* This is for debugging only: a convenient place to set
+		   a breakpoint. This means a system inode was modified but
+		   not written.  That's not fatal: some places like
+		   adjust_inode in gfs2_convert will do this on purpose.
+		   It can also point out a coding problem, but we don't
+		   want to raise alarm in the users either. */
+	}
+	if (ip->bh_owned)
+		brelse(ip->i_bh);
+	ip->i_bh = NULL;
 	free(ip);
+	*ip_in = NULL; /* make sure the memory isn't accessed again */
 }
 
 static uint64_t blk_alloc_i(struct gfs2_sbd *sdp, unsigned int type)
@@ -58,11 +119,11 @@ static uint64_t blk_alloc_i(struct gfs2_sbd *sdp, unsigned int type)
 	struct gfs2_rindex *ri;
 	struct gfs2_rgrp *rg;
 	unsigned int block, bn = 0, x = 0, y = 0;
-	struct gfs2_buffer_head *bh;
 	unsigned int state;
+	struct gfs2_buffer_head *bh;
 
-	for (head = &sdp->rglist, tmp = head->next;
-	     tmp != head;
+	memset(&rg, 0, sizeof(rg));
+	for (head = &sdp->rglist, tmp = head->next; tmp != head;
 	     tmp = tmp->next) {
 		rl = osi_list_entry(tmp, struct rgrp_list, list);
 		if (rl->rg.rg_free)
@@ -76,7 +137,7 @@ static uint64_t blk_alloc_i(struct gfs2_sbd *sdp, unsigned int type)
 	rg = &rl->rg;
 
 	for (block = 0; block < ri->ri_length; block++) {
-		bh = bread(&sdp->nvbuf_list, ri->ri_addr + block);
+		bh = rl->bh[block];
 		x = (block) ? sizeof(struct gfs2_meta_header) : sizeof(struct gfs2_rgrp);
 
 		for (; x < sdp->bsize; x++)
@@ -86,8 +147,6 @@ static uint64_t blk_alloc_i(struct gfs2_sbd *sdp, unsigned int type)
 					goto found;
 				bn++;
 			}
-
-		brelse(bh, FALSE);
 	}
 
 	die("allocation is broken (1): %"PRIu64" %u\n",
@@ -95,8 +154,9 @@ static uint64_t blk_alloc_i(struct gfs2_sbd *sdp, unsigned int type)
 
 found:
 	if (bn >= ri->ri_bitbytes * GFS2_NBBY)
-		die("allocation is broken (2): %u %u %"PRIu64" %u\n",
-		    bn, ri->ri_bitbytes * GFS2_NBBY,
+		die("allocation is broken (2): bn: %u %u rgrp: %"PRIu64
+		    " (0x%" PRIx64 ") Free:%u\n",
+		    bn, ri->ri_bitbytes * GFS2_NBBY, (uint64_t)rl->ri.ri_addr,
 		    (uint64_t)rl->ri.ri_addr, rl->rg.rg_free);
 
 	switch (type) {
@@ -116,14 +176,10 @@ found:
 	bh->b_data[x] |= state << (GFS2_BIT_SIZE * y);
 	rg->rg_free--;
 
-	brelse(bh, updated);
-
-	bh = bread(&sdp->nvbuf_list, ri->ri_addr);
-	gfs2_rgrp_out(rg, bh->b_data);
-	brelse(bh, updated);
+	bmodified(bh);
+	gfs2_rgrp_out(rg, rl->bh[0]);
 
 	sdp->blks_alloced++;
-
 	return ri->ri_data0 + bn;
 }
 
@@ -132,6 +188,7 @@ uint64_t data_alloc(struct gfs2_inode *ip)
 	uint64_t x;
 	x = blk_alloc_i(ip->i_sbd, DATA);
 	ip->i_di.di_goal_data = x;
+	bmodified(ip->i_bh);
 	return x;
 }
 
@@ -140,6 +197,7 @@ uint64_t meta_alloc(struct gfs2_inode *ip)
 	uint64_t x;
 	x = blk_alloc_i(ip->i_sbd, META);
 	ip->i_di.di_goal_meta = x;
+	bmodified(ip->i_bh);
 	return x;
 }
 
@@ -153,6 +211,7 @@ static __inline__ void buffer_clear_tail(struct gfs2_sbd *sdp,
 					 struct gfs2_buffer_head *bh, int head)
 {
 	memset(bh->b_data + head, 0, sdp->bsize - head);
+	bmodified(bh);
 }
 
 static __inline__ void
@@ -164,6 +223,7 @@ buffer_copy_tail(struct gfs2_sbd *sdp,
 	       sdp->bsize - from_head);
 	memset(to_bh->b_data + sdp->bsize + to_head - from_head, 0,
 	       from_head - to_head);
+	bmodified(to_bh);
 }
 
 void unstuff_dinode(struct gfs2_inode *ip)
@@ -175,29 +235,27 @@ void unstuff_dinode(struct gfs2_inode *ip)
 
 	if (ip->i_di.di_size) {
 		if (isdir) {
+			struct gfs2_meta_header mh;
+
 			block = meta_alloc(ip);
-			bh = bget(&sdp->buf_list, block);
-			{
-				struct gfs2_meta_header mh;
-				mh.mh_magic = GFS2_MAGIC;
-				mh.mh_type = GFS2_METATYPE_JD;
-				mh.mh_format = GFS2_FORMAT_JD;
-				gfs2_meta_header_out(&mh, bh->b_data);
-			}
+			bh = bget(sdp, block);
+			mh.mh_magic = GFS2_MAGIC;
+			mh.mh_type = GFS2_METATYPE_JD;
+			mh.mh_format = GFS2_FORMAT_JD;
+			gfs2_meta_header_out(&mh, bh);
 
 			buffer_copy_tail(sdp, bh,
 					 sizeof(struct gfs2_meta_header),
 					 ip->i_bh, sizeof(struct gfs2_dinode));
 
-			brelse(bh, updated);
+			brelse(bh);
 		} else {
 			block = data_alloc(ip);
-			bh = bget(&sdp->buf_list, block);
+			bh = bget(sdp, block);
 
 			buffer_copy_tail(sdp, bh, 0,
 					 ip->i_bh, sizeof(struct gfs2_dinode));
-
-			brelse(bh, updated);
+			brelse(bh);
 		}
 	}
 
@@ -205,6 +263,7 @@ void unstuff_dinode(struct gfs2_inode *ip)
 
 	if (ip->i_di.di_size) {
 		*(uint64_t *)(ip->i_bh->b_data + sizeof(struct gfs2_dinode)) = cpu_to_be64(block);
+		/* no need: bmodified(ip->i_bh); buffer_clear_tail does it */
 		ip->i_di.di_blocks++;
 	}
 
@@ -253,26 +312,25 @@ void build_height(struct gfs2_inode *ip, int height)
 			}
 
 		if (new_block) {
+			struct gfs2_meta_header mh;
+
 			block = meta_alloc(ip);
-			bh = bget(&sdp->buf_list, block);
-			{
-				struct gfs2_meta_header mh;
-				mh.mh_magic = GFS2_MAGIC;
-				mh.mh_type = GFS2_METATYPE_IN;
-				mh.mh_format = GFS2_FORMAT_IN;
-				gfs2_meta_header_out(&mh, bh->b_data);
-			}
+			bh = bget(sdp, block);
+			mh.mh_magic = GFS2_MAGIC;
+			mh.mh_type = GFS2_METATYPE_IN;
+			mh.mh_format = GFS2_FORMAT_IN;
+			gfs2_meta_header_out(&mh, bh);
 			buffer_copy_tail(sdp, bh,
 					 sizeof(struct gfs2_meta_header),
 					 ip->i_bh, sizeof(struct gfs2_dinode));
-
-			brelse(bh, updated);
+			brelse(bh);
 		}
 
 		buffer_clear_tail(sdp, ip->i_bh, sizeof(struct gfs2_dinode));
 
 		if (new_block) {
 			*(uint64_t *)(ip->i_bh->b_data + sizeof(struct gfs2_dinode)) = cpu_to_be64(block);
+			/* no need: bmodified(ip->i_bh);*/
 			ip->i_di.di_blocks++;
 		}
 
@@ -323,14 +381,15 @@ void lookup_block(struct gfs2_inode *ip, struct gfs2_buffer_head *bh,
 		*block = meta_alloc(ip);
 
 	*ptr = cpu_to_be64(*block);
+	bmodified(bh);
 	ip->i_di.di_blocks++;
+	bmodified(ip->i_bh);
 
 	*new = 1;
 }
 
 void block_map(struct gfs2_inode *ip, uint64_t lblock, int *new,
-	       uint64_t *dblock, uint32_t *extlen, int prealloc,
-	       enum update_flags if_changed)
+	       uint64_t *dblock, uint32_t *extlen, int prealloc)
 {
 	struct gfs2_sbd *sdp = ip->i_sbd;
 	struct gfs2_buffer_head *bh;
@@ -368,23 +427,28 @@ void block_map(struct gfs2_inode *ip, uint64_t lblock, int *new,
 	mp = find_metapath(ip, lblock);
 	end_of_metadata = ip->i_di.di_height - 1;
 
-	bh = bhold(ip->i_bh);
+	bh = ip->i_bh;
 
 	for (x = 0; x < end_of_metadata; x++) {
 		lookup_block(ip, bh, x, mp, create, new, dblock);
-		brelse(bh, if_changed);
+		if (bh != ip->i_bh)
+			brelse(bh);
 		if (!*dblock)
 			goto out;
 
 		if (*new) {
 			struct gfs2_meta_header mh;
-			bh = bget(&sdp->buf_list, *dblock);
+			bh = bget(sdp, *dblock);
 			mh.mh_magic = GFS2_MAGIC;
 			mh.mh_type = GFS2_METATYPE_IN;
 			mh.mh_format = GFS2_FORMAT_IN;
-			gfs2_meta_header_out(&mh, bh->b_data);
-		} else
-			bh = bread(&sdp->buf_list, *dblock);
+			gfs2_meta_header_out(&mh, bh);
+		} else {
+			if (*dblock == ip->i_di.di_num.no_addr)
+				bh = ip->i_bh;
+			else
+				bh = bread(sdp, *dblock);
+		}
 	}
 
 	if (!prealloc)
@@ -412,7 +476,8 @@ void block_map(struct gfs2_inode *ip, uint64_t lblock, int *new,
 		}
 	}
 
-	brelse(bh, if_changed);
+	if (bh != ip->i_bh)
+		brelse(bh);
 
  out:
 	free(mp);
@@ -475,18 +540,21 @@ int gfs2_readi(struct gfs2_inode *ip, void *buf,
 
 		if (!extlen)
 			block_map(ip, lblock, &not_new, &dblock, &extlen,
-				  FALSE, not_updated);
+				  FALSE);
 
 		if (dblock) {
-			bh = bread(&sdp->buf_list, dblock);
+			if (dblock == ip->i_di.di_num.no_addr)
+				bh = ip->i_bh;
+			else
+				bh = bread(sdp, dblock);
 			dblock++;
 			extlen--;
 		} else
 			bh = NULL;
 
 		copy2mem(bh, &buf, o, amount);
-		if (bh)
-			brelse(bh, not_updated);
+		if (bh && bh != ip->i_bh)
+			brelse(bh);
 
 		copied += amount;
 		lblock++;
@@ -498,11 +566,12 @@ int gfs2_readi(struct gfs2_inode *ip, void *buf,
 }
 
 static void copy_from_mem(struct gfs2_buffer_head *bh, void **buf,
-						  unsigned int offset, unsigned int size)
+			  unsigned int offset, unsigned int size)
 {
 	char **p = (char **)buf;
 
 	memcpy(bh->b_data + offset, *p, size);
+	bmodified(bh);
 	*p += size;
 }
 
@@ -548,23 +617,27 @@ int gfs2_writei(struct gfs2_inode *ip, void *buf,
 
 		if (!extlen) {
 			new = TRUE;
-			block_map(ip, lblock, &new, &dblock, &extlen, FALSE,
-				  updated);
+			block_map(ip, lblock, &new, &dblock, &extlen, FALSE);
 		}
 
 		if (new) {
-			bh = bget(&sdp->buf_list, dblock);
+			bh = bget(sdp, dblock);
 			if (isdir) {
 				struct gfs2_meta_header mh;
 				mh.mh_magic = GFS2_MAGIC;
 				mh.mh_type = GFS2_METATYPE_JD;
 				mh.mh_format = GFS2_FORMAT_JD;
-				gfs2_meta_header_out(&mh, bh->b_data);
+				gfs2_meta_header_out(&mh, bh);
 			}
-		} else
-			bh = bread(&sdp->buf_list, dblock);
+		} else {
+			if (dblock == ip->i_di.di_num.no_addr)
+				bh = ip->i_bh;
+			else
+				bh = bread(sdp, dblock);
+		}
 		copy_from_mem(bh, &buf, o, amount);
-		brelse(bh, updated);
+		if (bh != ip->i_bh)
+			brelse(bh);
 
 		copied += amount;
 		lblock++;
@@ -574,8 +647,10 @@ int gfs2_writei(struct gfs2_inode *ip, void *buf,
 		o = (isdir) ? sizeof(struct gfs2_meta_header) : 0;
 	}
 
-	if (ip->i_di.di_size < start + copied)
+	if (ip->i_di.di_size < start + copied) {
+		bmodified(ip->i_bh);
 		ip->i_di.di_size = start + copied;
+	}
 
 	return copied;
 }
@@ -590,18 +665,19 @@ struct gfs2_buffer_head *get_file_buf(struct gfs2_inode *ip, uint64_t lbn,
 	if (inode_is_stuffed(ip))
 		unstuff_dinode(ip);
 
-	block_map(ip, lbn, &new, &dbn, NULL, prealloc, updated);
+	block_map(ip, lbn, &new, &dbn, NULL, prealloc);
 	if (!dbn)
 		die("get_file_buf\n");
 
 	if (!prealloc && new &&
-	    ip->i_di.di_size < (lbn + 1) << sdp->sd_sb.sb_bsize_shift)
+	    ip->i_di.di_size < (lbn + 1) << sdp->sd_sb.sb_bsize_shift) {
+		bmodified(ip->i_bh);
 		ip->i_di.di_size = (lbn + 1) << sdp->sd_sb.sb_bsize_shift;
-
-	if (new)
-		return bget(&sdp->buf_list, dbn);
+	}
+	if (dbn == ip->i_di.di_num.no_addr)
+		return ip->i_bh;
 	else
-		return bread(&sdp->buf_list, dbn);
+		return bread(sdp, dbn);
 }
 
 int gfs2_dirent_first(struct gfs2_inode *dip, struct gfs2_buffer_head *bh,
@@ -627,7 +703,7 @@ int gfs2_dirent_next(struct gfs2_inode *dip, struct gfs2_buffer_head *bh,
 	bh_end = bh->b_data + dip->i_sbd->bsize;
 	cur_rec_len = be16_to_cpu((*dent)->de_rec_len);
 
-	if ((char *)(*dent) + cur_rec_len >= bh_end)
+	if (cur_rec_len == 0 || (char *)(*dent) + cur_rec_len >= bh_end)
 		return -ENOENT;
 
 	*dent = (struct gfs2_dirent *)((char *)(*dent) + cur_rec_len);
@@ -658,8 +734,10 @@ static int dirent_alloc(struct gfs2_inode *dip, struct gfs2_buffer_head *bh,
 	if (!entries) {
 		dent->de_rec_len = cpu_to_be16(dip->i_sbd->bsize - offset);
 		dent->de_name_len = cpu_to_be16(name_len);
-
+		bmodified(bh);
 		*dent_out = dent;
+		dip->i_di.di_entries++;
+		bmodified(dip->i_bh);
 		return 0;
 	}
 
@@ -680,19 +758,25 @@ static int dirent_alloc(struct gfs2_inode *dip, struct gfs2_buffer_head *bh,
 				memset(new, 0, sizeof(struct gfs2_dirent));
 
 				new->de_rec_len = cpu_to_be16(cur_rec_len -
-											  GFS2_DIRENT_SIZE(cur_name_len));
+					  GFS2_DIRENT_SIZE(cur_name_len));
 				new->de_name_len = cpu_to_be16(name_len);
 
 				new_rec_len = be16_to_cpu(new->de_rec_len);
 				dent->de_rec_len = cpu_to_be16(cur_rec_len - new_rec_len);
 
 				*dent_out = new;
+				bmodified(bh);
+				dip->i_di.di_entries++;
+				bmodified(dip->i_bh);
 				return 0;
 			}
 
 			dent->de_name_len = cpu_to_be16(name_len);
 
 			*dent_out = dent;
+			bmodified(bh);
+			dip->i_di.di_entries++;
+			bmodified(dip->i_bh);
 			return 0;
 		}
 	} while (gfs2_dirent_next(dip, bh, &dent) == 0);
@@ -705,8 +789,11 @@ void dirent2_del(struct gfs2_inode *dip, struct gfs2_buffer_head *bh,
 {
 	uint16_t cur_rec_len, prev_rec_len;
 
-	if (dip->i_di.di_entries)
+	bmodified(bh);
+	if (dip->i_di.di_entries) {
+		bmodified(dip->i_bh);
 		dip->i_di.di_entries--;
+	}
 	if (!prev) {
 		cur->de_inum.no_formal_ino = 0;
 		return;
@@ -720,14 +807,13 @@ void dirent2_del(struct gfs2_inode *dip, struct gfs2_buffer_head *bh,
 }
 
 void gfs2_get_leaf_nr(struct gfs2_inode *dip, uint32_t lindex,
-					  uint64_t *leaf_out)
+		      uint64_t *leaf_out)
 {
 	uint64_t leaf_no;
 	int count;
 
-	count = gfs2_readi(dip, (char *)&leaf_no,
-		      lindex * sizeof(uint64_t),
-		      sizeof(uint64_t));
+	count = gfs2_readi(dip, (char *)&leaf_no, lindex * sizeof(uint64_t),
+			   sizeof(uint64_t));
 	if (count != sizeof(uint64_t))
 		die("gfs2_get_leaf_nr:  Bad internal read.\n");
 
@@ -746,7 +832,8 @@ void gfs2_put_leaf_nr(struct gfs2_inode *dip, uint32_t inx, uint64_t leaf_out)
 		die("gfs2_put_leaf_nr:  Bad internal write.\n");
 }
 
-static void dir_split_leaf(struct gfs2_inode *dip, uint32_t lindex, uint64_t leaf_no)
+static void dir_split_leaf(struct gfs2_inode *dip, uint32_t lindex,
+			   uint64_t leaf_no)
 {
 	struct gfs2_buffer_head *nbh, *obh;
 	struct gfs2_leaf *nleaf, *oleaf;
@@ -758,19 +845,21 @@ static void dir_split_leaf(struct gfs2_inode *dip, uint32_t lindex, uint64_t lea
 	int count;
 
 	bn = meta_alloc(dip);
-	nbh = bget(&dip->i_sbd->buf_list, bn);
+	nbh = bget(dip->i_sbd, bn);
 	{
 		struct gfs2_meta_header mh;
 		mh.mh_magic = GFS2_MAGIC;
 		mh.mh_type = GFS2_METATYPE_LF;
 		mh.mh_format = GFS2_FORMAT_LF;
-		gfs2_meta_header_out(&mh, nbh->b_data);
+		gfs2_meta_header_out(&mh, nbh);
+		buffer_clear_tail(dip->i_sbd, nbh,
+				  sizeof(struct gfs2_meta_header));
 	}
 
 	nleaf = (struct gfs2_leaf *)nbh->b_data;
 	nleaf->lf_dirent_format = cpu_to_be32(GFS2_FORMAT_DE);
 
-	obh = bread(&dip->i_sbd->buf_list, leaf_no);
+	obh = bread(dip->i_sbd, leaf_no);
 	oleaf = (struct gfs2_leaf *)obh->b_data;
 
 	len = 1 << (dip->i_di.di_depth - be16_to_cpu(oleaf->lf_depth));
@@ -783,16 +872,11 @@ static void dir_split_leaf(struct gfs2_inode *dip, uint32_t lindex, uint64_t lea
 		fprintf(stderr, "Out of memory in %s\n", __FUNCTION__);
 		exit(-1);
 	}
-	count = gfs2_readi(dip, (char *)lp, start * sizeof(uint64_t),
-		      half_len * sizeof(uint64_t));
-	if (count != half_len * sizeof(uint64_t))
-		die("dir_split_leaf (1)\n");
-
 	for (x = 0; x < half_len; x++)
 		lp[x] = cpu_to_be64(bn);
 
 	count = gfs2_writei(dip, (char *)lp, start * sizeof(uint64_t),
-		       half_len * sizeof(uint64_t));
+			    half_len * sizeof(uint64_t));
 	if (count != half_len * sizeof(uint64_t))
 		die("dir_split_leaf (2)\n");
 
@@ -811,7 +895,8 @@ static void dir_split_leaf(struct gfs2_inode *dip, uint32_t lindex, uint64_t lea
 		    be32_to_cpu(dent->de_hash) < divider) {
 			name_len = be16_to_cpu(dent->de_name_len);
 
-			dirent_alloc(dip, nbh, name_len, &new);
+			if (dirent_alloc(dip, nbh, name_len, &new))
+				die("dir_split_leaf (3)\n");
 
 			new->de_inum = dent->de_inum;
 			new->de_hash = dent->de_hash;
@@ -837,7 +922,8 @@ static void dir_split_leaf(struct gfs2_inode *dip, uint32_t lindex, uint64_t lea
 	} while (dent);
 
 	if (!moved) {
-		dirent_alloc(dip, nbh, 0, &new);
+		if (dirent_alloc(dip, nbh, 0, &new))
+			die("dir_split_leaf (4)\n");
 		new->de_inum.no_formal_ino = 0;
 	}
 
@@ -846,9 +932,11 @@ static void dir_split_leaf(struct gfs2_inode *dip, uint32_t lindex, uint64_t lea
 	nleaf->lf_depth = oleaf->lf_depth;
 
 	dip->i_di.di_blocks++;
+	bmodified(dip->i_bh);
 
-	brelse(obh, not_updated);
-	brelse(nbh, updated);
+	brelse(obh);
+	bmodified(nbh);
+	brelse(nbh);
 }
 
 static void dir_double_exhash(struct gfs2_inode *dip)
@@ -882,7 +970,7 @@ static void dir_double_exhash(struct gfs2_inode *dip)
 		}
 
 		count = gfs2_writei(dip, (char *)buf + sdp->sd_hash_bsize,
-							block * sdp->bsize, sdp->bsize);
+				    block * sdp->bsize, sdp->bsize);
 		if (count != sdp->bsize)
 			die("dir_double_exhash (2)\n");
 
@@ -891,6 +979,7 @@ static void dir_double_exhash(struct gfs2_inode *dip)
 	free(buf);
 
 	dip->i_di.di_depth++;
+	bmodified(dip->i_bh);
 }
 
 /**
@@ -907,12 +996,12 @@ int gfs2_get_leaf(struct gfs2_inode *dip, uint64_t leaf_no,
 {
 	int error = 0;
 
-	*bhp = bread(&dip->i_sbd->buf_list, leaf_no);
+	*bhp = bread(dip->i_sbd, leaf_no);
 	if (error)
 		return error;
 	error = gfs2_check_meta(*bhp, GFS2_METATYPE_LF);
 	if(error)
-		brelse(*bhp, not_updated);
+		brelse(*bhp);
 	return error;
 }
 
@@ -931,7 +1020,7 @@ static int get_first_leaf(struct gfs2_inode *dip, uint32_t lindex,
 	uint64_t leaf_no;
 
 	gfs2_get_leaf_nr(dip, lindex, &leaf_no);
-	*bh_out = bread(&dip->i_sbd->buf_list, leaf_no);
+	*bh_out = bread(dip->i_sbd, leaf_no);
 	return 0;
 }
 
@@ -945,7 +1034,7 @@ static int get_first_leaf(struct gfs2_inode *dip, uint32_t lindex,
  */
 
 static int get_next_leaf(struct gfs2_inode *dip,struct gfs2_buffer_head *bh_in,
-			 struct gfs2_buffer_head **bh_out)
+						 struct gfs2_buffer_head **bh_out)
 {
 	struct gfs2_leaf *leaf;
 
@@ -953,7 +1042,7 @@ static int get_next_leaf(struct gfs2_inode *dip,struct gfs2_buffer_head *bh_in,
 
 	if (!leaf->lf_next)
 		return -1;
-	*bh_out = bread(&dip->i_sbd->buf_list, be64_to_cpu(leaf->lf_next));
+	*bh_out = bread(dip->i_sbd, be64_to_cpu(leaf->lf_next));
 	return 0;
 }
 
@@ -967,8 +1056,8 @@ static void dir_e_add(struct gfs2_inode *dip, const char *filename, int len,
 	uint32_t hash;
 	uint64_t leaf_no, bn;
 
- restart:
 	hash = gfs2_disk_hash(filename, len);
+restart:
 	/* Have to kludge because (hash >> 32) gives hash for some reason. */
 	if (dip->i_di.di_depth)
 		lindex = hash >> (32 - dip->i_di.di_depth);
@@ -978,36 +1067,35 @@ static void dir_e_add(struct gfs2_inode *dip, const char *filename, int len,
 	gfs2_get_leaf_nr(dip, lindex, &leaf_no);
 
 	for (;;) {
-		bh = bread(&dip->i_sbd->buf_list, leaf_no);
+		bh = bread(dip->i_sbd, leaf_no);
 		leaf = (struct gfs2_leaf *)bh->b_data;
 
 		if (dirent_alloc(dip, bh, len, &dent)) {
 
 			if (be16_to_cpu(leaf->lf_depth) < dip->i_di.di_depth) {
-				brelse(bh, not_updated);
+				brelse(bh);
 				dir_split_leaf(dip, lindex, leaf_no);
 				goto restart;
 
 			} else if (dip->i_di.di_depth < GFS2_DIR_MAX_DEPTH) {
-				brelse(bh, not_updated);
+				brelse(bh);
 				dir_double_exhash(dip);
 				goto restart;
 
 			} else if (leaf->lf_next) {
 				leaf_no = be64_to_cpu(leaf->lf_next);
-				brelse(bh, not_updated);
+				brelse(bh);
 				continue;
 
 			} else {
+				struct gfs2_meta_header mh;
+
 				bn = meta_alloc(dip);
-				nbh = bget(&dip->i_sbd->buf_list, bn);
-				{
-					struct gfs2_meta_header mh;
-					mh.mh_magic = GFS2_MAGIC;
-					mh.mh_type = GFS2_METATYPE_LF;
-					mh.mh_format = GFS2_FORMAT_LF;
-					gfs2_meta_header_out(&mh, nbh->b_data);
-				}
+				nbh = bget(dip->i_sbd, bn);
+				mh.mh_magic = GFS2_MAGIC;
+				mh.mh_type = GFS2_METATYPE_LF;
+				mh.mh_format = GFS2_FORMAT_LF;
+				gfs2_meta_header_out(&mh, nbh);
 
 				leaf->lf_next = cpu_to_be64(bn);
 
@@ -1015,9 +1103,12 @@ static void dir_e_add(struct gfs2_inode *dip, const char *filename, int len,
 				nleaf->lf_depth = leaf->lf_depth;
 				nleaf->lf_dirent_format = cpu_to_be32(GFS2_FORMAT_DE);
 
-				dirent_alloc(dip, nbh, len, &dent);
+				if (dirent_alloc(dip, nbh, len, &dent))
+					die("dir_split_leaf (3)\n");
 				dip->i_di.di_blocks++;
-				brelse(bh, updated);
+				bmodified(dip->i_bh);
+				bmodified(bh);
+				brelse(bh);
 				bh = nbh;
 				leaf = nleaf;
 			}
@@ -1031,10 +1122,8 @@ static void dir_e_add(struct gfs2_inode *dip, const char *filename, int len,
 		leaf->lf_entries = be16_to_cpu(leaf->lf_entries) + 1;
 		leaf->lf_entries = cpu_to_be16(leaf->lf_entries);
 
-		brelse(bh, updated);
-
-		dip->i_di.di_entries++;
-
+		bmodified(bh);
+		brelse(bh);
 		return;
 	}
 }
@@ -1050,13 +1139,13 @@ static void dir_make_exhash(struct gfs2_inode *dip)
 	uint64_t *lp, bn;
 
 	bn = meta_alloc(dip);
-	bh = bget(&sdp->buf_list, bn);
+	bh = bget(sdp, bn);
 	{
 		struct gfs2_meta_header mh;
 		mh.mh_magic = GFS2_MAGIC;
 		mh.mh_type = GFS2_METATYPE_LF;
 		mh.mh_format = GFS2_FORMAT_LF;
-		gfs2_meta_header_out(&mh, bh->b_data);
+		gfs2_meta_header_out(&mh, bh);
 	}
 
 	leaf = (struct gfs2_leaf *)bh->b_data;
@@ -1080,7 +1169,8 @@ static void dir_make_exhash(struct gfs2_inode *dip)
 	dent->de_rec_len = cpu_to_be16(dent->de_rec_len +
 		sizeof(struct gfs2_dinode) - sizeof(struct gfs2_leaf));
 
-	brelse(bh, updated);
+	/* no need to: bmodified(bh); (buffer_copy_tail does it) */
+	brelse(bh);
 
 	buffer_clear_tail(sdp, dip->i_bh, sizeof(struct gfs2_dinode));
 
@@ -1093,9 +1183,13 @@ static void dir_make_exhash(struct gfs2_inode *dip)
 	dip->i_di.di_blocks++;
 	dip->i_di.di_flags |= GFS2_DIF_EXHASH;
 	dip->i_di.di_payload_format = 0;
+	/* no need: bmodified(dip->i_bh); buffer_clear_tail does it. */
 
 	for (x = sdp->sd_hash_ptrs, y = -1; x; x >>= 1, y++) ;
 	dip->i_di.di_depth = y;
+
+	gfs2_dinode_out(&dip->i_di, dip->i_bh);
+	bwrite(dip->i_bh);
 }
 
 static void dir_l_add(struct gfs2_inode *dip, const char *filename, int len,
@@ -1114,8 +1208,6 @@ static void dir_l_add(struct gfs2_inode *dip, const char *filename, int len,
 	dent->de_hash = cpu_to_be32(dent->de_hash);
 	dent->de_type = cpu_to_be16(type);
 	memcpy((char *)(dent + 1), filename, len);
-
-	dip->i_di.di_entries++;
 }
 
 void dir_add(struct gfs2_inode *dip, const char *filename, int len,
@@ -1134,7 +1226,7 @@ struct gfs2_buffer_head *init_dinode(struct gfs2_sbd *sdp,
 	struct gfs2_buffer_head *bh;
 	struct gfs2_dinode di;
 
-	bh = bget(&sdp->buf_list, inum->no_addr);
+	bh = bget(sdp, inum->no_addr);
 
 	memset(&di, 0, sizeof(struct gfs2_dinode));
 	di.di_header.mh_magic = GFS2_MAGIC;
@@ -1184,7 +1276,7 @@ struct gfs2_buffer_head *init_dinode(struct gfs2_sbd *sdp,
 		di.di_entries = 2;
 	}
 
-	gfs2_dinode_out(&di, bh->b_data);
+	gfs2_dinode_out(&di, bh);
 
 	return bh;
 }
@@ -1207,12 +1299,16 @@ struct gfs2_inode *createi(struct gfs2_inode *dip, const char *filename,
 
 		dir_add(dip, filename, strlen(filename), &inum, IF2DT(mode));
 
-		if(S_ISDIR(mode))
+		if(S_ISDIR(mode)) {
+			bmodified(dip->i_bh);
 			dip->i_di.di_nlink++;
+		}
 
 		bh = init_dinode(sdp, &inum, mode, flags, &dip->i_di.di_num);
 		ip = inode_get(sdp, bh);
+		bmodified(bh);
 	}
+	ip->bh_owned = 1;
 	return ip;
 }
 
@@ -1305,7 +1401,6 @@ static int leaf_search(struct gfs2_inode *dip, struct gfs2_buffer_head *bh,
 
 static int linked_leaf_search(struct gfs2_inode *dip, const char *filename,
 			      int len, struct gfs2_dirent **dent_out,
-			      struct gfs2_dirent **dent_prev,
 			      struct gfs2_buffer_head **bh_out)
 {
 	struct gfs2_buffer_head *bh = NULL, *bh_next;
@@ -1328,12 +1423,12 @@ static int linked_leaf_search(struct gfs2_inode *dip, const char *filename,
 
 	/*  Find the entry  */
 	do{
-		if (bh)
-			brelse(bh, not_updated);
+		if (bh && bh != dip->i_bh)
+			brelse(bh);
 
 		bh = bh_next;
-		
-		error = leaf_search(dip, bh, filename, len, dent_out, dent_prev);
+
+		error = leaf_search(dip, bh, filename, len, dent_out, NULL);
 		switch (error){
 		case 0:
 			*bh_out = bh;
@@ -1343,14 +1438,16 @@ static int linked_leaf_search(struct gfs2_inode *dip, const char *filename,
 			break;
 			
 		default:
-			brelse(bh, not_updated);
+			if (bh && bh != dip->i_bh)
+				brelse(bh);
 			return error;
 		}
 		
 		error = get_next_leaf(dip, bh, &bh_next);
 	} while (!error);
 	
-	brelse(bh, not_updated);
+	if (bh && bh != dip->i_bh)
+		brelse(bh);
 	
 	return error;
 }
@@ -1370,7 +1467,7 @@ static int dir_e_search(struct gfs2_inode *dip, const char *filename,
 	struct gfs2_dirent *dent;
 	int error;
 
-	error = linked_leaf_search(dip, filename, len, &dent, NULL, &bh);
+	error = linked_leaf_search(dip, filename, len, &dent, &bh);
 	if (error)
 		return error;
 
@@ -1378,7 +1475,7 @@ static int dir_e_search(struct gfs2_inode *dip, const char *filename,
 	if (type)
 		*type = be16_to_cpu(dent->de_type);
 
-	brelse(bh, not_updated);
+	brelse(bh);
 
 	return 0;
 }
@@ -1395,21 +1492,18 @@ static int dir_e_search(struct gfs2_inode *dip, const char *filename,
 static int dir_l_search(struct gfs2_inode *dip, const char *filename,
 			int len, unsigned int *type, struct gfs2_inum *inum)
 {
-	struct gfs2_buffer_head *dibh;
 	struct gfs2_dirent *dent;
 	int error;
 
 	if(!inode_is_stuffed(dip))
 		return -1;
 
-	dibh = bread(&dip->i_sbd->buf_list, dip->i_di.di_num.no_addr);
-	error = leaf_search(dip, dibh, filename, len, &dent, NULL);
+	error = leaf_search(dip, dip->i_bh, filename, len, &dent, NULL);
 	if (!error) {
 		gfs2_inum_in(inum, (char *)&dent->de_inum);
 		if(type)
 			*type = be16_to_cpu(dent->de_type);
 	}
-	brelse(dibh, not_updated);
 	return error;
 }
 
@@ -1457,15 +1551,15 @@ static int dir_e_del(struct gfs2_inode *dip, const char *filename, int len)
 		gfs2_get_leaf_nr(dip, lindex, &leaf_no);
 
 		while(leaf_no && !found){
-			bh = bread(&dip->i_sbd->buf_list, leaf_no);
+			bh = bread(dip->i_sbd, leaf_no);
 			error = leaf_search(dip, bh, filename, len, &cur, &prev);
 			if (error) {
 				if(error != -ENOENT){
-					brelse(bh, updated);
+					brelse(bh);
 					return -1;
 				}
 				leaf_no = be64_to_cpu(((struct gfs2_leaf *)bh->b_data)->lf_next);
-				brelse(bh, updated);
+				brelse(bh);
 			} else
 				found = 1;
 		}
@@ -1476,40 +1570,28 @@ static int dir_e_del(struct gfs2_inode *dip, const char *filename, int len)
 
 	if (bh) {
 		dirent2_del(dip, bh, prev, cur);
-		brelse(bh, updated);
+		brelse(bh);
 	}
 	return 0;
 }
 
-static int dir_l_del(struct gfs2_inode *dip, struct gfs2_buffer_head *dibh,
-		     const char *filename, int len){
+static int dir_l_del(struct gfs2_inode *dip, const char *filename, int len)
+{
 	int error=0;
-	int got_buf = 0;
 	struct gfs2_dirent *cur, *prev;
 
 	if(!inode_is_stuffed(dip))
 		return -1;
 
-	if(!dibh) {
-		dibh = bread(&dip->i_sbd->buf_list, dip->i_di.di_num.no_addr);
-		if (error)
-			return -1;
-		got_buf = 1;
-	}
-
-	error = leaf_search(dip, dibh, filename, len, &cur, &prev);
+	error = leaf_search(dip, dip->i_bh, filename, len, &cur, &prev);
 	if (error) {
-		if (got_buf)
-			brelse(dibh, not_updated);
 		if (error == -ENOENT)
 			return 1;
 		else
 			return -1;
 	}
 
-	dirent2_del(dip, dibh, prev, cur);
-	if (got_buf)
-		brelse(dibh, updated);
+	dirent2_del(dip, dip->i_bh, prev, cur);
 	return 0;
 }
 
@@ -1525,8 +1607,8 @@ static int dir_l_del(struct gfs2_inode *dip, struct gfs2_buffer_head *dibh,
  *
  * Returns: 0 on success (or if it doesn't already exist), -1 on failure
  */
-int gfs2_dirent_del(struct gfs2_inode *dip, struct gfs2_buffer_head *bh,
-		    const char *filename, int len){
+int gfs2_dirent_del(struct gfs2_inode *dip, const char *filename, int len)
+{
 	int error;
 
 	if(!S_ISDIR(dip->i_di.di_mode))
@@ -1535,8 +1617,8 @@ int gfs2_dirent_del(struct gfs2_inode *dip, struct gfs2_buffer_head *bh,
 	if (dip->i_di.di_flags & GFS2_DIF_EXHASH)
 		error = dir_e_del(dip, filename, len);
 	else
-		error = dir_l_del(dip, bh, filename, len);
-
+		error = dir_l_del(dip, filename, len);
+	bmodified(dip->i_bh);
 	return error;
 }
 
@@ -1569,7 +1651,7 @@ int gfs2_lookupi(struct gfs2_inode *dip, const char *filename, int len,
 			return 0;
 	}
 	else
-		*ipp = gfs2_load_inode(sdp, inum.no_addr);
+		*ipp = inode_read(sdp, inum.no_addr);
 
 	return error;
 }
@@ -1579,16 +1661,16 @@ int gfs2_lookupi(struct gfs2_inode *dip, const char *filename, int len,
  */
 void gfs2_free_block(struct gfs2_sbd *sdp, uint64_t block)
 {
-	struct gfs2_buffer_head *bh;
 	struct rgrp_list *rgd;
 
-	gfs2_set_bitmap(sdp, block, GFS2_BLKST_FREE);
 	/* Adjust the free space count for the freed block */
 	rgd = gfs2_blk2rgrpd(sdp, block); /* find the rg for indir block */
-	bh = bget(&sdp->nvbuf_list, rgd->ri.ri_addr); /* get the rg buffer */
-	rgd->rg.rg_free++; /* adjust the free count */
-	gfs2_rgrp_out(&rgd->rg, bh->b_data); /* back to the buffer */
-	brelse(bh, updated); /* release the buffer */
+	if (rgd) {
+		gfs2_set_bitmap(sdp, block, GFS2_BLKST_FREE);
+		rgd->rg.rg_free++; /* adjust the free count */
+		gfs2_rgrp_out(&rgd->rg, rgd->bh[0]); /* back to the buffer */
+		sdp->blks_alloced--;
+	}
 }
 
 /**
@@ -1609,7 +1691,7 @@ int gfs2_freedi(struct gfs2_sbd *sdp, uint64_t diblock)
 	for (h = 0; h < GFS2_MAX_META_HEIGHT; h++)
 		osi_list_init(&metalist[h]);
 
-	bh = bread(&sdp->buf_list, diblock);
+	bh = bread(sdp, diblock);
 	ip = inode_get(sdp, bh);
 	height = ip->i_di.di_height;
 	osi_list_add(&bh->b_altlist, &metalist[0]);
@@ -1633,28 +1715,24 @@ int gfs2_freedi(struct gfs2_sbd *sdp, uint64_t diblock)
 				gfs2_free_block(sdp, block);
 				if (h == height - 1) /* if not metadata */
 					continue; /* don't queue it up */
-				/* Read the next metadata block in the chain.
-				   First see if it's on the nvbuf_list. */
-				nbh = bfind(&sdp->nvbuf_list, block);
-				if (!nbh)
-					nbh = bread(&sdp->buf_list, block);
+				/* Read the next metadata block in the chain */
+				nbh = bread(sdp, block);
 				osi_list_add(&nbh->b_altlist, next_list);
-				brelse(nbh, not_updated);
+				brelse(nbh);
 			}
 		}
 	}
 	/* Set the bitmap type for inode to free space: */
 	gfs2_set_bitmap(sdp, ip->i_di.di_num.no_addr, GFS2_BLKST_FREE);
-	inode_put(ip, updated);
+	inode_put(&ip);
+	/* inode_put deallocated the extra block used by the dist inode, */
+	/* so adjust it in the superblock struct */
+	sdp->blks_alloced--;
 	/* Now we have to adjust the rg freespace count and inode count: */
 	rgd = gfs2_blk2rgrpd(sdp, diblock);
-	/* The rg itself is in memory as rgd->rg, but there's most likely a  */
-	/* buffer in memory for the rg on disk because we used it to fix the */
-	/* bitmaps, some of which are on the same block on disk.             */
-	bh = bread(&sdp->nvbuf_list, rgd->ri.ri_addr); /* get the buffer */
 	rgd->rg.rg_free++;
 	rgd->rg.rg_dinodes--; /* one less inode in use */
-	gfs2_rgrp_out(&rgd->rg, bh->b_data);
-	brelse(bh, updated); /* release the buffer */
+	gfs2_rgrp_out(&rgd->rg, rgd->bh[0]);
+	sdp->dinodes_alloced--;
 	return 0;
 }
diff --git a/gfs2/libgfs2/gfs1.c b/gfs2/libgfs2/gfs1.c
index 59c4f8f..5018334 100644
--- a/gfs2/libgfs2/gfs1.c
+++ b/gfs2/libgfs2/gfs1.c
@@ -70,9 +70,7 @@ void gfs1_block_map(struct gfs2_inode *ip, uint64_t lblock, int *new,
 	unsigned int height;
 	unsigned int end_of_metadata;
 	unsigned int x;
-	enum update_flags f;
 
-	f = not_updated;
 	*new = 0;
 	*dblock = 0;
 	if (extlen)
@@ -100,26 +98,25 @@ void gfs1_block_map(struct gfs2_inode *ip, uint64_t lblock, int *new,
 	mp = find_metapath(ip, lblock);
 	end_of_metadata = ip->i_di.di_height - 1;
 
-	bh = bhold(ip->i_bh);
+	bh = ip->i_bh;
 
 	for (x = 0; x < end_of_metadata; x++) {
 		gfs1_lookup_block(ip, bh, x, mp, create, new, dblock);
-		brelse(bh, f);
+		if (bh != ip->i_bh)
+			brelse(bh);
 		if (!*dblock)
 			goto out;
 
 		if (*new) {
 			struct gfs2_meta_header mh;
 
-			bh = bget(&sdp->buf_list, *dblock);
+			bh = bget(sdp, *dblock);
 			mh.mh_magic = GFS2_MAGIC;
 			mh.mh_type = GFS2_METATYPE_IN;
 			mh.mh_format = GFS2_FORMAT_IN;
-			gfs2_meta_header_out(&mh, bh->b_data);
-			f = updated;
+			gfs2_meta_header_out(&mh, bh);
 		} else {
-			bh = bread(&sdp->buf_list, *dblock);
-			f = not_updated;
+			bh = bread(sdp, *dblock);
 		}
 	}
 
@@ -150,7 +147,8 @@ void gfs1_block_map(struct gfs2_inode *ip, uint64_t lblock, int *new,
 		}
 	}
 
-	brelse(bh, f);
+	if (bh != ip->i_bh)
+		brelse(bh);
 
  out:
 	free(mp);
@@ -201,7 +199,7 @@ int gfs1_readi(struct gfs2_inode *ip, void *bufin,
 				       &extlen, FALSE);
 
 		if (dblock) {
-			bh = bread(&sdp->buf_list, dblock);
+			bh = bread(sdp, dblock);
 			dblock++;
 			extlen--;
 		} else
@@ -210,7 +208,7 @@ int gfs1_readi(struct gfs2_inode *ip, void *bufin,
 
 		if (bh) {
 			memcpy(buf+copied, bh->b_data + offset, amount);
-			brelse(bh, not_updated);
+			brelse(bh);
 		} else
 			memset(buf+copied, 0, amount);
 		copied += amount;
@@ -294,9 +292,11 @@ int gfs1_rindex_read(struct gfs2_sbd *sdp, int fd, int *count1)
 int gfs1_ri_update(struct gfs2_sbd *sdp, int fd, int *rgcount, int quiet)
 {
 	struct rgrp_list *rgd;
+	struct gfs2_rindex *ri;
 	osi_list_t *tmp;
 	int count1 = 0, count2 = 0;
 	uint64_t errblock = 0;
+	uint64_t rmax = 0;
 
 	if (gfs1_rindex_read(sdp, fd, &count1))
 	    goto fail;
@@ -310,8 +310,12 @@ int gfs1_ri_update(struct gfs2_sbd *sdp, int fd, int *rgcount, int quiet)
 			printf(".");
 			fflush(stdout);
 		}
+		ri = &rgd->ri;
+		if (ri->ri_data0 + ri->ri_data - 1 > rmax)
+			rmax = ri->ri_data0 + ri->ri_data - 1;
 	}
 
+	sdp->fssize = rmax;
 	*rgcount = count1;
 	if (count1 != count2)
 		goto fail;
@@ -319,18 +323,18 @@ int gfs1_ri_update(struct gfs2_sbd *sdp, int fd, int *rgcount, int quiet)
 	return 0;
 
  fail:
-	gfs2_rgrp_free(&sdp->rglist, not_updated);
+	gfs2_rgrp_free(&sdp->rglist);
 	return -1;
 }
 
 /* ------------------------------------------------------------------------ */
 /* gfs_dinode_in */
 /* ------------------------------------------------------------------------ */
-static void gfs_dinode_in(struct gfs_dinode *di, char *buf)
+static void gfs_dinode_in(struct gfs_dinode *di, struct gfs2_buffer_head *bh)
 {
-	struct gfs_dinode *str = (struct gfs_dinode *)buf;
+	struct gfs_dinode *str = (struct gfs_dinode *)bh->b_data;
 
-	gfs2_meta_header_in(&di->di_header, buf);
+	gfs2_meta_header_in(&di->di_header, bh);
 	gfs2_inum_in(&di->di_num, (char *)&str->di_num);
 
 	di->di_mode = be32_to_cpu(str->di_mode);
@@ -367,7 +371,7 @@ struct gfs2_inode *gfs_inode_get(struct gfs2_sbd *sdp,
 		exit(-1);
 	}
 
-	gfs_dinode_in(&gfs1_dinode, bh->b_data);
+	gfs_dinode_in(&gfs1_dinode, bh);
 	memcpy(&ip->i_di.di_header, &gfs1_dinode.di_header,
 	       sizeof(struct gfs2_meta_header));
 	memcpy(&ip->i_di.di_num, &gfs1_dinode.di_num,
@@ -394,5 +398,49 @@ struct gfs2_inode *gfs_inode_get(struct gfs2_sbd *sdp,
 	ip->i_di.di_eattr = gfs1_dinode.di_eattr;
 	ip->i_bh = bh;
 	ip->i_sbd = sdp;
+	ip->bh_owned = 0;
 	return ip;
 }
+
+struct gfs2_inode *gfs_inode_read(struct gfs2_sbd *sdp, uint64_t di_addr)
+{
+	struct gfs_dinode gfs1_dinode;
+	struct gfs2_inode *ip;
+
+	ip = calloc(1, sizeof(struct gfs2_inode));
+	if (ip == NULL) {
+		fprintf(stderr, "Out of memory in %s\n", __FUNCTION__);
+		exit(-1);
+	}
+
+	ip->i_bh = bread(sdp, di_addr);
+	gfs_dinode_in(&gfs1_dinode, ip->i_bh);
+	memcpy(&ip->i_di.di_header, &gfs1_dinode.di_header,
+	       sizeof(struct gfs2_meta_header));
+	memcpy(&ip->i_di.di_num, &gfs1_dinode.di_num,
+	       sizeof(struct gfs2_inum));
+	ip->i_di.di_mode = gfs1_dinode.di_mode;
+	ip->i_di.di_uid = gfs1_dinode.di_uid;
+	ip->i_di.di_gid = gfs1_dinode.di_gid;
+	ip->i_di.di_nlink = gfs1_dinode.di_nlink;
+	ip->i_di.di_size = gfs1_dinode.di_size;
+	ip->i_di.di_blocks = gfs1_dinode.di_blocks;
+	ip->i_di.di_atime = gfs1_dinode.di_atime;
+	ip->i_di.di_mtime = gfs1_dinode.di_mtime;
+	ip->i_di.di_ctime = gfs1_dinode.di_ctime;
+	ip->i_di.di_major = gfs1_dinode.di_major;
+	ip->i_di.di_minor = gfs1_dinode.di_minor;
+	ip->i_di.di_goal_data = gfs1_dinode.di_goal_dblk;
+	ip->i_di.di_goal_meta = gfs1_dinode.di_goal_mblk;
+	ip->i_di.di_flags = gfs1_dinode.di_flags;
+	ip->i_di.di_payload_format = gfs1_dinode.di_payload_format;
+	ip->i_di.__pad1 = gfs1_dinode.di_type;
+	ip->i_di.di_height = gfs1_dinode.di_height;
+	ip->i_di.di_depth = gfs1_dinode.di_depth;
+	ip->i_di.di_entries = gfs1_dinode.di_entries;
+	ip->i_di.di_eattr = gfs1_dinode.di_eattr;
+	ip->i_sbd = sdp;
+	ip->bh_owned = 1;
+	return ip;
+}
+
diff --git a/gfs2/libgfs2/gfs2_log.c b/gfs2/libgfs2/gfs2_log.c
index 41f4faa..75a06a7 100644
--- a/gfs2/libgfs2/gfs2_log.c
+++ b/gfs2/libgfs2/gfs2_log.c
@@ -12,19 +12,16 @@
 
 #define _(String) gettext(String)
 
-struct log_state {
-	int print_level;
-};
-static struct log_state _state = {MSG_NOTICE};
+int print_level = MSG_NOTICE;
 
 void increase_verbosity(void)
 {
-	_state.print_level++;
+	print_level++;
 }
 
 void decrease_verbosity(void)
 {
-	_state.print_level--;
+	print_level--;
 }
 
 static __attribute__((format (printf, 4, 0)))
@@ -34,9 +31,8 @@ void print_msg(int priority, const char *file, int line,
 	switch (priority) {
 
 	case MSG_DEBUG:
-		printf("(%s:%d)\t", file, line);
+		printf("(%s:%d) ", file, line);
 		vprintf(format, args);
-		fflush(NULL);
 		break;
 	case MSG_INFO:
 	case MSG_NOTICE:
@@ -50,29 +46,22 @@ void print_msg(int priority, const char *file, int line,
 		vfprintf(stderr, format, args);
 		break;
 	}
-	return;
 }
 
 
-void print_fsck_log(int iif, int priority, const char *file, int line,
+void print_fsck_log(int priority, const char *file, int line,
 		    const char *format, ...)
 {
-
 	va_list args;
 	const char *transform;
 
-        va_start(args, format);
-
+	va_start(args, format);
 	transform = _(format);
-
-	if((_state.print_level == priority) ||
-	   (!iif && (_state.print_level >= priority)))
-		print_msg(priority, file, line, transform, args);
-
+	print_msg(priority, file, line, transform, args);
 	va_end(args);
 }
 
-static char gfs2_getch(void)
+char gfs2_getch(void)
 {
 	struct termios termattr, savetermattr;
 	char ch;
@@ -150,7 +139,6 @@ char generic_interrupt(const char *caller, const char *where,
 int gfs2_query(int *setonabort, struct gfs2_options *opts,
 	       const char *format, ...)
 {
-
 	va_list args;
 	const char *transform;
 	char response;
@@ -188,9 +176,9 @@ int gfs2_query(int *setonabort, struct gfs2_options *opts,
 			}
 			printf("Continuing.\n");
 		} else if(tolower(response) == 'y') {
-                        ret = 1;
-                        break;
- 		} else if (tolower(response) == 'n') {
+			ret = 1;
+			break;
+		} else if (tolower(response) == 'n') {
 			ret = 0;
 			break;
 		} else {
diff --git a/gfs2/libgfs2/libgfs2.h b/gfs2/libgfs2/libgfs2.h
index bc2507e..6aa9e2d 100644
--- a/gfs2/libgfs2/libgfs2.h
+++ b/gfs2/libgfs2/libgfs2.h
@@ -69,6 +69,10 @@ __BEGIN_DECLS
 
 #endif  /*  __BYTE_ORDER == __LITTLE_ENDIAN  */
 
+#define BLOCKMAP_SIZE4(size) (size >> 1)
+#define BLOCKMAP_BYTE_OFFSET4(x) ((x & 0x0000000000000001) << 2)
+#define BLOCKMAP_MASK4 (0xf)
+
 static __inline__ __attribute__((noreturn, format (printf, 1, 2)))
 void die(const char *fmt, ...)
 {
@@ -97,7 +101,6 @@ struct rgrp_list {
 	osi_list_t list;
 	uint64_t start;	   /* The offset of the beginning of this resource group */
 	uint64_t length;	/* The length of this resource group */
-	uint32_t rgf_flags;
 
 	struct gfs2_rindex ri;
 	struct gfs2_rgrp rg;
@@ -106,21 +109,11 @@ struct rgrp_list {
 };
 
 struct gfs2_buffer_head {
-	osi_list_t b_list;
-	osi_list_t b_hash;
 	osi_list_t b_altlist; /* alternate list */
-
-	unsigned int b_count;
 	uint64_t b_blocknr;
+	int b_modified;
 	char *b_data;
-
-	int b_changed;
-};
-
-struct dup_blocks {
-	osi_list_t list;
-	uint64_t block_no;
-	osi_list_t ref_inode_list;
+	struct gfs2_sbd *sdp;
 };
 
 struct special_blocks {
@@ -130,6 +123,7 @@ struct special_blocks {
 
 struct gfs2_sbd;
 struct gfs2_inode {
+	int bh_owned; /* Is this bh owned, iow, should we release it later? */
 	struct gfs2_dinode i_di;
 	struct gfs2_buffer_head *i_bh;
 	struct gfs2_sbd *i_sbd;
@@ -174,15 +168,6 @@ struct master_dir
 	struct per_node *pn;              /* Array of per_node entries */
 };
 
-struct buf_list {
-	unsigned int num_bufs;
-	unsigned int spills;
-	uint32_t limit;
-	osi_list_t list;
-	struct gfs2_sbd *sbp;
-	osi_list_t buf_hash[BUF_HASH_SIZE];
-};
-
 struct gfs2_sbd {
 	struct gfs2_sb sd_sb;    /* a copy of the ondisk structure */
 	char lockproto[GFS2_LOCKNAME_LEN];
@@ -243,17 +228,12 @@ struct gfs2_sbd {
 
 	unsigned int orig_journals;
 
-	struct buf_list buf_list;   /* transient buffer list */
-	struct buf_list nvbuf_list; /* non-volatile buffer list */
-
 	struct gfs2_inode *master_dir;
 	struct master_dir md;
 
 	unsigned int writes;
 	int metafs_fd;
 	char metafs_path[PATH_MAX]; /* where metafs is mounted */
-	struct special_blocks bad_blocks;
-	struct dup_blocks dup_blocks;
 	struct special_blocks eattr_blocks;
 };
 
@@ -275,129 +255,117 @@ struct metapath {
 #define META (2)
 #define DINODE (3)
 
-#define NOT_UPDATED (0)
-#define UPDATED (1)
-
-/* A bit of explanation is in order: */
-/* updated flag means the buffer was updated from THIS function before */
-/*         brelse was called. */
-/* not_updated flag means the buffer may or may not have been updated  */
-/*         by a function called within this one, but it wasn't updated */
-/*         by this function. */
-enum update_flags {
-	not_updated = NOT_UPDATED,
-	updated = UPDATED
-};
-
 /* bitmap.c */
 struct gfs2_bmap {
-        uint64_t size;
-        uint64_t mapsize;
-        int chunksize;
-        int chunks_per_byte;
-        char *map;
+	uint64_t size;
+	uint64_t mapsize;
+	unsigned char *map;
 };
 
 /* block_list.c */
-#define FREE	        (0x0)  /*   0000 */
-#define BLOCK_IN_USE    (0x1)  /*   0001 */
-#define DIR_INDIR_BLK   (0x2)  /*   0010 */
-#define DIR_INODE       (0x3)  /*   0011 */
-#define FILE_INODE      (0x4)  /*   0100 */
-#define LNK_INODE       (0x5)
-#define BLK_INODE       (0x6)
-#define CHR_INODE       (0x7)
-#define FIFO_INODE      (0x8)
-#define SOCK_INODE      (0x9)
-#define DIR_LEAF_INODE  (0xA)  /*   1010 */
-#define JOURNAL_BLK     (0xB)  /*   1011 */
-#define OTHER_META      (0xC)  /*   1100 */
-#define EATTR_META      (0xD)  /*   1101 */
-#define UNUSED1         (0xE)  /*   1110 */
-#define INVALID_META    (0xF)  /*   1111 */
-
-/* Must be kept in sync with mark_to_bitmap array in block_list.c */
-enum gfs2_mark_block {
-	gfs2_block_free = FREE,
-	gfs2_block_used = BLOCK_IN_USE,
-	gfs2_indir_blk = DIR_INDIR_BLK,
-	gfs2_inode_dir = DIR_INODE,
-	gfs2_inode_file = FILE_INODE,
-	gfs2_inode_lnk = LNK_INODE,
-	gfs2_inode_blk = BLK_INODE,
-	gfs2_inode_chr = CHR_INODE,
-	gfs2_inode_fifo = FIFO_INODE,
-	gfs2_inode_sock = SOCK_INODE,
-	gfs2_leaf_blk = DIR_LEAF_INODE,
-	gfs2_journal_blk = JOURNAL_BLK,
-	gfs2_meta_other = OTHER_META,
-	gfs2_meta_eattr = EATTR_META,
-	gfs2_meta_unused = UNUSED1,
-	gfs2_meta_inval = INVALID_META,
-	gfs2_bad_block,      /* Contains at least one bad block */
-	gfs2_dup_block,      /* Contains at least one duplicate block */
-	gfs2_eattr_block,    /* Contains an eattr */
-};
-
-struct gfs2_block_query {
-        uint8_t block_type;
-        uint8_t bad_block;
-        uint8_t dup_block;
-        uint8_t eattr_block;
-};
 
-struct gfs2_gbmap {
-        struct gfs2_bmap group_map;
-        struct gfs2_bmap bad_map;
-        struct gfs2_bmap dup_map;
-        struct gfs2_bmap eattr_map;
+enum gfs2_mark_block { /* 16 marks, 0x0-0xf; blockmap_to_bitmap() indexes its table with these */
+	gfs2_block_free    = (0x0),
+	gfs2_block_used    = (0x1), /* presumably the "data" entry of block_type_string() */
+	gfs2_indir_blk     = (0x2), /* presumably "indirect data" */
+	gfs2_inode_dir     = (0x3),
+	gfs2_inode_file    = (0x4),
+
+	gfs2_inode_lnk     = (0x5),
+	gfs2_inode_blk     = (0x6),
+	gfs2_inode_chr     = (0x7),
+	gfs2_inode_fifo    = (0x8),
+	gfs2_inode_sock    = (0x9),
+
+	gfs2_inode_invalid = (0xa),
+	gfs2_meta_inval    = (0xb),
+	gfs2_leaf_blk      = (0xc), /* presumably "dir leaf" */
+	gfs2_meta_rgrp     = (0xd),
+	gfs2_meta_eattr    = (0xe),
+
+	gfs2_bad_block     = (0xf), /* Contains at least one bad block */
 };
 
-union gfs2_block_lists {
-        struct gfs2_gbmap gbmap;
-};
+/* Return a printable name for a 4-bit block mark; the table is ordered by
+   mark value and anything above 15 is reported as "bad". */
+static inline const char *block_type_string(uint8_t q)
+{
+	static const char *const blktyp[] = {
+		"free",
+		"data",
+		"indirect data",
+		"directory",
+		"file",
+
+		"symlink",
+		"block device",
+		"char device",
+		"fifo",
+		"socket",
+
+		"invalid inode",
+		"invalid meta",
+		"dir leaf",
+		"rgrp meta",
+		"eattribute",
+
+		"bad"};
+	return blktyp[q < 16 ? q : 15];
+}
 
-/* bitmap implementation */
-struct gfs2_block_list {
-        union gfs2_block_lists list;
-};
+/* Must be kept in sync with gfs2_mark_block enum above. Blocks marked as
+   invalid or bad are considered metadata until actually freed. */
+static inline int blockmap_to_bitmap(enum gfs2_mark_block m)
+{
+	static int bitmap_states[16] = { /* one GFS2_BLKST_* state per mark, in enum order */
+		GFS2_BLKST_FREE,   /* 0x0 gfs2_block_free */
+		GFS2_BLKST_USED,   /* 0x1 gfs2_block_used */
+		GFS2_BLKST_USED,   /* 0x2 gfs2_indir_blk */
+		GFS2_BLKST_DINODE, /* 0x3 gfs2_inode_dir */
+		GFS2_BLKST_DINODE, /* 0x4 gfs2_inode_file */
+
+		GFS2_BLKST_DINODE, /* 0x5 gfs2_inode_lnk */
+		GFS2_BLKST_DINODE, /* 0x6 gfs2_inode_blk */
+		GFS2_BLKST_DINODE, /* 0x7 gfs2_inode_chr */
+		GFS2_BLKST_DINODE, /* 0x8 gfs2_inode_fifo */
+		GFS2_BLKST_DINODE, /* 0x9 gfs2_inode_sock */
+
+		GFS2_BLKST_FREE,   /* 0xa gfs2_inode_invalid -- NOTE(review): FREE, yet the comment above says invalid blocks stay metadata; confirm */
+		GFS2_BLKST_FREE,   /* 0xb gfs2_meta_inval -- NOTE(review): same question as 0xa */
+		GFS2_BLKST_USED,   /* 0xc gfs2_leaf_blk */
+		GFS2_BLKST_USED,   /* 0xd gfs2_meta_rgrp */
+		GFS2_BLKST_USED,   /* 0xe gfs2_meta_eattr */
+
+		GFS2_BLKST_USED    /* 0xf gfs2_bad_block */
+	};
+	return bitmap_states[m]; /* no range check: m must be one of the 16 enumerators */
+}
 
-extern struct gfs2_block_list *gfs2_block_list_create(struct gfs2_sbd *sdp,
-					       uint64_t size,
-					       uint64_t *addl_mem_needed);
+extern struct gfs2_bmap *gfs2_bmap_create(struct gfs2_sbd *sdp, uint64_t size,
+					  uint64_t *addl_mem_needed);
 extern struct special_blocks *blockfind(struct special_blocks *blist, uint64_t num);
+extern void gfs2_special_add(struct special_blocks *blocklist, uint64_t block);
 extern void gfs2_special_set(struct special_blocks *blocklist, uint64_t block);
 extern void gfs2_special_free(struct special_blocks *blist);
-extern int gfs2_block_mark(struct gfs2_sbd *sdp, struct gfs2_block_list *il,
-	 		   uint64_t block, enum gfs2_mark_block mark);
-extern int gfs2_block_set(struct gfs2_sbd *sdp, struct gfs2_block_list *il,
-			  uint64_t block, enum gfs2_mark_block mark);
-/* gfs2_block_unmark clears ONE mark for the given block */
-extern int gfs2_block_unmark(struct gfs2_sbd *sdp, struct gfs2_block_list *il,
-			     uint64_t block, enum gfs2_mark_block m);
-/* gfs2_block_clear clears all the marks for the given block */
-extern int gfs2_block_clear(struct gfs2_sbd *sdp, struct gfs2_block_list *il,
-			    uint64_t block);
-extern int gfs2_block_check(struct gfs2_sbd *sdp, struct gfs2_block_list *il,
-			    uint64_t block, struct gfs2_block_query *val);
-extern void *gfs2_block_list_destroy(struct gfs2_sbd *sdp,
-				     struct gfs2_block_list *il);
+extern int gfs2_blockmap_set(struct gfs2_bmap *il, uint64_t block,
+			     enum gfs2_mark_block mark);
+extern void gfs2_special_clear(struct special_blocks *blocklist,
+			       uint64_t block);
+extern void *gfs2_bmap_destroy(struct gfs2_sbd *sdp, struct gfs2_bmap *il);
 
 /* buf.c */
-extern void init_buf_list(struct gfs2_sbd *sdp, struct buf_list *bl, uint32_t limit);
-extern struct gfs2_buffer_head *bfind(struct buf_list *bl, uint64_t num);
-extern struct gfs2_buffer_head *__bget_generic(struct buf_list *bl,
-					       uint64_t num, int find_existing,
+extern struct gfs2_buffer_head *__bget_generic(struct gfs2_sbd *sdp,
+					       uint64_t num,
 					       int read_disk, int line,
 					       const char *caller);
-extern struct gfs2_buffer_head *__bget(struct buf_list *bl, uint64_t num,
+extern struct gfs2_buffer_head *__bget(struct gfs2_sbd *sdp, uint64_t num,
 				       int line, const char *caller);
-extern struct gfs2_buffer_head *__bread(struct buf_list *bl, uint64_t num,
+extern struct gfs2_buffer_head *__bread(struct gfs2_sbd *sdp, uint64_t num,
 					int line, const char *caller);
-extern struct gfs2_buffer_head *bhold(struct gfs2_buffer_head *bh);
-extern void brelse(struct gfs2_buffer_head *bh, enum update_flags is_updated);
-extern void __bsync(struct buf_list *bl, int line, const char *caller);
-extern void __bcommit(struct buf_list *bl, int line, const char *caller);
+extern int bwrite(struct gfs2_buffer_head *bh);
+extern int brelse(struct gfs2_buffer_head *bh);
+
+#define bmodified(bh) do { (bh)->b_modified = 1; } while(0) /* flag the buffer head as modified; bh may be any pointer expression */
 
 #define bget_generic(bl, num, find, read) __bget_generic(bl, num, find, read, \
 							 __LINE__, \
@@ -447,7 +415,10 @@ extern void lookup_block(struct gfs2_inode *ip, struct gfs2_buffer_head *bh,
 			 int create, int *new, uint64_t *block);
 extern struct gfs2_inode *inode_get(struct gfs2_sbd *sdp,
 				    struct gfs2_buffer_head *bh);
-extern void inode_put(struct gfs2_inode *ip, enum update_flags updated);
+extern struct gfs2_inode *inode_read(struct gfs2_sbd *sdp, uint64_t di_addr);
+extern struct gfs2_inode *is_system_inode(struct gfs2_sbd *sdp,
+					  uint64_t block);
+extern void inode_put(struct gfs2_inode **ip);
 extern uint64_t data_alloc(struct gfs2_inode *ip);
 extern uint64_t meta_alloc(struct gfs2_inode *ip);
 extern uint64_t dinode_alloc(struct gfs2_sbd *sdp);
@@ -465,16 +436,14 @@ extern struct gfs2_inode *createi(struct gfs2_inode *dip, const char *filename,
 				  unsigned int mode, uint32_t flags);
 extern void dirent2_del(struct gfs2_inode *dip, struct gfs2_buffer_head *bh,
 			struct gfs2_dirent *prev, struct gfs2_dirent *cur);
-extern struct gfs2_inode *gfs2_load_inode(struct gfs2_sbd *sbp, uint64_t block);
 extern int gfs2_lookupi(struct gfs2_inode *dip, const char *filename, int len,
 			struct gfs2_inode **ipp);
 extern void dir_add(struct gfs2_inode *dip, const char *filename, int len,
 		    struct gfs2_inum *inum, unsigned int type);
-extern int gfs2_dirent_del(struct gfs2_inode *dip, struct gfs2_buffer_head *bh,
-			   const char *filename, int filename_len);
+extern int gfs2_dirent_del(struct gfs2_inode *dip, const char *filename,
+			   int filename_len);
 extern void block_map(struct gfs2_inode *ip, uint64_t lblock, int *new,
-		      uint64_t *dblock, uint32_t *extlen, int prealloc,
-		      enum update_flags if_changed);
+		      uint64_t *dblock, uint32_t *extlen, int prealloc);
 extern void gfs2_get_leaf_nr(struct gfs2_inode *dip, uint32_t index,
 			     uint64_t *leaf_out);
 extern void gfs2_put_leaf_nr(struct gfs2_inode *dip, uint32_t inx, uint64_t leaf_out);
@@ -568,6 +537,8 @@ extern int gfs1_rindex_read(struct gfs2_sbd *sdp, int fd, int *count1);
 extern int gfs1_ri_update(struct gfs2_sbd *sdp, int fd, int *rgcount, int quiet);
 extern struct gfs2_inode *gfs_inode_get(struct gfs2_sbd *sdp,
 					struct gfs2_buffer_head *bh);
+extern struct gfs2_inode *gfs_inode_read(struct gfs2_sbd *sdp,
+					 uint64_t di_addr);
 
 /* gfs2_log.c */
 struct gfs2_options {
@@ -577,6 +548,8 @@ struct gfs2_options {
 	unsigned int query:1;
 };
 
+extern int print_level;
+
 #define MSG_DEBUG       7
 #define MSG_INFO        6
 #define MSG_NOTICE      5
@@ -585,51 +558,34 @@ struct gfs2_options {
 #define MSG_CRITICAL    2
 #define MSG_NULL        1
 
-#define print_log(iif, priority, format...)     \
-do { print_fsck_log(iif, priority, __FILE__, __LINE__, ## format); } while(0)
+#define print_log(priority, format...) \
+	do { print_fsck_log(priority, __FILE__, __LINE__, ## format); } while(0)
 
 #define log_debug(format...) \
-do { print_log(0, MSG_DEBUG, format); } while(0)
+	do { if(print_level >= MSG_DEBUG) print_log(MSG_DEBUG, format); } while(0)
 #define log_info(format...) \
-do { print_log(0, MSG_INFO, format); } while(0)
+	do { if(print_level >= MSG_INFO) print_log(MSG_INFO, format); } while(0)
 
 #define log_notice(format...) \
-do { print_log(0, MSG_NOTICE, format); } while(0)
+	do { if(print_level >= MSG_NOTICE) print_log(MSG_NOTICE, format); } while(0)
 
 #define log_warn(format...) \
-do { print_log(0, MSG_WARN, format); } while(0)
+	do { if(print_level >= MSG_WARN) print_log(MSG_WARN, format); } while(0)
 
 #define log_err(format...) \
-do { print_log(0, MSG_ERROR, format); } while(0)
+	do { if(print_level >= MSG_ERROR) print_log(MSG_ERROR, format); } while(0)
 
 #define log_crit(format...) \
-do { print_log(0, MSG_CRITICAL, format); } while(0)
+	do { if(print_level >= MSG_CRITICAL) print_log(MSG_CRITICAL, format); } while(0)
 
 #define stack log_debug("<backtrace> - %s()\n", __func__)
 
-#define log_at_debug(format...)         \
-do { print_log(1, MSG_DEBUG, format); } while(0)
-
-#define log_at_info(format...) \
-do { print_log(1, MSG_INFO, format); } while(0)
-
-#define log_at_notice(format...) \
-do { print_log(1, MSG_NOTICE, format); } while(0)
-
-#define log_at_warn(format...) \
-do { print_log(1, MSG_WARN, format); } while(0)
-
-#define log_at_err(format...) \
-do { print_log(1, MSG_ERROR, format); } while(0)
-
-#define log_at_crit(format...) \
-do { print_log(1, MSG_CRITICAL, format); } while(0)
-
+extern char gfs2_getch(void);
 extern void increase_verbosity(void);
 extern void decrease_verbosity(void);
-extern void print_fsck_log(int iif, int priority, const char *file, int line,
+extern void print_fsck_log(int priority, const char *file, int line,
 			   const char *format, ...)
-	__attribute__((format(printf,5,6)));
+	__attribute__((format(printf,4,5)));
 extern char generic_interrupt(const char *caller, const char *where,
 			      const char *progress, const char *question,
 			      const char *answers);
@@ -642,6 +598,8 @@ extern int gfs2_query(int *setonabort, struct gfs2_options *opts,
 extern int compute_heightsize(struct gfs2_sbd *sdp, uint64_t *heightsize,
 		uint32_t *maxheight, uint32_t bsize1, int diptrs, int inptrs);
 extern int compute_constants(struct gfs2_sbd *sdp);
+extern int is_pathname_mounted(struct gfs2_sbd *sdp, int *ro_mount);
+extern int is_gfs2(struct gfs2_sbd *sdp);
 extern int find_gfs2_meta(struct gfs2_sbd *sdp);
 extern int dir_exists(const char *dir);
 extern int check_for_gfs2(struct gfs2_sbd *sdp);
@@ -674,8 +632,8 @@ extern int clean_journal(struct gfs2_inode *ip, struct gfs2_log_header *head);
 extern int gfs2_compute_bitstructs(struct gfs2_sbd *sdp, struct rgrp_list *rgd);
 extern struct rgrp_list *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, uint64_t blk);
 extern uint64_t gfs2_rgrp_read(struct gfs2_sbd *sdp, struct rgrp_list *rgd);
-extern void gfs2_rgrp_relse(struct rgrp_list *rgd, enum update_flags updated);
-extern void gfs2_rgrp_free(osi_list_t *rglist, enum update_flags updated);
+extern void gfs2_rgrp_relse(struct rgrp_list *rgd);
+extern void gfs2_rgrp_free(osi_list_t *rglist);
 
 /* structures.c */
 extern int build_master(struct gfs2_sbd *sdp);
@@ -690,7 +648,8 @@ extern int build_root(struct gfs2_sbd *sdp);
 extern int do_init_inum(struct gfs2_sbd *sdp);
 extern int do_init_statfs(struct gfs2_sbd *sdp);
 extern int gfs2_check_meta(struct gfs2_buffer_head *bh, int type);
-extern int gfs2_next_rg_meta(struct rgrp_list *rgd, uint64_t *block, int first);
+extern int gfs2_next_rg_meta(struct rgrp_list *rgd, uint64_t *block,
+			     int first);
 extern int gfs2_next_rg_metatype(struct gfs2_sbd *sdp, struct rgrp_list *rgd,
 				 uint64_t *block, uint32_t type, int first);
 /* super.c */
@@ -712,34 +671,42 @@ extern void print_it(const char *label, const char *fmt, const char *fmt2, ...)
 
 extern void gfs2_inum_in(struct gfs2_inum *no, char *buf);
 extern void gfs2_inum_out(struct gfs2_inum *no, char *buf);
-extern void gfs2_meta_header_in(struct gfs2_meta_header *mh, char *buf);
-extern void gfs2_meta_header_out(struct gfs2_meta_header *mh, char *buf);
-extern void gfs2_sb_in(struct gfs2_sb *sb, char *buf);
-extern void gfs2_sb_out(struct gfs2_sb *sb, char *buf);
+extern void gfs2_meta_header_in(struct gfs2_meta_header *mh,
+				struct gfs2_buffer_head *bh);
+extern void gfs2_meta_header_out(struct gfs2_meta_header *mh,
+				 struct gfs2_buffer_head *bh);
+extern void gfs2_sb_in(struct gfs2_sb *sb, struct gfs2_buffer_head *bh);
+extern void gfs2_sb_out(struct gfs2_sb *sb, struct gfs2_buffer_head *bh);
 extern void gfs2_rindex_in(struct gfs2_rindex *ri, char *buf);
 extern void gfs2_rindex_out(struct gfs2_rindex *ri, char *buf);
-extern void gfs2_rgrp_in(struct gfs2_rgrp *rg, char *buf);
-extern void gfs2_rgrp_out(struct gfs2_rgrp *rg, char *buf);
+extern void gfs2_rgrp_in(struct gfs2_rgrp *rg, struct gfs2_buffer_head *bh);
+extern void gfs2_rgrp_out(struct gfs2_rgrp *rg, struct gfs2_buffer_head *bh);
 extern void gfs2_quota_in(struct gfs2_quota *qu, char *buf);
 extern void gfs2_quota_out(struct gfs2_quota *qu, char *buf);
-extern void gfs2_dinode_in(struct gfs2_dinode *di, char *buf);
-extern void gfs2_dinode_out(struct gfs2_dinode *di, char *buf);
+extern void gfs2_dinode_in(struct gfs2_dinode *di,
+			   struct gfs2_buffer_head *bh);
+extern void gfs2_dinode_out(struct gfs2_dinode *di,
+			    struct gfs2_buffer_head *bh);
 extern void gfs2_dirent_in(struct gfs2_dirent *de, char *buf);
 extern void gfs2_dirent_out(struct gfs2_dirent *de, char *buf);
-extern void gfs2_leaf_in(struct gfs2_leaf *lf, char *buf);
-extern void gfs2_leaf_out(struct gfs2_leaf *lf, char *buf);
+extern void gfs2_leaf_in(struct gfs2_leaf *lf, struct gfs2_buffer_head *bh);
+extern void gfs2_leaf_out(struct gfs2_leaf *lf, struct gfs2_buffer_head *bh);
 extern void gfs2_ea_header_in(struct gfs2_ea_header *ea, char *buf);
 extern void gfs2_ea_header_out(struct gfs2_ea_header *ea, char *buf);
-extern void gfs2_log_header_in(struct gfs2_log_header *lh, char *buf);
-extern void gfs2_log_header_out(struct gfs2_log_header *lh, char *buf);
-extern void gfs2_log_descriptor_in(struct gfs2_log_descriptor *ld, char *buf);
-extern void gfs2_log_descriptor_out(struct gfs2_log_descriptor *ld, char *buf);
-extern void gfs2_inum_range_in(struct gfs2_inum_range *ir, char *buf);
-extern void gfs2_inum_range_out(struct gfs2_inum_range *ir, char *buf);
+extern void gfs2_log_header_in(struct gfs2_log_header *lh,
+			       struct gfs2_buffer_head *bh);
+extern void gfs2_log_header_out(struct gfs2_log_header *lh,
+				struct gfs2_buffer_head *bh);
+extern void gfs2_log_descriptor_in(struct gfs2_log_descriptor *ld,
+				   struct gfs2_buffer_head *bh);
+extern void gfs2_log_descriptor_out(struct gfs2_log_descriptor *ld,
+				    struct gfs2_buffer_head *bh);
 extern void gfs2_statfs_change_in(struct gfs2_statfs_change *sc, char *buf);
 extern void gfs2_statfs_change_out(struct gfs2_statfs_change *sc, char *buf);
-extern void gfs2_quota_change_in(struct gfs2_quota_change *qc, char *buf);
-extern void gfs2_quota_change_out(struct gfs2_quota_change *qc, char *buf);
+extern void gfs2_quota_change_in(struct gfs2_quota_change *qc,
+				 struct gfs2_buffer_head *bh);
+extern void gfs2_quota_change_out(struct gfs2_quota_change *qc,
+				  struct gfs2_buffer_head *bh);
 
 /* Printing functions */
 
@@ -755,7 +722,6 @@ extern void gfs2_leaf_print(struct gfs2_leaf *lf);
 extern void gfs2_ea_header_print(struct gfs2_ea_header *ea, char *name);
 extern void gfs2_log_header_print(struct gfs2_log_header *lh);
 extern void gfs2_log_descriptor_print(struct gfs2_log_descriptor *ld);
-extern void gfs2_inum_range_print(struct gfs2_inum_range *ir);
 extern void gfs2_statfs_change_print(struct gfs2_statfs_change *sc);
 extern void gfs2_quota_change_print(struct gfs2_quota_change *qc);
 
diff --git a/gfs2/libgfs2/misc.c b/gfs2/libgfs2/misc.c
index d8fe515..ffa0dba 100644
--- a/gfs2/libgfs2/misc.c
+++ b/gfs2/libgfs2/misc.c
@@ -15,6 +15,7 @@
 #include <dirent.h>
 #include <linux/kdev_t.h>
 #include <sys/sysmacros.h>
+#include <mntent.h>
 
 #include "libgfs2.h"
 
@@ -96,58 +97,95 @@ int compute_constants(struct gfs2_sbd *sdp)
 	return 0;
 }
 
-int check_for_gfs2(struct gfs2_sbd *sdp)
+int is_pathname_mounted(struct gfs2_sbd *sdp, int *ro_mount)
 {
 	FILE *fp;
-	char buffer[PATH_MAX];
-	char fstype[80];
-	int fsdump, fspass, ret;
-	char fspath[PATH_MAX];
-	char fsoptions[PATH_MAX];
-	char *realname;
-
-	realname = realpath(sdp->path_name, NULL);
-	if (!realname) {
-		return -1;
+	struct mntent *mnt;
+	dev_t file_dev=0, file_rdev=0;
+	ino_t file_ino=0;
+	struct stat st_buf;
+	/* Look sdp's device or mount point up in /proc/mounts: 1 = found, 0 = not */
+	*ro_mount = 0; /* stays 0 unless the matching entry has the "ro" option */
+	if ((fp = setmntent("/proc/mounts", "r")) == NULL) {
+		perror("open: /proc/mounts");
+		return 0; /* mount table unreadable: reported as "not mounted" */
 	}
-	fp = fopen("/proc/mounts", "r");
-	if (fp == NULL) {
-		free(realname);
-		return -1;
+	if (stat(sdp->path_name, &st_buf) == 0) {
+		if (S_ISBLK(st_buf.st_mode)) {
+#ifndef __GNU__ /* The GNU hurd is broken with respect to stat devices */
+			file_rdev = st_buf.st_rdev;
+#endif  /* __GNU__ */
+		} else {
+			file_dev = st_buf.st_dev;
+			file_ino = st_buf.st_ino;
+		}
 	}
-	while ((fgets(buffer, PATH_MAX - 1, fp)) != NULL) {
-		buffer[PATH_MAX - 1] = 0;
-
-		if (strstr(buffer, "0") == 0)
-			continue;
+	while ((mnt = getmntent (fp)) != NULL) {
+		/* Check if they specified the device instead of mnt point */
+		if (strcmp(sdp->device_name, mnt->mnt_fsname) == 0) {
+			strcpy(sdp->path_name, mnt->mnt_dir); /* fix it; NOTE(review): unbounded copy, assumes it fits */
+			break;
+		}
+		if (strcmp(sdp->path_name, mnt->mnt_dir) == 0) {
+			strcpy(sdp->device_name, mnt->mnt_fsname); /* fix it */
+			break;
+		}
+		if (stat(mnt->mnt_fsname, &st_buf) == 0) {
+			if (S_ISBLK(st_buf.st_mode)) {
+#ifndef __GNU__
+				if (file_rdev && (file_rdev == st_buf.st_rdev))
+					break;
+#endif  /* __GNU__ */
+			} else {
+				if (file_dev && ((file_dev == st_buf.st_dev) &&
+						 (file_ino == st_buf.st_ino)))
+					break;
+			}
+		}
+	}
+	endmntent (fp);
+	if (mnt == NULL)
+		return 0; /* no mount entry matched */
+	if (stat(mnt->mnt_dir, &st_buf) < 0) { /* on failure st_buf still holds an earlier stat() */
+		if (errno == ENOENT)
+			return 0;
+	} else if (file_rdev && (st_buf.st_dev != file_rdev)) {
+		/* Can't trust fstype because / has "rootfs". */
+		return 0;
+	}
+	if (hasmntopt(mnt, MNTOPT_RO))
+		*ro_mount = 1;
+	return 1; /* mounted */
+}
 
-		ret = sscanf(buffer, "%s %s %s %s %d %d", sdp->device_name,
-				fspath, fstype, fsoptions, &fsdump, &fspass);
-		if (6 != ret)
-			continue;
+int is_gfs2(struct gfs2_sbd *sdp)
+{
+	int fd, rc; /* rc: 1 if sdp->device_name carries a gfs2 superblock header, else 0 */
+	struct gfs2_sb sb; /* receives the raw, still disk-order, superblock bytes */
 
-		if (strcmp(fstype, "gfs2") != 0)
-			continue;
+	fd = open(sdp->device_name, O_RDWR); /* NOTE(review): only read from below; O_RDONLY may suffice -- confirm */
+	if (fd < 0)
+		return 0; /* device cannot be opened: reported as "not gfs2" */
 
-		/* Check if they specified the device instead of mnt point */
-		if (strcmp(sdp->device_name, realname) == 0)
-			strcpy(sdp->path_name, fspath); /* fix it */
-		else if (strcmp(fspath, realname) != 0)
-			continue;
+	rc = 0;
+	if (lseek(fd, GFS2_SB_ADDR * GFS2_BASIC_BLOCK, SEEK_SET) >= 0 && /* seek to the superblock */
+	    read(fd, &sb, sizeof(sb)) == sizeof(sb) && /* a short read counts as failure */
+	    be32_to_cpu(sb.sb_header.mh_magic) == GFS2_MAGIC &&
+	    be32_to_cpu(sb.sb_header.mh_type) == GFS2_METATYPE_SB)
+		rc = 1; /* magic number and metadata type both match */
+	close(fd);
+	return rc;
+}
 
-		fclose(fp);
-		free(realname);
-		if (strncmp(sdp->device_name, "/dev/loop", 9) == 0) {
-			errno = EINVAL;
-			return -1;
-		}
+int check_for_gfs2(struct gfs2_sbd *sdp)
+{
+	int ro; /* filled in by is_pathname_mounted(); not examined here */
 
-		return 0;
-	}
-	free(realname);
-	fclose(fp);
-	errno = EINVAL;
-	return -1;
+	if (is_pathname_mounted(sdp, &ro) && is_gfs2(sdp))
+		return 0; /* mounted, and the device holds a gfs2 superblock */
+	/* Not a mounted gfs2 fs: report EINVAL, as the previous implementation did */
+	errno = EINVAL;
+	return -1;
 }
 
 static int lock_for_admin(struct gfs2_sbd *sdp)
@@ -180,7 +218,7 @@ int mount_gfs2_meta(struct gfs2_sbd *sdp)
 	if(!mkdtemp(sdp->metafs_path))
 		return -1;
 
-	ret = mount(sdp->device_name, sdp->metafs_path, "gfs2", 0, "meta");
+	ret = mount(sdp->path_name, sdp->metafs_path, "gfs2meta", 0, NULL);
 	if (ret) {
 		rmdir(sdp->metafs_path);
 		return -1;
diff --git a/gfs2/libgfs2/ondisk.c b/gfs2/libgfs2/ondisk.c
index bd9afd3..829bb10 100644
--- a/gfs2/libgfs2/ondisk.c
+++ b/gfs2/libgfs2/ondisk.c
@@ -28,11 +28,11 @@
 /*
  * gfs2_xxx_in - read in an xxx struct
  * first arg: the cpu-order structure
- * buf: the disk-order buffer
+ * bh: the disk-order buffer_head
  *
  * gfs2_xxx_out - write out an xxx struct
  * first arg: the cpu-order structure
- * buf: the disk-order buffer
+ * bh: the disk-order buffer_head
  *
  * gfs2_xxx_print - print out an xxx struct
  * first arg: the cpu-order structure
@@ -60,24 +60,27 @@ void gfs2_inum_print(struct gfs2_inum *no)
 	pv(no, no_addr, "%llu", "0x%llx");
 }
 
-void gfs2_meta_header_in(struct gfs2_meta_header *mh, char *buf)
+void gfs2_meta_header_in(struct gfs2_meta_header *mh,
+			 struct gfs2_buffer_head *bh)
 {
-	struct gfs2_meta_header *str = (struct gfs2_meta_header *)buf;
+	struct gfs2_meta_header *str = (struct gfs2_meta_header *)bh->b_data;
 
 	CPIN_32(mh, str, mh_magic);
 	CPIN_32(mh, str, mh_type);
 	CPIN_32(mh, str, mh_format);
 }
 
-void gfs2_meta_header_out(struct gfs2_meta_header *mh, char *buf)
+void gfs2_meta_header_out(struct gfs2_meta_header *mh,
+			  struct gfs2_buffer_head *bh)
 {
-	struct gfs2_meta_header *str = (struct gfs2_meta_header *)buf;
+	struct gfs2_meta_header *str = (struct gfs2_meta_header *)bh->b_data;
 
 	CPOUT_32(mh, str, mh_magic);
 	CPOUT_32(mh, str, mh_type);
 	CPOUT_32(mh, str, mh_format);
 	str->__pad0 = 0;
 	str->__pad1 = 0;
+	bmodified(bh);
 }
 
 void gfs2_meta_header_print(struct gfs2_meta_header *mh)
@@ -87,11 +90,11 @@ void gfs2_meta_header_print(struct gfs2_meta_header *mh)
 	pv(mh, mh_format, "%u", "0x%x");
 }
 
-void gfs2_sb_in(struct gfs2_sb *sb, char *buf)
+void gfs2_sb_in(struct gfs2_sb *sb, struct gfs2_buffer_head *bh)
 {
-	struct gfs2_sb *str = (struct gfs2_sb *)buf;
+	struct gfs2_sb *str = (struct gfs2_sb *)bh->b_data;
 
-	gfs2_meta_header_in(&sb->sb_header, buf);
+	gfs2_meta_header_in(&sb->sb_header, bh);
 
 	CPIN_32(sb, str, sb_fs_format);
 	CPIN_32(sb, str, sb_multihost_format);
@@ -109,11 +112,11 @@ void gfs2_sb_in(struct gfs2_sb *sb, char *buf)
 #endif
 }
 
-void gfs2_sb_out(struct gfs2_sb *sb, char *buf)
+void gfs2_sb_out(struct gfs2_sb *sb, struct gfs2_buffer_head *bh)
 {
-	struct gfs2_sb *str = (struct gfs2_sb *)buf;
+	struct gfs2_sb *str = (struct gfs2_sb *)bh->b_data;
 
-	gfs2_meta_header_out(&sb->sb_header, buf);
+	gfs2_meta_header_out(&sb->sb_header, bh);
 
 	CPOUT_32(sb, str, sb_fs_format);
 	CPOUT_32(sb, str, sb_multihost_format);
@@ -129,6 +132,7 @@ void gfs2_sb_out(struct gfs2_sb *sb, char *buf)
 #ifdef GFS2_HAS_UUID
 	memcpy(str->sb_uuid, sb->sb_uuid, 16);
 #endif
+	bmodified(bh);
 }
 
 const char *str_uuid(const unsigned char *uuid)
@@ -220,11 +224,11 @@ void gfs2_rindex_print(struct gfs2_rindex *ri)
 	pv(ri, ri_bitbytes, "%u", "0x%x");
 }
 
-void gfs2_rgrp_in(struct gfs2_rgrp *rg, char *buf)
+void gfs2_rgrp_in(struct gfs2_rgrp *rg, struct gfs2_buffer_head *bh)
 {
-	struct gfs2_rgrp *str = (struct gfs2_rgrp *)buf;
+	struct gfs2_rgrp *str = (struct gfs2_rgrp *)bh->b_data;
 
-	gfs2_meta_header_in(&rg->rg_header, buf);
+	gfs2_meta_header_in(&rg->rg_header, bh);
 	CPIN_32(rg, str, rg_flags);
 	CPIN_32(rg, str, rg_free);
 	CPIN_32(rg, str, rg_dinodes);
@@ -232,16 +236,17 @@ void gfs2_rgrp_in(struct gfs2_rgrp *rg, char *buf)
 	CPIN_08(rg, str, rg_reserved, 36);
 }
 
-void gfs2_rgrp_out(struct gfs2_rgrp *rg, char *buf)
+void gfs2_rgrp_out(struct gfs2_rgrp *rg, struct gfs2_buffer_head *bh)
 {
-	struct gfs2_rgrp *str = (struct gfs2_rgrp *)buf;
+	struct gfs2_rgrp *str = (struct gfs2_rgrp *)bh->b_data;
 
-	gfs2_meta_header_out(&rg->rg_header, buf);
+	gfs2_meta_header_out(&rg->rg_header, bh);
 	CPOUT_32(rg, str, rg_flags);
 	CPOUT_32(rg, str, rg_free);
 	CPOUT_32(rg, str, rg_dinodes);
 
 	CPOUT_08(rg, str, rg_reserved, 36);
+	bmodified(bh);
 }
 
 void gfs2_rgrp_print(struct gfs2_rgrp *rg)
@@ -259,7 +264,8 @@ void gfs2_quota_in(struct gfs2_quota *qu, char *buf)
 	CPIN_64(qu, str, qu_limit);
 	CPIN_64(qu, str, qu_warn);
 	CPIN_64(qu, str, qu_value);
-	CPIN_08(qu, str, qu_reserved, sizeof(qu->qu_reserved));
+	CPIN_32(qu, str, qu_ll_next);
+	CPIN_08(qu, str, qu_reserved, 60);
 }
 
 void gfs2_quota_out(struct gfs2_quota *qu, char *buf)
@@ -269,7 +275,8 @@ void gfs2_quota_out(struct gfs2_quota *qu, char *buf)
 	CPOUT_64(qu, str, qu_limit);
 	CPOUT_64(qu, str, qu_warn);
 	CPOUT_64(qu, str, qu_value);
-	memset(qu->qu_reserved, 0, sizeof(qu->qu_reserved));
+	CPOUT_32(qu, str, qu_ll_next);
+	CPOUT_08(qu, str, qu_reserved, 60);
 }
 
 void gfs2_quota_print(struct gfs2_quota *qu)
@@ -279,11 +286,11 @@ void gfs2_quota_print(struct gfs2_quota *qu)
 	pv(qu, qu_value, "%lld", "0x%llx");
 }
 
-void gfs2_dinode_in(struct gfs2_dinode *di, char *buf)
+void gfs2_dinode_in(struct gfs2_dinode *di, struct gfs2_buffer_head *bh)
 {
-	struct gfs2_dinode *str = (struct gfs2_dinode *)buf;
+	struct gfs2_dinode *str = (struct gfs2_dinode *)bh->b_data;
 
-	gfs2_meta_header_in(&di->di_header, buf);
+	gfs2_meta_header_in(&di->di_header, bh);
 	gfs2_inum_in(&di->di_num, (char *)&str->di_num);
 
 	CPIN_32(di, str, di_mode);
@@ -314,11 +321,11 @@ void gfs2_dinode_in(struct gfs2_dinode *di, char *buf)
 	CPIN_08(di, str, di_reserved, 32);
 }
 
-void gfs2_dinode_out(struct gfs2_dinode *di, char *buf)
+void gfs2_dinode_out(struct gfs2_dinode *di, struct gfs2_buffer_head *bh)
 {
-	struct gfs2_dinode *str = (struct gfs2_dinode *)buf;
+	struct gfs2_dinode *str = (struct gfs2_dinode *)bh->b_data;
 
-	gfs2_meta_header_out(&di->di_header, buf);
+	gfs2_meta_header_out(&di->di_header, bh);
 	gfs2_inum_out(&di->di_num, (char *)&str->di_num);
 
 	CPOUT_32(di, str, di_mode);
@@ -346,6 +353,7 @@ void gfs2_dinode_out(struct gfs2_dinode *di, char *buf)
 	CPOUT_64(di, str, di_eattr);
 
 	CPOUT_08(di, str, di_reserved, 32);
+	bmodified(bh);
 }
 
 void gfs2_dinode_print(struct gfs2_dinode *di)
@@ -416,11 +424,11 @@ void gfs2_dirent_print(struct gfs2_dirent *de, char *name)
 	print_it("  name", "%s", NULL, buf);
 }
 
-void gfs2_leaf_in(struct gfs2_leaf *lf, char *buf)
+void gfs2_leaf_in(struct gfs2_leaf *lf, struct gfs2_buffer_head *bh)
 {
-	struct gfs2_leaf *str = (struct gfs2_leaf *)buf;
+	struct gfs2_leaf *str = (struct gfs2_leaf *)bh->b_data;
 
-	gfs2_meta_header_in(&lf->lf_header, buf);
+	gfs2_meta_header_in(&lf->lf_header, bh);
 	CPIN_16(lf, str, lf_depth);
 	CPIN_16(lf, str, lf_entries);
 	CPIN_32(lf, str, lf_dirent_format);
@@ -429,17 +437,18 @@ void gfs2_leaf_in(struct gfs2_leaf *lf, char *buf)
 	CPIN_08(lf, str, lf_reserved, 32);
 }
 
-void gfs2_leaf_out(struct gfs2_leaf *lf, char *buf)
+void gfs2_leaf_out(struct gfs2_leaf *lf, struct gfs2_buffer_head *bh)
 {
-	struct gfs2_leaf *str = (struct gfs2_leaf *)buf;
+	struct gfs2_leaf *str = (struct gfs2_leaf *)bh->b_data;
 
-	gfs2_meta_header_out(&lf->lf_header, buf);
+	gfs2_meta_header_out(&lf->lf_header, bh);
 	CPOUT_16(lf, str, lf_depth);
 	CPOUT_16(lf, str, lf_entries);
 	CPOUT_32(lf, str, lf_dirent_format);
 	CPOUT_64(lf, str, lf_next);
 
 	CPOUT_08(lf, str, lf_reserved, 32);
+	bmodified(bh);
 }
 
 void gfs2_leaf_print(struct gfs2_leaf *lf)
@@ -492,11 +501,12 @@ void gfs2_ea_header_print(struct gfs2_ea_header *ea, char *name)
 	print_it("  name", "%s", NULL, buf);
 }
 
-void gfs2_log_header_in(struct gfs2_log_header *lh, char *buf)
+void gfs2_log_header_in(struct gfs2_log_header *lh,
+			struct gfs2_buffer_head *bh)
 {
-	struct gfs2_log_header *str = (struct gfs2_log_header *)buf;
+	struct gfs2_log_header *str = (struct gfs2_log_header *)bh->b_data;
 
-	gfs2_meta_header_in(&lh->lh_header, buf);
+	gfs2_meta_header_in(&lh->lh_header, bh);
 	CPIN_64(lh, str, lh_sequence);
 	CPIN_32(lh, str, lh_flags);
 	CPIN_32(lh, str, lh_tail);
@@ -504,16 +514,18 @@ void gfs2_log_header_in(struct gfs2_log_header *lh, char *buf)
 	CPIN_32(lh, str, lh_hash);
 }
 
-void gfs2_log_header_out(struct gfs2_log_header *lh, char *buf)
+void gfs2_log_header_out(struct gfs2_log_header *lh,
+			 struct gfs2_buffer_head *bh)
 {
-	struct gfs2_log_header *str = (struct gfs2_log_header *)buf;
+	struct gfs2_log_header *str = (struct gfs2_log_header *)bh->b_data;
 
-	gfs2_meta_header_out(&lh->lh_header, buf);
+	gfs2_meta_header_out(&lh->lh_header, bh);
 	CPOUT_64(lh, str, lh_sequence);
 	CPOUT_32(lh, str, lh_flags);
 	CPOUT_32(lh, str, lh_tail);
 	CPOUT_32(lh, str, lh_blkno);
 	CPOUT_32(lh, str, lh_hash);
+	bmodified(bh);
 }
 
 void gfs2_log_header_print(struct gfs2_log_header *lh)
@@ -526,11 +538,12 @@ void gfs2_log_header_print(struct gfs2_log_header *lh)
 	pv(lh, lh_hash, "0x%.8X", NULL);
 }
 
-void gfs2_log_descriptor_in(struct gfs2_log_descriptor *ld, char *buf)
+void gfs2_log_descriptor_in(struct gfs2_log_descriptor *ld,
+			    struct gfs2_buffer_head *bh)
 {
-	struct gfs2_log_descriptor *str = (struct gfs2_log_descriptor *)buf;
+	struct gfs2_log_descriptor *str = (struct gfs2_log_descriptor *)bh->b_data;
 
-	gfs2_meta_header_in(&ld->ld_header, buf);
+	gfs2_meta_header_in(&ld->ld_header, bh);
 	CPIN_32(ld, str, ld_type);
 	CPIN_32(ld, str, ld_length);
 	CPIN_32(ld, str, ld_data1);
@@ -539,17 +552,19 @@ void gfs2_log_descriptor_in(struct gfs2_log_descriptor *ld, char *buf)
 	CPIN_08(ld, str, ld_reserved, 32);
 }
 
-void gfs2_log_descriptor_out(struct gfs2_log_descriptor *ld, char *buf)
+void gfs2_log_descriptor_out(struct gfs2_log_descriptor *ld,
+			     struct gfs2_buffer_head *bh)
 {
-	struct gfs2_log_descriptor *str = (struct gfs2_log_descriptor *)buf;
+	struct gfs2_log_descriptor *str = (struct gfs2_log_descriptor *)bh->b_data;
 
-	gfs2_meta_header_out(&ld->ld_header, buf);
+	gfs2_meta_header_out(&ld->ld_header, bh);
 	CPOUT_32(ld, str, ld_type);
 	CPOUT_32(ld, str, ld_length);
 	CPOUT_32(ld, str, ld_data1);
 	CPOUT_32(ld, str, ld_data2);
 
 	CPOUT_08(ld, str, ld_reserved, 32);
+	bmodified(bh);
 }
 
 void gfs2_log_descriptor_print(struct gfs2_log_descriptor *ld)
@@ -561,28 +576,6 @@ void gfs2_log_descriptor_print(struct gfs2_log_descriptor *ld)
 	pv(ld, ld_data2, "%u", "0x%x");
 }
 
-void gfs2_inum_range_in(struct gfs2_inum_range *ir, char *buf)
-{
-	struct gfs2_inum_range *str = (struct gfs2_inum_range *)buf;
-
-	CPIN_64(ir, str, ir_start);
-	CPIN_64(ir, str, ir_length);
-}
-
-void gfs2_inum_range_out(struct gfs2_inum_range *ir, char *buf)
-{
-	struct gfs2_inum_range *str = (struct gfs2_inum_range *)buf;
-
-	CPOUT_64(ir, str, ir_start);
-	CPOUT_64(ir, str, ir_length);
-}
-
-void gfs2_inum_range_print(struct gfs2_inum_range *ir)
-{
-	pv(ir, ir_start, "%llu", "0x%llx");
-	pv(ir, ir_length, "%llu", "0x%llx");
-}
-
 void gfs2_statfs_change_in(struct gfs2_statfs_change *sc, char *buf)
 {
 	struct gfs2_statfs_change *str = (struct gfs2_statfs_change *)buf;
@@ -608,22 +601,25 @@ void gfs2_statfs_change_print(struct gfs2_statfs_change *sc)
 	pv(sc, sc_dinodes, "%lld", "0x%llx");
 }
 
-void gfs2_quota_change_in(struct gfs2_quota_change *qc, char *buf)
+void gfs2_quota_change_in(struct gfs2_quota_change *qc,
+			  struct gfs2_buffer_head *bh)
 {
-	struct gfs2_quota_change *str = (struct gfs2_quota_change *)buf;
+	struct gfs2_quota_change *str = (struct gfs2_quota_change *)bh->b_data;
 
 	CPIN_64(qc, str, qc_change);
 	CPIN_32(qc, str, qc_flags);
 	CPIN_32(qc, str, qc_id);
 }
 
-void gfs2_quota_change_out(struct gfs2_quota_change *qc, char *buf)
+void gfs2_quota_change_out(struct gfs2_quota_change *qc,
+			   struct gfs2_buffer_head *bh)
 {
-	struct gfs2_quota_change *str = (struct gfs2_quota_change *)buf;
+	struct gfs2_quota_change *str = (struct gfs2_quota_change *)bh->b_data;
 
 	CPOUT_64(qc, str, qc_change);
 	CPOUT_32(qc, str, qc_flags);
 	CPOUT_32(qc, str, qc_id);
+	bmodified(bh);
 }
 
 void gfs2_quota_change_print(struct gfs2_quota_change *qc)
diff --git a/gfs2/libgfs2/recovery.c b/gfs2/libgfs2/recovery.c
index b10fe77..74f896e 100644
--- a/gfs2/libgfs2/recovery.c
+++ b/gfs2/libgfs2/recovery.c
@@ -27,11 +27,11 @@ int gfs2_replay_read_block(struct gfs2_inode *ip, unsigned int blk,
 	uint64_t dblock;
 	uint32_t extlen;
 
-	block_map(ip, blk, &new, &dblock, &extlen, FALSE, not_updated);
+	block_map(ip, blk, &new, &dblock, &extlen, FALSE);
 	if (!dblock)
 		return -EIO;
 
-	*bh = bread(&ip->i_sbd->buf_list, dblock);
+	*bh = bread(ip->i_sbd, dblock);
 	return 0;
 }
 
@@ -66,8 +66,8 @@ int get_log_header(struct gfs2_inode *ip, unsigned int blk,
 	tmp->lh_hash = 0;
 	hash = gfs2_disk_hash(bh->b_data, sizeof(struct gfs2_log_header));
 	tmp->lh_hash = saved_hash;
-	gfs2_log_header_in(&lh, bh->b_data);
-	brelse(bh, not_updated);
+	gfs2_log_header_in(&lh, bh);
+	brelse(bh);
 
 	if (error || lh.lh_blkno != blk || lh.lh_hash != hash)
 		return 1;
@@ -219,11 +219,11 @@ int clean_journal(struct gfs2_inode *ip, struct gfs2_log_header *head)
 
 	lblock = head->lh_blkno;
 	gfs2_replay_incr_blk(ip, &lblock);
-	block_map(ip, lblock, &new, &dblock, &extlen, 0, not_updated);
+	block_map(ip, lblock, &new, &dblock, &extlen, 0);
 	if (!dblock)
 		return -EIO;
 
-	bh = bread(&ip->i_sbd->buf_list, dblock);
+	bh = bread(ip->i_sbd, dblock);
 	memset(bh->b_data, 0, ip->i_sbd->bsize);
 
 	lh = (struct gfs2_log_header *)bh->b_data;
@@ -236,8 +236,8 @@ int clean_journal(struct gfs2_inode *ip, struct gfs2_log_header *head)
 	lh->lh_blkno = cpu_to_be32(lblock);
 	hash = gfs2_disk_hash((const char *)lh, sizeof(struct gfs2_log_header));
 	lh->lh_hash = cpu_to_be32(hash);
-
-	brelse(bh, updated);
+	bmodified(bh);
+	brelse(bh);
 
 	return 0;
 }
diff --git a/gfs2/libgfs2/rgrp.c b/gfs2/libgfs2/rgrp.c
index fc548d1..f42fdaa 100644
--- a/gfs2/libgfs2/rgrp.c
+++ b/gfs2/libgfs2/rgrp.c
@@ -94,23 +94,30 @@ int gfs2_compute_bitstructs(struct gfs2_sbd *sdp, struct rgrp_list *rgd)
 struct rgrp_list *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, uint64_t blk)
 {
 	osi_list_t *tmp;
-	struct rgrp_list *rgd = NULL;
+	struct rgrp_list *rgd;
+	static struct rgrp_list *prev_rgd = NULL;
 	struct gfs2_rindex *ri;
 
-	for(tmp = sdp->rglist.next; tmp != &sdp->rglist; tmp = tmp->next){
+	if (prev_rgd) {
+		ri = &prev_rgd->ri;
+		if (ri->ri_data0 <= blk && blk < ri->ri_data0 + ri->ri_data)
+			return prev_rgd;
+	}
+
+	for (tmp = sdp->rglist.next; tmp != &sdp->rglist; tmp = tmp->next) {
 		rgd = osi_list_entry(tmp, struct rgrp_list, list);
 		ri = &rgd->ri;
 
-		if (ri->ri_data0 <= blk && blk < ri->ri_data0 + ri->ri_data){
-			break;
-		} else
-			rgd = NULL;
+		if (ri->ri_data0 <= blk && blk < ri->ri_data0 + ri->ri_data) {
+			prev_rgd = rgd;
+			return rgd;
+		}
 	}
-	return rgd;
+	return NULL;
 }
 
 /**
- * fs_rgrp_read - read in the resource group information from disk.
+ * gfs2_rgrp_read - read in the resource group information from disk.
  * @rgd - resource group structure
  * returns: 0 if no error, otherwise the block number that failed
  */
@@ -119,7 +126,7 @@ uint64_t gfs2_rgrp_read(struct gfs2_sbd *sdp, struct rgrp_list *rgd)
 	int x, length = rgd->ri.ri_length;
 
 	for (x = 0; x < length; x++){
-		rgd->bh[x] = bread(&sdp->nvbuf_list, rgd->ri.ri_addr + x);
+		rgd->bh[x] = bread(sdp, rgd->ri.ri_addr + x);
 		if(gfs2_check_meta(rgd->bh[x],
 				   (x) ? GFS2_METATYPE_RB : GFS2_METATYPE_RG))
 		{
@@ -127,32 +134,33 @@ uint64_t gfs2_rgrp_read(struct gfs2_sbd *sdp, struct rgrp_list *rgd)
 
 			error = rgd->ri.ri_addr + x;
 			for (; x >= 0; x--)
-				brelse(rgd->bh[x], not_updated);
+				brelse(rgd->bh[x]);
 			return error;
 		}
 	}
 
-	gfs2_rgrp_in(&rgd->rg, rgd->bh[0]->b_data);
+	gfs2_rgrp_in(&rgd->rg, rgd->bh[0]);
 	return 0;
 }
 
-void gfs2_rgrp_relse(struct rgrp_list *rgd, enum update_flags is_updated)
+void gfs2_rgrp_relse(struct rgrp_list *rgd)
 {
 	int x, length = rgd->ri.ri_length;
 
-	for (x = 0; x < length; x++)
-		brelse(rgd->bh[x], is_updated);
+	for (x = 0; x < length; x++) {
+		brelse(rgd->bh[x]);
+		rgd->bh[x] = NULL;
+	}
 }
 
-void gfs2_rgrp_free(osi_list_t *rglist, enum update_flags is_updated)
+void gfs2_rgrp_free(osi_list_t *rglist)
 {
 	struct rgrp_list *rgd;
 
 	while(!osi_list_empty(rglist->next)){
 		rgd = osi_list_entry(rglist->next, struct rgrp_list, list);
-		if (rgd->bh && rgd->bh[0] && /* if a buffer exists and       */
-			rgd->bh[0]->b_count) /* the 1st buffer is allocated */
-			gfs2_rgrp_relse(rgd, is_updated); /* free them all. */
+		if (rgd->bh && rgd->bh[0]) /* if a buffer exists        */
+			gfs2_rgrp_relse(rgd); /* free them all. */
 		if(rgd->bits)
 			free(rgd->bits);
 		if(rgd->bh) {
diff --git a/gfs2/libgfs2/structures.c b/gfs2/libgfs2/structures.c
index 8e9ad86..37ed8e6 100644
--- a/gfs2/libgfs2/structures.c
+++ b/gfs2/libgfs2/structures.c
@@ -30,6 +30,7 @@ int build_master(struct gfs2_sbd *sdp)
 		printf("\nMaster dir:\n");
 		gfs2_dinode_print(&sdp->master_dir->i_di);
 	}
+	sdp->master_dir->bh_owned = 1;
 	return 0;
 }
 
@@ -41,9 +42,10 @@ void build_sb(struct gfs2_sbd *sdp, const unsigned char *uuid)
 
 	/* Zero out the beginning of the device up to the superblock */
 	for (x = 0; x < sdp->sb_addr; x++) {
-		bh = bget(&sdp->buf_list, x);
+		bh = bget(sdp, x);
 		memset(bh->b_data, 0, sdp->bsize);
-		brelse(bh, updated);
+		bmodified(bh);
+		brelse(bh);
 	}
 
 	memset(&sb, 0, sizeof(struct gfs2_sb));
@@ -61,9 +63,9 @@ void build_sb(struct gfs2_sbd *sdp, const unsigned char *uuid)
 #ifdef GFS2_HAS_UUID
 	memcpy(sb.sb_uuid, uuid, sizeof(sb.sb_uuid));
 #endif
-	bh = bget(&sdp->buf_list, sdp->sb_addr);
-	gfs2_sb_out(&sb, bh->b_data);
-	brelse(bh, updated);
+	bh = bget(sdp, sdp->sb_addr);
+	gfs2_sb_out(&sb, bh);
+	brelse(bh);
 
 	if (sdp->debug) {
 		printf("\nSuper Block:\n");
@@ -95,20 +97,23 @@ int write_journal(struct gfs2_sbd *sdp, struct gfs2_inode *ip, unsigned int j,
 		struct gfs2_buffer_head *bh = get_file_buf(ip, x, TRUE);
 		if (!bh)
 			return -1;
-		brelse(bh, updated);
+		bmodified(bh);
+		brelse(bh);
 	}
 	for (x = 0; x < blocks; x++) {
 		struct gfs2_buffer_head *bh = get_file_buf(ip, x, FALSE);
 		if (!bh)
 			return -1;
 
+		memset(bh->b_data, 0, sdp->bsize);
 		lh.lh_sequence = seq;
 		lh.lh_blkno = x;
-		gfs2_log_header_out(&lh, bh->b_data);
+		gfs2_log_header_out(&lh, bh);
 		hash = gfs2_disk_hash(bh->b_data, sizeof(struct gfs2_log_header));
 		((struct gfs2_log_header *)bh->b_data)->lh_hash = cpu_to_be32(hash);
 
-		brelse(bh, updated);
+		bmodified(bh);
+		brelse(bh);
 
 		if (++seq == blocks)
 			seq = 0;
@@ -140,7 +145,7 @@ int build_jindex(struct gfs2_sbd *sdp)
 			      sdp->jsize << 20 >> sdp->sd_sb.sb_bsize_shift);
 		if (ret)
 			return ret;
-		inode_put(ip, updated);
+		inode_put(&ip);
 	}
 
 	if (sdp->debug) {
@@ -148,7 +153,7 @@ int build_jindex(struct gfs2_sbd *sdp)
 		gfs2_dinode_print(&jindex->i_di);
 	}
 
-	inode_put(jindex, updated);
+	inode_put(&jindex);
 	return 0;
 }
 
@@ -162,13 +167,14 @@ static int build_inum_range(struct gfs2_inode *per_node, unsigned int j)
 	ip = createi(per_node, name, S_IFREG | 0600,
 		     GFS2_DIF_SYSTEM | GFS2_DIF_JDATA);
 	ip->i_di.di_size = sizeof(struct gfs2_inum_range);
+	gfs2_dinode_out(&ip->i_di, ip->i_bh);
 
 	if (sdp->debug) {
 		printf("\nInum Range %u:\n", j);
 		gfs2_dinode_print(&ip->i_di);
 	}
 
-	inode_put(ip, updated);
+	inode_put(&ip);
 	return 0;
 }
 
@@ -182,13 +188,14 @@ static void build_statfs_change(struct gfs2_inode *per_node, unsigned int j)
 	ip = createi(per_node, name, S_IFREG | 0600,
 		     GFS2_DIF_SYSTEM | GFS2_DIF_JDATA);
 	ip->i_di.di_size = sizeof(struct gfs2_statfs_change);
+	gfs2_dinode_out(&ip->i_di, ip->i_bh);
 
 	if (sdp->debug) {
 		printf("\nStatFS Change %u:\n", j);
 		gfs2_dinode_print(&ip->i_di);
 	}
 
-	inode_put(ip, updated);
+	inode_put(&ip);
 }
 
 static int build_quota_change(struct gfs2_inode *per_node, unsigned int j)
@@ -199,6 +206,8 @@ static int build_quota_change(struct gfs2_inode *per_node, unsigned int j)
 	struct gfs2_inode *ip;
 	unsigned int blocks = sdp->qcsize << (20 - sdp->sd_sb.sb_bsize_shift);
 	unsigned int x;
+	unsigned int hgt;
+	struct gfs2_buffer_head *bh;
 
 	memset(&mh, 0, sizeof(struct gfs2_meta_header));
 	mh.mh_magic = GFS2_MAGIC;
@@ -206,17 +215,19 @@ static int build_quota_change(struct gfs2_inode *per_node, unsigned int j)
 	mh.mh_format = GFS2_FORMAT_QC;
 
 	sprintf(name, "quota_change%u", j);
-	ip = createi(per_node, name, S_IFREG | 0600,
-		     GFS2_DIF_SYSTEM);
+	ip = createi(per_node, name, S_IFREG | 0600, GFS2_DIF_SYSTEM);
+
+	hgt = calc_tree_height(ip, (blocks + 1) * sdp->bsize);
+	build_height(ip, hgt);
 
 	for (x = 0; x < blocks; x++) {
-		struct gfs2_buffer_head *bh = get_file_buf(ip, ip->i_di.di_size >> sdp->sd_sb.sb_bsize_shift, FALSE);
+		bh = get_file_buf(ip, x, FALSE);
 		if (!bh)
 			return -1;
 
-		gfs2_meta_header_out(&mh, bh->b_data);
-
-		brelse(bh, updated);
+		memset(bh->b_data, 0, sdp->bsize);
+		gfs2_meta_header_out(&mh, bh);
+		brelse(bh);
 	}
 
 	if (sdp->debug) {
@@ -224,7 +235,7 @@ static int build_quota_change(struct gfs2_inode *per_node, unsigned int j)
 		gfs2_dinode_print(&ip->i_di);
 	}
 
-	inode_put(ip, updated);
+	inode_put(&ip);
 	return 0;
 }
 
@@ -247,7 +258,7 @@ int build_per_node(struct gfs2_sbd *sdp)
 		gfs2_dinode_print(&per_node->i_di);
 	}
 
-	inode_put(per_node, updated);
+	inode_put(&per_node);
 	return 0;
 }
 
@@ -294,9 +305,9 @@ int build_rindex(struct gfs2_sbd *sdp)
 	ip = createi(sdp->master_dir, "rindex", S_IFREG | 0600,
 		     GFS2_DIF_SYSTEM | GFS2_DIF_JDATA);
 	ip->i_di.di_payload_format = GFS2_FORMAT_RI;
+	bmodified(ip->i_bh);
 
-	for (head = &sdp->rglist, tmp = head->next;
-	     tmp != head;
+	for (head = &sdp->rglist, tmp = head->next; tmp != head;
 	     tmp = tmp->next) {
 		rl = osi_list_entry(tmp, struct rgrp_list, list);
 
@@ -313,7 +324,7 @@ int build_rindex(struct gfs2_sbd *sdp)
 		gfs2_dinode_print(&ip->i_di);
 	}
 
-	inode_put(ip, updated);
+	inode_put(&ip);
 	return 0;
 }
 
@@ -327,6 +338,7 @@ int build_quota(struct gfs2_sbd *sdp)
 	ip = createi(sdp->master_dir, "quota", S_IFREG | 0600,
 		     GFS2_DIF_SYSTEM | GFS2_DIF_JDATA);
 	ip->i_di.di_payload_format = GFS2_FORMAT_QU;
+	bmodified(ip->i_bh);
 
 	memset(&qu, 0, sizeof(struct gfs2_quota));
 	qu.qu_value = 1;
@@ -344,7 +356,7 @@ int build_quota(struct gfs2_sbd *sdp)
 		gfs2_quota_print(&qu);
 	}
 
-	inode_put(ip, updated);
+	inode_put(&ip);
 	return 0;
 }
 
@@ -365,6 +377,7 @@ int build_root(struct gfs2_sbd *sdp)
 		printf("\nRoot directory:\n");
 		gfs2_dinode_print(&sdp->md.rooti->i_di);
 	}
+	sdp->md.rooti->bh_owned = 1;
 	return 0;
 }
 
@@ -408,16 +421,6 @@ int do_init_statfs(struct gfs2_sbd *sdp)
 	return 0;
 }
 
-struct gfs2_inode *gfs2_load_inode(struct gfs2_sbd *sbp, uint64_t block)
-{
-	struct gfs2_buffer_head *bh;
-	struct gfs2_inode *ip;
-
-	bh = bread(&sbp->buf_list, block);
-	ip = inode_get(sbp, bh);
-	return ip;
-}
-
 int gfs2_check_meta(struct gfs2_buffer_head *bh, int type)
 {
 	uint32_t check_magic = ((struct gfs2_meta_header *)(bh->b_data))->mh_magic;
@@ -490,12 +493,12 @@ int gfs2_next_rg_metatype(struct gfs2_sbd *sdp, struct rgrp_list *rgd,
 
 	do{
 		if (bh)
-			brelse(bh, not_updated);
+			brelse(bh);
 		if (gfs2_next_rg_meta(rgd, block, first))
 			return -1;
-		bh = bread(&sdp->buf_list, *block);
+		bh = bread(sdp, *block);
 		first = 0;
 	} while(gfs2_check_meta(bh, type));
-	brelse(bh, not_updated);
+	brelse(bh);
 	return 0;
 }
diff --git a/gfs2/libgfs2/super.c b/gfs2/libgfs2/super.c
index ba3af34..1fa18ce 100644
--- a/gfs2/libgfs2/super.c
+++ b/gfs2/libgfs2/super.c
@@ -58,9 +58,9 @@ int read_sb(struct gfs2_sbd *sdp)
 	unsigned int x;
 	int error;
 
-	bh = bread(&sdp->buf_list, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift);
-	gfs2_sb_in(&sdp->sd_sb, bh->b_data);
-	brelse(bh, not_updated);
+	bh = bread(sdp, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift);
+	gfs2_sb_in(&sdp->sd_sb, bh);
+	brelse(bh);
 
 	error = check_sb(&sdp->sd_sb);
 	if (error)
@@ -131,7 +131,7 @@ int ji_update(struct gfs2_sbd *sdp)
 	int i;
 
 	if(!ip) {
-		log_crit("Journal inode not found.\n");
+		log_crit("Journal index inode not found.\n");
 		return -1;
 	}
 
@@ -225,25 +225,26 @@ int rindex_read(struct gfs2_sbd *sdp, int fd, int *count1)
 int ri_update(struct gfs2_sbd *sdp, int fd, int *rgcount)
 {
 	struct rgrp_list *rgd;
+	struct gfs2_rindex *ri;
 	osi_list_t *tmp;
 	int count1 = 0, count2 = 0;
 	uint64_t errblock = 0;
+	uint64_t rmax = 0;
 
 	if (rindex_read(sdp, fd, &count1))
 	    goto fail;
 	for (tmp = sdp->rglist.next; tmp != &sdp->rglist; tmp = tmp->next) {
-		enum update_flags f;
-
-		f = not_updated;
 		rgd = osi_list_entry(tmp, struct rgrp_list, list);
 		errblock = gfs2_rgrp_read(sdp, rgd);
 		if (errblock)
 			return errblock;
-		else
-			gfs2_rgrp_relse(rgd, f);
+		ri = &rgd->ri;
+		if (ri->ri_data0 + ri->ri_data - 1 > rmax)
+			rmax = ri->ri_data0 + ri->ri_data - 1;
 		count2++;
 	}
 
+	sdp->fssize = rmax;
 	*rgcount = count1;
 	if (count1 != count2)
 		goto fail;
@@ -251,7 +252,7 @@ int ri_update(struct gfs2_sbd *sdp, int fd, int *rgcount)
 	return 0;
 
  fail:
-	gfs2_rgrp_free(&sdp->rglist, not_updated);
+	gfs2_rgrp_free(&sdp->rglist);
 	return -1;
 }
 
@@ -259,11 +260,10 @@ int write_sb(struct gfs2_sbd *sbp)
 {
 	struct gfs2_buffer_head *bh;
 
-	bh = bread(&sbp->buf_list, GFS2_SB_ADDR >> sbp->sd_fsb2bb_shift);
-	gfs2_sb_out(&sbp->sd_sb, bh->b_data);
-	brelse(bh, updated);
-	bcommit(&sbp->buf_list); /* make sure the change gets to disk ASAP */
-	bcommit(&sbp->nvbuf_list); /* make sure the change gets to disk ASAP */
+	bh = bread(sbp, GFS2_SB_ADDR >> sbp->sd_fsb2bb_shift);
+	gfs2_sb_out(&sbp->sd_sb, bh);
+	brelse(bh);
+	fsync(sbp->device_fd); /* make sure the change gets to disk ASAP */
 	return 0;
 }
 
diff --git a/gfs2/man/gfs2_tool.8 b/gfs2/man/gfs2_tool.8
index 1533de7..dccd631 100644
--- a/gfs2/man/gfs2_tool.8
+++ b/gfs2/man/gfs2_tool.8
@@ -16,7 +16,10 @@ as gfs_tool used to.
 .SH COMMANDS
 .TP
 \fBclearflag\fP \fIFlag\fR \fIFile1\fR \fIFile2\fR \fI...\fR 
-Clear an attribute flag on a file. See \fBsetflag\fP for available flags.
+Clear an attribute flag on a file. This is now obsolete and kept
+only for backward compatibility; chattr is the preferred way to
+clear attribute flags. See \fBsetflag\fP for available flags. This
+option will probably be removed at a future date.
 .TP
 \fBfreeze\fP \fIMountPoint\fR
 Freeze (quiesce) a GFS2 cluster.
@@ -35,7 +38,8 @@ Print out information about the journals in a mounted filesystem.
 \fBlockdump\fP \fIMountPoint\fR
 Print out information about the locks this machine holds for a given
 filesystem. This information is also available via the debugfs
-glock dump file.
+glock dump file, and accessing that file is the preferred method
+of obtaining a dump of the glock state.
 .\".TP
 .\"\fBrindex\fP \fIMountPoint\fR
 .\"Print out the resource group index of a mounted filesystem.
@@ -73,7 +77,7 @@ Print out the superblock.
 Set an attribute flag on a file.  The currently supported flags are 
 jdata, immutable, appendonly, noatime, and sync.  The 
 chattr command is the preferred way to set attributes on
-files.
+files. This option will probably be removed at a future date.
 
 The \fIjdata\fR flag causes all the data written to a file
 to be journaled.  If the \fIjdata\fR flag is set for a directory,
@@ -108,4 +112,6 @@ Print out the version of GFS2 that this program goes with.
 .TP
 \fBwithdraw\fP \fIMountPoint\fR
 Cause GFS2 to abnormally shutdown a given filesystem on this node.
+This feature is only useful for testing and should not be used
+during normal filesystem operation.
 
diff --git a/gfs2/man/mount.gfs2.8 b/gfs2/man/mount.gfs2.8
index fcf99c4..29d14c8 100644
--- a/gfs2/man/mount.gfs2.8
+++ b/gfs2/man/mount.gfs2.8
@@ -4,18 +4,17 @@
 mount.gfs2 - GFS2 mount options
 
 .SH SYNOPSIS
-.B mount
-[\fIStandardMountOptions\fR] \fB-t\fP gfs2 \fIDEVICE\fR \fIMOUNTPOINT\fR \fB-o\fP [GFS2Option1,GFS2Option2,GFS2OptionX...]
+.BI "mount \-a [\-fnrsvw] \-t " gfs2 " [\-O " options "]
+.br
+.BI "mount [\-fnrsvw] \-t " gfs2 " [\-o " options " ] " "device dir"
 
 .SH DESCRIPTION
-GFS2 may be used as a local (single computer) filesystem, but its real purpose
-is in clusters, where multiple computers (nodes) share a common storage device.
-
-Above is the format typically used to mount a GFS2 filesystem, using the
-\fBmount\fP(8) command.  The \fIdevice\fR may be any block device on which you
+For details on the common mount options, please see the
+\fBmount\fP(8) command man page.
+The \fIdevice\fR may be any block device on which you
 have created a GFS2 filesystem.  Examples include a
 single disk partition (e.g. /dev/sdb3), a loopback device, a device exported
-from another node (e.g. an iSCSI device or a \fBgnbd\fP(8) device), or a
+from another node (e.g. an iSCSI device), or a
 logical volume (typically comprised of a number of individual disks).
 
 \fIdevice\fR does not necessarily need to match the device name as seen on
@@ -31,8 +30,6 @@ different computers.  Each computer must see an entire filesystem.  You
 may, however, mount several GFS2 filesystems if you want to distribute your
 data storage in a controllable way.
 
-\fImountpoint\fR is the same as \fIdir\fR in the \fBmount\fP(8) man page.
-
 This man page describes GFS2-specific options that can be passed to the GFS2 
 file system at mount time, using the \fB-o\fP flag.  There are many other
 \fB-o\fP options handled by the generic mount command \fBmount\fP(8).
@@ -41,9 +38,9 @@ interpreted by the mount command nor by the kernel's Virtual File System.  GFS2
 and non-GFS2 options may be intermingled after the \fB-o\fP, separated by
 commas (but no spaces).
 
-The options debug, commit, discard, acl, quota, suiddir, and data can be
+The options commit, discard, errors, quota_quantum, statfs_quantum, statfs_percent, barrier, acl, quota, suiddir, and data can be
 changed after mount using the "mount -o remount,option /mountpoint" command.
-The options debug, quota, discard, acl, and suiddir support the "no"
+The options quota, discard, barrier, acl, and suiddir support the "no"
 prefix.  For example, "noacl" turns off what "acl" turns on.
 
 If you have trouble mounting GFS2, check the syslog (e.g. /var/log/messages)
@@ -51,15 +48,13 @@ for specific error messages.
 
 .SH OPTIONS
 .TP
-\fBlockproto=\fP\fILockModuleName\fR
+\fBlockproto=\fP\fILockProtoName\fR
 This specifies which inter-node lock protocol is used by the GFS2 filesystem
 for this mount, overriding the default lock protocol name stored in the
 filesystem's on-disk superblock.
 
-The \fILockModuleName\fR must be an exact match of the protocol name presented
-by the lock module when it registers with the lock harness.  Traditionally,
-this matches the .o filename of the lock module, e.g. \fIlock_dlm\fR,
-or \fIlock_nolock\fR.
+The \fILockProtoName\fR must be one of the supported locking protocols;
+currently these are \fIlock_nolock\fR and \fIlock_dlm\fR.
 
 The default lock protocol name is written to disk initially when creating the
 filesystem with \fBmkfs.gfs2\fP(8), -p option.  It can be changed on-disk by
@@ -67,7 +62,9 @@ using the \fBgfs2_tool\fP(8) utility's \fBsb proto\fP command.
 
 The \fBlockproto\fP mount option should be used only under special
 circumstances in which you want to temporarily use a different lock protocol
-without changing the on-disk default.
+without changing the on-disk default. Using the incorrect lock protocol
+on a cluster filesystem mounted from more than one node will almost
+certainly result in filesystem corruption.
 .TP
 \fBlocktable=\fP\fILockTableName\fR
 This specifies the identity of the cluster and of the filesystem for this
@@ -78,8 +75,8 @@ the inter-node locking system, enabling the mounting of multiple GFS2
 filesystems.
 
 The format of \fILockTableName\fR is lock-module-specific.  For
-lock_dlm, the format is \fIclustername:fsname\fR.  For
-lock_nolock, the field is ignored.
+\fIlock_dlm\fR, the format is \fIclustername:fsname\fR.  For
+\fIlock_nolock\fR, the field is ignored.
 
 The default cluster/filesystem name is written to disk initially when creating
 the filesystem with \fBmkfs.gfs2\fP(8), -t option.  It can be changed on-disk
@@ -108,10 +105,13 @@ is achieved by letting VFS handle the whole job.
 This is turned on automatically by the lock_nolock module,
 but can be overridden by using the \fBignore_local_fs\fP option.
 .TP
-\fBdebug\fP
-Causes GFS2 to oops when encountering an error that would cause the
-mount to withdraw or print an assertion warning.  This option should
-probably not be used in a production system. 
+\fBerrors=\fP\fI[panic|withdraw]\fR
+Setting errors=panic causes GFS2 to oops when encountering an error that
+would otherwise cause the
+mount to withdraw or print an assertion warning. The default setting
+is errors=withdraw. This option should not be used in a production system.
+It replaces the earlier \fBdebug\fP option on kernel versions 2.6.31 and
+above.
 .TP
 \fBignore_local_fs\fP
 By default, using the nolock lock module automatically turns on the
@@ -133,7 +133,8 @@ Enables POSIX Access Control List \fBacl\fP(5) support within GFS2.
 .TP
 \fBspectator\fP
 Mount this filesystem using a special form of read-only mount.  The mount
-does not use one of the filesystem's journals.
+does not use one of the filesystem's journals. The node is unable to
+recover journals for other nodes.
 .TP
 \fBsuiddir\fP
 Sets owner of any newly created file or directory to be that of parent
@@ -155,13 +156,21 @@ been freed. These can be used by suitable hardware to implement
 thin-provisioning and similar schemes. This feature is supported
 in kernel version 2.6.30 and above.
 .TP
+\fBbarrier\fP
+This option, which defaults to on, causes GFS2 to send I/O barriers
+when flushing the journal. The option is automatically turned off
+if the underlying device does not support I/O barriers. We highly
+recommend the use of I/O barriers with GFS2 at all times unless
+the block device is designed so that it cannot lose its write cache
+content (e.g. it's on a UPS, or it doesn't have a write cache).
+.TP
 \fBcommit=\fP\fIsecs\fR
 This is similar to the ext3 \fBcommit=\fP option in that it sets
 the maximum number of seconds between journal commits if there is
 dirty data in the journal. The default is 60 seconds. This option
 is only provided in kernel versions 2.6.31 and above.
 .TP
-\fBdata=\fP\fI[ordered/writeback]\fR
+\fBdata=\fP\fI[ordered|writeback]\fR
 When data=ordered is set, the user data modified by a transaction is
 flushed to the disk before the transaction is committed to disk.  This
 should prevent the user from seeing uninitialized blocks in a file
@@ -169,14 +178,47 @@ after a crash.  Data=writeback mode writes the user data to the disk
 at any time after it's dirtied.  This doesn't provide the same
 consistency guarantee as ordered mode, but it should be slightly
 faster for some workloads.  The default is ordered mode.
-
-.SH LINKS
-.TP 30
-http://sources.redhat.com/cluster
--- home site of GFS2
 .TP
-http://www.suse.de/~agruen/acl/linux-acls/
--- good writeup on ACL support in Linux
+\fBmeta\fP
+This option results in selecting the meta filesystem root rather than
+the normal filesystem root. This option is normally only used by
+the GFS2 utility functions. Altering any file on the GFS2 meta filesystem
+may render the filesystem unusable, so only experts in the GFS2
+on-disk layout should use this option.
+.TP
+\fBquota_quantum=\fP\fIsecs\fR
+This sets the number of seconds for which a change in the quota
+information may sit on one node before being written to the quota
+file. This is the preferred way to set this parameter. The value
+is an integer number of seconds greater than zero. The default is
+60 seconds. Shorter settings result in faster updates of the lazy
+quota information and less likelihood of someone exceeding their
+quota. Longer settings make filesystem operations involving quotas
+faster and more efficient.
+.TP
+\fBstatfs_quantum=\fP\fIsecs\fR
+Setting statfs_quantum to 0 is the preferred way to set the slow version
+of statfs. The default value is 30 secs which sets the maximum time
+period before statfs changes will be synced to the master statfs file.
+This can be adjusted to allow for faster, less accurate statfs values
+or slower, more accurate values. When set to 0, statfs will always
+report the true values.
+.TP
+\fBstatfs_percent=\fP\fIvalue\fR
+This setting provides a bound on the maximum percentage change in
+the statfs information on a local basis before it is synced back
+to the master statfs file, even if the time period has not
+expired. If the setting of statfs_quantum is 0, then this setting
+is ignored.
+
+.SH BUGS
+
+GFS2 doesn't support \fBerrors=\fP\fIremount-ro\fR or \fBdata=\fP\fIjournal\fR.
+It is not possible to switch support for user and group quotas on and
+off independently of each other. Some of the error messages are rather
+cryptic; if you encounter one of these messages, check firstly that gfs_controld
+is running and secondly that you have enough journals on the filesystem
+for the number of nodes in use.
 
 .SH SEE ALSO
 
diff --git a/gfs2/mkfs/main_grow.c b/gfs2/mkfs/main_grow.c
index a3f6e47..e622b3b 100644
--- a/gfs2/mkfs/main_grow.c
+++ b/gfs2/mkfs/main_grow.c
@@ -167,15 +167,11 @@ static void initialize_new_portion(struct gfs2_sbd *sdp, int *old_rg_count)
 	/* Build the remaining resource groups */
 	build_rgrps(sdp, !test);
 
-	/* Note: We do inode_put with not_updated because we only updated */
-	/* the new RGs/bitmaps themselves on disk.  The rindex file must  */
-	/* be updated through the meta_fs so the gfs2 kernel is informed. */
-	inode_put(sdp->md.riinode, not_updated);
-	inode_put(sdp->master_dir, not_updated);
+	inode_put(&sdp->md.riinode);
+	inode_put(&sdp->master_dir);
 
 	/* We're done with the libgfs portion, so commit it to disk.      */
-	bsync(&sdp->buf_list);
-	bsync(&sdp->nvbuf_list);
+	fsync(sdp->device_fd);
 }
 
 /**
@@ -210,6 +206,8 @@ static void fix_rindex(struct gfs2_sbd *sdp, int rindex_fd, int old_rg_count)
 		gfs2_rindex_out(&rl->ri, bufptr);
 		bufptr += sizeof(struct gfs2_rindex);
 	}
+	gfs2_rgrp_free(&sdp->rglist);
+	fsync(sdp->device_fd);
 	if (!test) {
 		/* Now write the new RGs to the end of the rindex */
 		lseek(rindex_fd, 0, SEEK_END);
@@ -287,8 +285,6 @@ main_grow(int argc, char *argv[])
 		}
 		log_info( _("Initializing lists...\n"));
 		osi_list_init(&sdp->rglist);
-		init_buf_list(sdp, &sdp->buf_list, 128 << 20);
-		init_buf_list(sdp, &sdp->nvbuf_list, 0xffffffff);
 
 		sdp->sd_sb.sb_bsize = GFS2_DEFAULT_BSIZE;
 		sdp->bsize = sdp->sd_sb.sb_bsize;
@@ -319,7 +315,7 @@ main_grow(int argc, char *argv[])
 		}
 		/* Get master dinode */
 		sdp->master_dir =
-			gfs2_load_inode(sdp, sdp->sd_sb.sb_master_dir.no_addr);
+			inode_read(sdp, sdp->sd_sb.sb_master_dir.no_addr);
 		gfs2_lookupi(sdp->master_dir, "rindex", 6, &sdp->md.riinode);
 		/* Fetch the rindex from disk.  We aren't using gfs2 here,  */
 		/* which means that the bitmaps will most likely be cached  */
diff --git a/gfs2/mkfs/main_jadd.c b/gfs2/mkfs/main_jadd.c
index ff2ba28..be13271 100644
--- a/gfs2/mkfs/main_jadd.c
+++ b/gfs2/mkfs/main_jadd.c
@@ -297,7 +297,9 @@ add_qc(struct gfs2_sbd *sdp)
 			sdp->qcsize << (20 - sdp->sd_sb.sb_bsize_shift);
 		unsigned int x;
 		struct gfs2_meta_header mh;
+		struct gfs2_buffer_head dummy_bh;
 
+		dummy_bh.b_data = buf;
 		make_jdata(fd, "clear");
 		memset(buf, 0, sdp->bsize);
 
@@ -319,7 +321,7 @@ add_qc(struct gfs2_sbd *sdp)
 		mh.mh_magic = GFS2_MAGIC;
 		mh.mh_type = GFS2_METATYPE_QC;
 		mh.mh_format = GFS2_FORMAT_QC;
-		gfs2_meta_header_out(&mh, buf);
+		gfs2_meta_header_out(&mh, &dummy_bh);
 
 		for (x=0; x<blocks; x++) {
 			if (write(fd, buf, sdp->bsize) != sdp->bsize) {
@@ -431,10 +433,12 @@ add_j(struct gfs2_sbd *sdp)
 
 		for (x=0; x<blocks; x++) {
 			uint32_t hash;
+			struct gfs2_buffer_head dummy_bh;
 
+			dummy_bh.b_data = buf;
 			lh.lh_sequence = seq;
 			lh.lh_blkno = x;
-			gfs2_log_header_out(&lh, buf);
+			gfs2_log_header_out(&lh, &dummy_bh);
 			hash = gfs2_disk_hash(buf, sizeof(struct gfs2_log_header));
 			((struct gfs2_log_header *)buf)->lh_hash = cpu_to_be32(hash);
 
diff --git a/gfs2/mkfs/main_mkfs.c b/gfs2/mkfs/main_mkfs.c
index 4c1d94f..50e3550 100644
--- a/gfs2/mkfs/main_mkfs.c
+++ b/gfs2/mkfs/main_mkfs.c
@@ -528,8 +528,6 @@ print_results(struct gfs2_sbd *sdp, uint64_t real_device_size,
 
 	if (sdp->debug) {
 		printf("\n");
-		printf( _("Spills:                    %u\n"),
-		       sdp->buf_list.spills);
 		printf( _("Writes:                    %u\n"), sdp->writes);
 	}
 
@@ -561,8 +559,6 @@ void main_mkfs(int argc, char *argv[])
 	strcpy(sdp->lockproto, GFS2_DEFAULT_LOCKPROTO);
 	sdp->time = time(NULL);
 	osi_list_init(&sdp->rglist);
-	init_buf_list(sdp, &sdp->buf_list, 128 << 20);
-	init_buf_list(sdp, &sdp->nvbuf_list, 0xffffffff);
 
 	decode_arguments(argc, argv, sdp);
 	if (sdp->rgsize == -1)                 /* if rg size not specified */
@@ -638,13 +634,12 @@ void main_mkfs(int argc, char *argv[])
 
 	/* Cleanup */
 
-	inode_put(sdp->md.rooti, updated);
-	inode_put(sdp->master_dir, updated);
-	inode_put(sdp->md.inum, updated);
-	inode_put(sdp->md.statfs, updated);
-	bsync(&sdp->buf_list);
-	bsync(&sdp->nvbuf_list);
+	inode_put(&sdp->md.rooti);
+	inode_put(&sdp->master_dir);
+	inode_put(&sdp->md.inum);
+	inode_put(&sdp->md.statfs);
 
+	gfs2_rgrp_free(&sdp->rglist);
 	error = fsync(sdp->device_fd);
 	if (error)
 		die( _("can't fsync device (%d): %s\n"),
diff --git a/gfs2/mount/mount.gfs2.c b/gfs2/mount/mount.gfs2.c
index 2442f59..38ec9a1 100644
--- a/gfs2/mount/mount.gfs2.c
+++ b/gfs2/mount/mount.gfs2.c
@@ -210,6 +210,8 @@ int main(int argc, char **argv)
 
 		if (errno == EBUSY)
 			die("%s already mounted or %s busy\n", mo.dev, mo.dir);
+		else if (errno == EUSERS)
+			die("Too many nodes mounting filesystem, no free journals\n");
 		die("error mounting %s on %s: %s\n", mo.dev, mo.dir,
 		    strerror(errno));
 	}
diff --git a/gfs2/quota/check.c b/gfs2/quota/check.c
index ac3e9bf..7476629 100644
--- a/gfs2/quota/check.c
+++ b/gfs2/quota/check.c
@@ -220,6 +220,7 @@ read_quota_file(struct gfs2_sbd *sdp, commandline_t *comline,
         }
 	quota_file_size = statbuf.st_size;
 	/* First find the number of extents in the quota file */
+	fmap.fm_flags = 0;
 	fmap.fm_start = 0;
 	fmap.fm_length = (~0ULL);
 	error = ioctl(fd, FS_IOC_FIEMAP, &fmap);
@@ -233,6 +234,7 @@ read_quota_file(struct gfs2_sbd *sdp, commandline_t *comline,
 		fprintf(stderr, "malloc error (%d): %s\n", errno, strerror(errno));
 		goto out;
 	}
+	fmap2->fm_flags = 0;
 	fmap2->fm_start = 0;
 	fmap2->fm_length = (~0ULL);
 	fmap2->fm_extent_count = fmap.fm_mapped_extents;
diff --git a/gfs2/quota/main.c b/gfs2/quota/main.c
index 3812669..06176db 100644
--- a/gfs2/quota/main.c
+++ b/gfs2/quota/main.c
@@ -285,7 +285,9 @@ read_superblock(struct gfs2_sb *sb, struct gfs2_sbd *sdp)
 {
 	int fd;
 	char buf[PATH_MAX];
-	
+	struct gfs2_buffer_head dummy_bh;
+
+	dummy_bh.b_data = buf;
 	fd = open(sdp->device_name, O_RDONLY);
 	if (fd < 0) {
 		die("Could not open the block device %s: %s\n",
@@ -303,7 +305,7 @@ read_superblock(struct gfs2_sb *sb, struct gfs2_sbd *sdp)
 			strerror(errno), __FUNCTION__, __LINE__);
 		exit(-1);
 	}
-	gfs2_sb_in(sb, buf);
+	gfs2_sb_in(sb, &dummy_bh);
 
 	close(fd);
 }
@@ -365,8 +367,8 @@ do_reset(struct gfs2_sbd *sdp, commandline_t *comline)
 	if (!*comline->filesystem)
 		die("need a filesystem to work on\n");
 
-	printf("This operation will permanently erase all quota information. "
-	       "You will have to re-assign all quota limit/warn values. "
+	printf("This operation will permanently erase all quota information.\n"
+	       "You will have to re-assign all quota limit/warn values.\n"
 	       "Proceed [y/N]? ");
 	c = getchar();
 	if (c != 'y' && c != 'Y')
@@ -474,6 +476,7 @@ do_list(struct gfs2_sbd *sdp, commandline_t *comline)
 	}
 	quota_file_size = statbuf.st_size;
 	/* First find the number of extents in the quota file */
+	fmap.fm_flags = 0;
 	fmap.fm_start = 0;
 	fmap.fm_length = (~0ULL);
 	error = ioctl(fd, FS_IOC_FIEMAP, &fmap);
@@ -487,6 +490,7 @@ do_list(struct gfs2_sbd *sdp, commandline_t *comline)
 		fprintf(stderr, "malloc error (%d): %s\n", errno, strerror(errno));
 		goto out;
 	}
+	fmap2->fm_flags = 0;
 	fmap2->fm_start = 0;
 	fmap2->fm_length = (~0ULL);
 	fmap2->fm_extent_count = fmap.fm_mapped_extents;
diff --git a/gfs2/tool/sb.c b/gfs2/tool/sb.c
index 3d378e9..0930aaf 100644
--- a/gfs2/tool/sb.c
+++ b/gfs2/tool/sb.c
@@ -71,7 +71,9 @@ do_sb(int argc, char **argv)
 	int fd;
 	unsigned char buf[GFS2_BASIC_BLOCK], input[256];
 	struct gfs2_sb sb;
+	struct gfs2_buffer_head dummy_bh;
 
+	dummy_bh.b_data = (char *)buf;
 	if (optind == argc)
 		die("Usage: gfs2_tool sb <device> <field> [newval]\n");
 
@@ -117,7 +119,7 @@ do_sb(int argc, char **argv)
 		exit(-1);
 	}
 
-	gfs2_sb_in(&sb, (char*) buf);
+	gfs2_sb_in(&sb, &dummy_bh);
 
 	if (sb.sb_header.mh_magic != GFS2_MAGIC ||
 	    sb.sb_header.mh_type != GFS2_METATYPE_SB)
@@ -201,7 +203,7 @@ do_sb(int argc, char **argv)
 		die( _("unknown field %s\n"), field);
 
 	if (newval) {
-		gfs2_sb_out(&sb,(char*) buf);
+		gfs2_sb_out(&sb, &dummy_bh);
 
 		if (lseek(fd, GFS2_SB_ADDR * GFS2_BASIC_BLOCK, SEEK_SET) !=
 		    GFS2_SB_ADDR * GFS2_BASIC_BLOCK) {
diff --git a/group/dlm_controld/Makefile b/group/dlm_controld/Makefile
index f9a2d05..b165dbc 100644
--- a/group/dlm_controld/Makefile
+++ b/group/dlm_controld/Makefile
@@ -44,6 +44,7 @@ PCMK_CFLAGS += -I${incdir}/heartbeat -I${incdir}/pacemaker
 PCMK_CFLAGS += `pkg-config glib-2.0 --cflags`
 PCMK_CFLAGS += `xml2-config --cflags`
 
+LDFLAGS += -lpthread
 LDFLAGS += -L${dlmlibdir} -ldlm 
 LDFLAGS += -L${logtlibdir} -llogthread
 LDFLAGS += -L${openaislibdir} -lSaCkpt
diff --git a/group/dlm_controld/cpg.c b/group/dlm_controld/cpg.c
index d5245ce..e01ecc2 100644
--- a/group/dlm_controld/cpg.c
+++ b/group/dlm_controld/cpg.c
@@ -1574,40 +1574,79 @@ static void deliver_cb(cpg_handle_t handle,
 		break;
 
 	case DLM_MSG_PLOCK:
-		receive_plock(ls, hd, len);
+		if (cfgd_enable_plock)
+			receive_plock(ls, hd, len);
+		else
+			log_error("msg %d nodeid %d enable_plock %d",
+				  hd->type, nodeid, cfgd_enable_plock);
 		break;
 
 	case DLM_MSG_PLOCK_OWN:
-		receive_own(ls, hd, len);
+		if (cfgd_enable_plock && cfgd_plock_ownership)
+			receive_own(ls, hd, len);
+		else
+			log_error("msg %d nodeid %d enable_plock %d owner %d",
+				  hd->type, nodeid, cfgd_enable_plock,
+				  cfgd_plock_ownership);
 		break;
 
 	case DLM_MSG_PLOCK_DROP:
-		receive_drop(ls, hd, len);
+		if (cfgd_enable_plock && cfgd_plock_ownership)
+			receive_drop(ls, hd, len);
+		else
+			log_error("msg %d nodeid %d enable_plock %d owner %d",
+				  hd->type, nodeid, cfgd_enable_plock,
+				  cfgd_plock_ownership);
 		break;
 
 	case DLM_MSG_PLOCK_SYNC_LOCK:
 	case DLM_MSG_PLOCK_SYNC_WAITER:
-		receive_sync(ls, hd, len);
+		if (cfgd_enable_plock && cfgd_plock_ownership)
+			receive_sync(ls, hd, len);
+		else
+			log_error("msg %d nodeid %d enable_plock %d owner %d",
+				  hd->type, nodeid, cfgd_enable_plock,
+				  cfgd_plock_ownership);
 		break;
 
 	case DLM_MSG_PLOCKS_STORED:
-		receive_plocks_stored(ls, hd, len);
+		if (cfgd_enable_plock)
+			receive_plocks_stored(ls, hd, len);
+		else
+			log_error("msg %d nodeid %d enable_plock %d",
+				  hd->type, nodeid, cfgd_enable_plock);
 		break;
 
 	case DLM_MSG_DEADLK_CYCLE_START:
-		receive_cycle_start(ls, hd, len);
+		if (cfgd_enable_deadlk)
+			receive_cycle_start(ls, hd, len);
+		else
+			log_error("msg %d nodeid %d enable_deadlk %d",
+				  hd->type, nodeid, cfgd_enable_deadlk);
 		break;
 
 	case DLM_MSG_DEADLK_CYCLE_END:
-		receive_cycle_end(ls, hd, len);
+		if (cfgd_enable_deadlk)
+			receive_cycle_end(ls, hd, len);
+		else
+			log_error("msg %d nodeid %d enable_deadlk %d",
+				  hd->type, nodeid, cfgd_enable_deadlk);
 		break;
 
 	case DLM_MSG_DEADLK_CHECKPOINT_READY:
-		receive_checkpoint_ready(ls, hd, len);
+		if (cfgd_enable_deadlk)
+			receive_checkpoint_ready(ls, hd, len);
+		else
+			log_error("msg %d nodeid %d enable_deadlk %d",
+				  hd->type, nodeid, cfgd_enable_deadlk);
 		break;
 
 	case DLM_MSG_DEADLK_CANCEL_LOCK:
-		receive_cancel_lock(ls, hd, len);
+		if (cfgd_enable_deadlk)
+			receive_cancel_lock(ls, hd, len);
+		else
+			log_error("msg %d nodeid %d enable_deadlk %d",
+				  hd->type, nodeid, cfgd_enable_deadlk);
 		break;
 
 	default:
diff --git a/group/man/Makefile b/group/man/Makefile
index a317fba..70387c3 100644
--- a/group/man/Makefile
+++ b/group/man/Makefile
@@ -1,6 +1,7 @@
 MANTARGET= \
 	dlm_controld.8 \
 	gfs_controld.8 \
+	gfs_control.8 \
 	group_tool.8 \
 	groupd.8
 
diff --git a/group/man/dlm_controld.8 b/group/man/dlm_controld.8
index e8dc1c6..7100f0e 100644
--- a/group/man/dlm_controld.8
+++ b/group/man/dlm_controld.8
@@ -1,81 +1,282 @@
-.TH dlm_controld 8
+.TH DLM_CONTROLD 8 2009-01-18 cluster cluster
 
 .SH NAME
-dlm_controld - daemon that configures dlm according to cluster events
+dlm_controld \- daemon that configures dlm according to cluster events
 
 .SH SYNOPSIS
-.B
-dlm_controld
-[\fIOPTION\fR]...
+.B dlm_controld
+[OPTIONS]
 
 .SH DESCRIPTION
-The dlm lives in the kernel, and the cluster infrastructure (cluster
+The dlm lives in the kernel, and the cluster infrastructure (corosync
 membership and group management) lives in user space.  The dlm in the
 kernel needs to adjust/recover for certain cluster events.  It's the job
 of dlm_controld to receive these events and reconfigure the kernel dlm as
 needed.  dlm_controld controls and configures the dlm through sysfs and
-configfs files that are considered dlm-internal interfaces; not a general
-API/ABI.
+configfs files that are considered dlm-internal interfaces.
 
-The dlm also exports lock state through debugfs so that dlm_controld can
-implement deadlock detection in user space.
+The cman init script usually starts the dlm_controld daemon.
 
-.SH CONFIGURATION FILE
+.SH OPTIONS
+Command line options override a corresponding setting in cluster.conf.
+
+.TP
+.B \-D
+Enable debugging to stderr and don't fork.
+.br
+See also
+.B dlm_tool dump
+in
+.BR dlm_tool (8).
+
+.TP
+.B \-L
+Enable debugging to log file.
+.br
+See also
+.B logging
+in
+.BR cluster.conf (5).
+
+.TP
+.B \-K
+Enable kernel dlm debugging messages.
+.br
+See also
+.B log_debug
+below.
+
+.TP
+.BI \-r " num"
+dlm kernel lowcomms protocol, 0 tcp, 1 sctp, 2 detect.
+2 selects tcp if corosync rrp_mode is "none", otherwise sctp.
+.br
+Default 2.
+
+.TP
+.BI \-g " num"
+groupd compatibility mode, 0 off, 1 on.
+.br
+Default 0.
+
+.TP
+.BI \-f " num"
+Enable (1) or disable (0) fencing recovery dependency.
+.br
+Default 1.
 
-Optional cluster.conf settings are placed in the <dlm> section.
+.TP
+.BI \-q " num"
+Enable (1) or disable (0) quorum recovery dependency.
+.br
+Default 0.
+
+.TP
+.BI \-d " num"
+Enable (1) or disable (0) deadlock detection code.
+.br
+Default 0.
 
-.SS Global settings
+.TP
+.BI \-p " num"
+Enable (1) or disable (0) plock code for cluster fs.
+.br
+Default 1.
+
+.TP
+.BI \-l " num"
+Limit the rate of plock operations, 0 for no limit.
+.br
+Default 0.
+
+.TP
+.BI \-o " num"
+Enable (1) or disable (0) plock ownership.
+.br
+Default 1.
+
+.TP
+.BI \-t " ms"
+Plock ownership drop resources time (milliseconds).
+.br
+Default 10000.
+
+.TP
+.BI \-c " num"
+Plock ownership drop resources count.
+.br
+Default 10.
+
+.TP
+.BI \-a " ms"
+Plock ownership drop resources age (milliseconds).
+.br
+Default 10000.
+
+.TP
+.B \-P
+Enable plock debugging messages (can produce excessive output).
+
+.TP
+.B \-h
+Print a help message describing available options, then exit.
+
+.TP
+.B \-V
+Print program version information, then exit.
+
+
+.SH FILES
+.BR cluster.conf (5)
+is usually located at /etc/cluster/cluster.conf.  It is not read directly.
+Other cluster components load the contents into memory, and the values are
+accessed through the libccs library.
+
+Configuration options for dlm (kernel) and dlm_controld are added to the
+<dlm /> section of cluster.conf, within the top level <cluster> section.
+
+.SS Kernel options
+
+.TP
+.B protocol
 The network
-.I protocol
-can be set to "tcp" or "sctp".  The default is tcp.
+.B protocol
+can be set to tcp, sctp or detect which selects tcp or sctp based on
+the corosync rrp_mode configuration (redundant ring protocol).
+The rrp_mode "none" results in tcp.  Default detect.
 
-  <dlm protocol="tcp"/>
+<dlm protocol="detect"/>
 
+.TP
+.B timewarn
 After waiting
-.I timewarn
+.B timewarn
 centiseconds, the dlm will emit a warning via netlink.  This only applies
 to lockspaces created with the DLM_LSFL_TIMEWARN flag, and is used for
-deadlock detection.  The default is 500 (5 seconds).
+deadlock detection.  Default 500 (5 seconds).
 
-  <dlm timewarn="500"/>
+<dlm timewarn="500"/>
 
+.TP
+.B log_debug
 DLM kernel debug messages can be enabled by setting
-.I log_debug
-to 1.  The default is 0.
+.B log_debug
+to 1.  Default 0.
+
+<dlm log_debug="0"/>
+
+.TP
+.B clusternode/weight
+The lock directory
+.B weight
+can be specified on the clusternode lines.  Weights would usually be
+used in the lock server configurations shown below instead.
+
+<clusternode name="node01" nodeid="1" weight="1"/>
+
+.SS Daemon options
+
+.TP
+.B enable_fencing
+See command line description.
+
+<dlm enable_fencing="1"/>
+
+.TP
+.B enable_quorum
+See command line description.
+
+<dlm enable_quorum="0"/>
+
+.TP
+.B enable_deadlk
+See command line description.
+
+<dlm enable_deadlk="0"/>
+
+.TP
+.B enable_plock
+See command line description.
+
+<dlm enable_plock="1"/>
+
+.TP
+.B plock_rate_limit
+See command line description.
+
+<dlm plock_rate_limit="0"/>
+
+.TP
+.B plock_ownership
+See command line description.
+
+<dlm plock_ownership="1"/>
+
+.TP
+.B drop_resources_time
+See command line description.
+
+<dlm drop_resources_time="10000"/>
+
+.TP
+.B drop_resources_count
+See command line description.
+
+<dlm drop_resources_count="10"/>
+
+.TP
+.B drop_resources_age
+See command line description.
+
+<dlm drop_resources_age="10000"/>
+
+.TP
+.B plock_debug
+Enable (1) or disable (0) plock debugging messages (can produce excessive
+output). Default 0.
+
+<dlm plock_debug="0"/>
 
-  <dlm log_debug="0"/>
 
 .SS Disabling resource directory
 
 Lockspaces usually use a resource directory to keep track of which node is
 the master of each resource.  The dlm can operate without the resource
 directory, though, by statically assigning the master of a resource using
-a hash of the resource name.
+a hash of the resource name.  To enable, set the per-lockspace
+.B nodir
+option to 1.
 
-  <dlm>
-    <lockspace name="foo" nodir="1">
-  </dlm>
+.nf
+<dlm>
+  <lockspace name="foo" nodir="1"/>
+</dlm>
+.fi
 
 .SS Lock-server configuration
 
 The nodir setting can be combined with node weights to create a
 configuration where select node(s) are the master of all resources/locks.
-These "master" nodes can be viewed as "lock servers" for the other nodes.
+These
+.B master
+nodes can be viewed as "lock servers" for the other nodes.
 
-  <dlm>
-    <lockspace name="foo" nodir="1">
-      <master name="node01"/>
-    </lockspace>
-  </dlm>
+.nf
+<dlm>
+  <lockspace name="foo" nodir="1">
+    <master name="node01"/>
+  </lockspace>
+</dlm>
 
 or,
 
-  <dlm>
-    <lockspace name="foo" nodir="1">
-      <master name="node01"/>
-      <master name="node02"/>
-    </lockspace>
-  </dlm>
+<dlm>
+  <lockspace name="foo" nodir="1">
+    <master name="node01"/>
+    <master name="node02"/>
+  </lockspace>
+</dlm>
+.fi
 
 Lock management will be partitioned among the available masters.  There
 can be any number of masters defined.  The designated master nodes will
@@ -87,37 +288,26 @@ disruption, when a non-master node joins/leaves.
 There is no special mode in the dlm for this lock server configuration,
 it's just a natural consequence of combining the "nodir" option with node
 weights.  When a lockspace has master nodes defined, the master has a
-default weight of 1 and all non-master nodes have weight of 0.  Explicit
-non-zero weights can also be assigned to master nodes, e.g.
+default weight of 1 and all non-master nodes have weight of 0.  An explicit
+non-zero
+.B weight
+can also be assigned to master nodes, e.g.
 
-  <dlm>
-    <lockspace name="foo" nodir="1">
-      <master name="node01" weight="2"/>
-      <master name="node02" weight="1"/>
-    </lockspace>
-  </dlm>
+.nf
+<dlm>
+  <lockspace name="foo" nodir="1">
+    <master name="node01" weight="2"/>
+    <master name="node02" weight="1"/>
+  </lockspace>
+</dlm>
+.fi
 
 In which case node01 will master 2/3 of the total resources and node2 will
 master the other 1/3.
 
-
-.SH OPTIONS
-.TP
-\fB-d\fP <num>
-Enable (1) or disable (0) the deadlock detection code.
-.TP
-\fB-D\fP
-Run the daemon in the foreground and print debug statements to stdout.
-.TP
-\fB-K\fP
-Enable kernel dlm debugging messages.
-.TP
-\fB-V\fP
-Print the version information and exit.
-.TP
-\fB-h\fP 
-Print out a help message describing available options, then exit.
-
 .SH SEE ALSO
-groupd(8)
+.BR dlm_tool (8),
+.BR fenced (8),
+.BR cman (5),
+.BR cluster.conf (5)
 
diff --git a/group/man/gfs_control.8 b/group/man/gfs_control.8
new file mode 100644
index 0000000..8dfd5eb
--- /dev/null
+++ b/group/man/gfs_control.8
@@ -0,0 +1,44 @@
+.TH GFS_CONTROL 8 2009-01-20 cluster cluster
+
+.SH NAME
+gfs_control \- a utility for the gfs_controld daemon
+
+.SH SYNOPSIS
+.B gfs_control
+[COMMAND] [OPTIONS]
+[
+.I name
+]
+
+.SH DESCRIPTION
+.TP
+.B ls
+Display internal gfs_controld state about mount groups.
+
+.TP
+.B dump
+Dump gfs_controld debug buffer.
+
+.TP
+.BI plocks " name"
+Dump posix locks from gfs_controld for the mount group.
+(For compat mode, otherwise see
+.BR dlm_tool (8).)
+
+.SH OPTIONS
+.TP
+.B \-n
+Show all node information in ls.
+
+.TP
+.B \-h
+Print a help message describing available options, then exit.
+
+.TP
+.B \-V
+Print program version information, then exit.
+
+.SH SEE ALSO
+.BR gfs_controld (8),
+.BR dlm_tool (8)
+
diff --git a/group/man/gfs_controld.8 b/group/man/gfs_controld.8
index 3d053ee..8e47c89 100644
--- a/group/man/gfs_controld.8
+++ b/group/man/gfs_controld.8
@@ -1,122 +1,184 @@
-.TH gfs_controld 8
+.TH GFS_CONTROLD 8 2009-01-19 cluster cluster
 
 .SH NAME
-gfs_controld - daemon that manages mounting, unmounting, recovery and
-posix locks
+gfs_controld \- daemon that coordinates gfs mounts and recovery
 
 .SH SYNOPSIS
-.B
-gfs_controld
-[\fIOPTION\fR]...
-
+.B gfs_controld
+[OPTIONS] 
 .SH DESCRIPTION
-GFS lives in the kernel, and the cluster infrastructure (cluster
-membership and group management) lives in user space.  GFS in the kernel
-needs to adjust/recover for certain cluster events.  It's the job of
-gfs_controld to receive these events and reconfigure gfs as needed.
+GFS lives in the kernel, and the cluster infrastructure (corosync
+membership and group management) lives in user space.  GFS in the
+kernel needs to adjust/recover for certain cluster events.  It's the job
+of gfs_controld to receive these events and reconfigure gfs as needed.
 gfs_controld controls and configures gfs through sysfs files that are
-considered gfs-internal interfaces; not a general API/ABI.
-
-Mounting, unmounting and node failure are the main cluster events that
-gfs_controld controls.  It also manages the assignment of journals to
-different nodes.  The mount.gfs and umount.gfs programs communicate with
-gfs_controld to join/leave the mount group and receive the necessary
-options for the kernel mount.
-
-GFS also sends all posix lock operations to gfs_controld for processing.
-gfs_controld manages cluster-wide posix locks for gfs and passes results
-back to gfs in the kernel.
+considered gfs-internal interfaces.
 
-.SH CONFIGURATION FILE
+Mounting and node failure are the main cluster events that gfs_controld
+controls.  It also manages the assignment of journals to different nodes.
+.BR mount.gfs2 (8)
+communicates with gfs_controld to join the mount group and receive the
+necessary options for the kernel mount.
 
-Optional cluster.conf settings are placed in the <gfs_controld> section.
+The cman init script usually starts the gfs_controld daemon.
 
-.SS Posix locks
-
-Heavy use of plocks can result in high network load.  The rate at which
-plocks are processed are limited by the
-.I plock_rate_limit
-setting, which limits the maximum plock performance, and limits potentially
-excessive network load.  This value is the maximum number of plock operations
-a single node will process every second.  To achieve maximum posix locking
-performance, the rate limiting should be disabled by setting it to 0.  The
-default value is 100.
+.SH OPTIONS
+Command line options override a corresponding setting in cluster.conf.
 
-  <gfs_controld plock_rate_limit="100"/>
+.TP
+.B \-D
+Enable debugging to stderr and don't fork.
+.br
+See also
+.B gfs_control dump
+in
+.BR gfs_control (8).
 
-To optimize performance for repeated locking of the same locks by
-processes on a single node,
-.I plock_ownership
-can be set to 1.  The default is 0.  If this is enabled, gfs_controld
-cannot interoperate with older versions that did not support this option.
+.TP
+.B \-L
+Enable debugging to log file.
+.br
+See also
+.B logging
+in
+.BR cluster.conf (5).
 
-  <gfs_controld plock_ownership="1"/>
+.TP
+.BI \-g " num"
+groupd compatibility mode, 0 off, 1 on.
+.br
+Default 0.
 
-Three options can be used to tune the behavior of the plock_ownership
-optimization.  All three relate to the caching of lock ownership state.
-Specifically, they define how aggressively cached ownership state is dropped.
-More caching of ownership state can result in better performance, at the
-expense of more memory usage.
+.TP
+.BI \-w " num"
+Enable (1) or disable (0) withdraw.
+.br
+Default 1.
 
-.I drop_resources_time
-is the frequency of drop attempts in milliseconds.  Default 10000 (10 sec).
+.TP
+.B \-h
+Print a help message describing available options, then exit.
 
-.I drop_resources_count
-is the maximum number of items to drop from the cache each time.  Default 10.
+.TP
+.B \-V
+Print program version information, then exit.
 
-.I drop_resources_age
-is the time in milliseconds a cached item should be unused before being
-considered for dropping.  Default 10000 (10 sec).
+.SS Cluster Upgrading
+When performing a rolling upgrade from cluster2 to cluster3, the
+gfs_controld daemon runs in cluster2 compatibility mode and processes
+posix lock (plock) requests from the dlm to remain compatible with cluster2
+nodes.  Otherwise,
+.BR dlm_controld (8)
+handles plocks.
 
-  <gfs_controld drop_resources_time="10000" drop_resources_count="10"
-   drop_resources_age="10000"/>
+.TP
+.BI \-p " num"
+Enable (1) or disable (0) plock code for cluster fs.
+.br
+Default 1.
 
+.TP
+.BI \-l " num"
+Limit the rate of plock operations, 0 for no limit.
+.br
+Default 0.
 
-.SH OPTIONS
 .TP
-\fB-D\fP
-Run the daemon in the foreground and print debug statements to stdout.
+.BI \-o " num"
+Enable (1) or disable (0) plock ownership.
+.br
+Default 1.
+
 .TP
-\fB-P\fP
-Enable posix lock debugging messages.
+.BI \-t " ms"
+Plock ownership drop resources time (milliseconds).
+.br
+Default 10000.
+
 .TP
-\fB-w\fP
-Disable the "withdraw" feature.
+.BI \-c " num"
+Plock ownership drop resources count.
+.br
+Default 10.
+
 .TP
-\fB-p\fP
-Disable posix lock handling.
+.BI \-a " ms"
+Plock ownership drop resources age (milliseconds).
+.br
+Default 10000.
+
 .TP
-\fB-l\fP <num>
-Limit the rate at which posix lock messages are sent to <num> messages per
-second.  0 disables the limit and results in the maximum performance of
-posix locks. Default 100.
+.B \-P
+Enable plock debugging messages (can produce excessive output).
+
+.SH FILES
+.BR cluster.conf (5)
+is usually located at /etc/cluster/cluster.conf.  It is not read directly.
+Other cluster components load the contents into memory, and the values are
+accessed through the libccs library.
+
+Configuration options are added to the <gfs_controld /> section of
+cluster.conf, within the top level <cluster> section.
+
 .TP
-\fB-o\fP <num>
-Enable (1) or disable (0) plock ownership optimization. Default 0.  All
-nodes must run with the same value.
+.B enable_withdraw
+Set to 1/0 to enable/disable a response to a withdraw.
+
+<gfs_controld enable_withdraw="1"/>
+
+.SS Cluster Upgrading
+See note above about compatibility plock handling during a rolling
+upgrade.  The following options are deprecated, see
+.BR dlm_controld (8)
+for the new equivalent options.
+
 .TP
-\fB-t\fP <ms>
-Ownership cache tuning, drop resources time (milliseconds). Default 10000.
+.B enable_plock
+See command line description.
+
+<gfs_controld enable_plock="1"/>
+
 .TP
-\fB-c\fP <ms>
-Ownership cache tuning, drop resources count. Default 10.
+.B plock_rate_limit
+See command line description.
+
+<gfs_controld plock_rate_limit="0"/>
+
+.TP 
+.B plock_ownership
+See command line description.
+
+<gfs_controld plock_ownership="1"/>
+
 .TP
-\fB-a\fP <ms>
-Ownership cache tuning, drop resources age (milliseconds). Default 10000.
+.B drop_resources_time
+See command line description.
+
+<gfs_controld drop_resources_time="10000"/>
+
 .TP
-\fB-h\fP 
-Print out a help message describing available options, then exit.
+.B drop_resources_count
+See command line description.
+
+<gfs_controld drop_resources_count="10"/>
+
 .TP
-\fB-V\fP
-Print the version information and exit.
+.B drop_resources_age
+See command line description.
 
-.SH DEBUGGING 
-The gfs_controld daemon keeps a circular buffer of debug messages that can
-be dumped with the 'group_tool dump gfs' command.
+<gfs_controld drop_resources_age="10000"/>
+
+.TP
+.B plock_debug
+Enable (1) or disable (0) plock debugging messages (can produce excessive
+output). Default 0.
 
-The state of all gfs posix locks can also be dumped from gfs_controld with
-the 'group_tool dump plocks <fsname>' command.
+<gfs_controld plock_debug="0"/>
 
 .SH SEE ALSO
-groupd(8), group_tool(8)
+.BR mount.gfs2 (8),
+.BR gfs_control (8),
+.BR dlm_controld (8),
+.BR cman (5),
+.BR cluster.conf (5)
 
diff --git a/group/man/group_tool.8 b/group/man/group_tool.8
index b0c623d..53cd3da 100644
--- a/group/man/group_tool.8
+++ b/group/man/group_tool.8
@@ -1,90 +1,80 @@
-.TH group_tool 8
+.TH GROUP_TOOL 8 2009-01-19 cluster cluster
 
 .SH NAME
-group_tool - display/dump information about fence, dlm and gfs groups
+group_tool \- run common fence_tool, dlm_tool, gfs_control commands
 
 .SH SYNOPSIS
-.B
-group_tool
-[\fISUBCOMMAND\fR] [\fIOPTION\fR]...
+.B group_tool
+[COMMAND] [OPTIONS]
 
 .SH DESCRIPTION
+This utility is deprecated and the new replacements are shown.  See
+.BR fence_tool (8),
+.BR dlm_tool (8),
+.BR gfs_control (8)
+man pages for more information about the new commands.
 
-The group_tool program displays information about fence, dlm and gfs groups.
-
-.SH SUBCOMMANDS
-
-\fBgroup_tool ls\fP [-n]
-
-  Queries individual daemons, and displays full daemon and group state.
-  It is a shortcut for:
-
-  fence_tool ls [-n]
-.br
-  dlm_tool ls [-n]
-.br
-  gfs_control ls [-n]
-
-\fBgroup_tool compat\fP
-
-  Queries groupd about compatibility mode currently running:
-
-  groupd not running
-.br
-  groupd compatibility mode 0
-.br
-  groupd compatibility mode 1
-.br
-  groupd compatibility mode 2 (pending)
-.br
-
-\fBgroup_tool dump\fP [gfs|dlm|fence|plocks] [name]
-
-  Dumps debug info from daemons:
+.TP
+.B ls
+runs
+.BR "fence_tool ls " ;
+.BR "dlm_tool ls " ;
+.B gfs_control ls
+.TP
+.B dump fence
+equivalent to new
+.B fence_tool dump
+.TP
+.B dump dlm
+equivalent to new
+.B dlm_tool dump
+.TP
+.B dump gfs
+equivalent to new
+.B gfs_control dump
+.TP
+.BI "dump plocks" " name"
+equivalent to new
+.BI "dlm_tool plocks" " name"
 
-  group_tool dump gfs: dump gfs_controld debug log
-.br
-	(same as gfs_control dump)
-.br
-  group_tool dump dlm: dump dlm_controld debug log
-.br
-	(same as dlm_tool dump)
-.br
-  group_tool dump fence: dump fenced debug log
-.br
-	(same as fence_tool dump)
-.br
-  group_tool dump plocks <x>: dump plock state
-.br
-	(same as dlm_tool plocks <x>)
-.br
-  group_tool dump: dump groupd debug log
-.br
+.SS Cluster Upgrading
 
+When performing a rolling upgrade from cluster2 to cluster3, the
+.BR groupd (8)
+daemon will be running and group_tool can be used with groupd like it was in
+cluster2.
 
-.SH OPTIONS
 .TP
-\fB-g <num>\fP
-Select daemons to query with ls
-.br
-0: query fenced, dlm_controld, gfs_controld (default)
-.br
-1: query groupd (for old compat mode)
-.br
-2: query groupd (1) when in compat mode, otherwise daemons (0)
+.B ls \-g1
+queries and displays the internal groupd state.
+.TP
+.B dump
+queries and dumps the groupd debug buffer.
 .TP
-\fB-n\fP
-Show all node information (with ls -g0)
+.B compat
+queries and displays the internal groupd compatibility mode.
+
+.SH OPTIONS
 .TP
-\fB-v\fP
-Show extra event information (with ls -g1)
+.BI \-g " num"
+Force old groupd queries with 1.
+.TP 
+.B \-n
+Show all node information in ls.
+.TP 
+.B \-v
+Show verbose information from groupd with ls \-g1.
 .TP
-\fB-V\fP
-Print the version information and exit.
+.B \-h
+Print a help message describing available options, then exit.
 .TP
-\fB-h\fP 
-Print out a help message describing available options, then exit.
+.B \-V
+Print program version information, then exit.
 
 .SH SEE ALSO
-groupd(8), fence_tool(8), dlm_tool(8), gfs_control(8)
+.BR cman (5),
+.BR groupd (8),
+.BR fence_tool (8),
+.BR dlm_tool (8),
+.BR gfs_control (8)
 
diff --git a/group/man/groupd.8 b/group/man/groupd.8
index f21d1b9..5c83cd6 100644
--- a/group/man/groupd.8
+++ b/group/man/groupd.8
@@ -1,89 +1,64 @@
-.TH groupd 8
+.TH GROUPD 8 2009-01-19 cluster cluster
 
 .SH NAME
-groupd - the group manager for fenced, dlm_controld and gfs_controld
+groupd \- compatibility daemon for fenced, dlm_controld and gfs_controld
 
 .SH SYNOPSIS
-.B
-groupd
-[\fIOPTION\fR]...
+.B groupd
+[OPTIONS]
 
 .SH DESCRIPTION
 
-The group daemon, groupd, provides a compatibility layer between the corosync
-closed process group (CPG) service and the fenced, dlm_controld and
-gfs_controld daemons.  New versions of fenced/dlm_controld/gfs_controld no
-longer require this compat layer, and can run without groupd.  They use libcpg
-directly, making them much more robust.  This makes them incompatible with old
-(cluster2) versions.  Explicitly enable the new incompatible mode with:
+The groupd daemon and libgroup library are used by the fenced,
+dlm_controld and gfs_controld daemons when they are operating in
+cluster2-compatible mode to perform a rolling cluster upgrade from
+cluster2 to cluster3.
 
-	<group groupd_compat="0"/>
+See
+.BR cman (5)
+for more information on the
+.B upgrading
+configuration option needed to perform a rolling upgrade.
 
-This is the default if no groupd_compat setting exists.  If compatibility with
-old (cluster2) nodes is necessary (mixing cluster2 and cluster3 nodes should
-be avoided), the daemons can be run in the old mode, using groupd as before.
-Enable this mode with:
+When the upgrading option is enabled, cman adds the following to the
+online configuration:
 
 	<group groupd_compat="1"/>
 
-To perform a rolling upgrade from cluster2 to cluster3, add the
-groupd_compat="1" setting to cluster.conf before upgrading any existing nodes
-or adding any new cluster3 nodes.  The next time the entire cluster is shut
-down, after all nodes are upgraded, groupd_compat can be changed to 0 (or
-removed entirely).  \fBWarning:\fP groupd_compat must not be changed from 1 to
-0 in a running cluster (or from 0 to 1).  cluster2 nodes are equivalent to
-groupd_compat="1", so it is fine to add groupd_compat="1" to a running
-cluster2 cluster as described above to perform a rolling upgrade.
+This setting causes the cman init script to start the groupd daemon,
+and causes the groupd, fenced, dlm_controld and gfs_controld daemons to
+operate in the old cluster2 mode so they will be compatible with cluster2
+nodes in the cluster that have not yet been upgraded.
 
-A third option, groupd_compat="2", enables a special phase during startup
-where the daemons attempt to discover cluster2 nodes or cluster3 nodes running
-in mode 1.  If any are found, they run in mode 1, otherwise mode 0.  This mode
-will delay startup and makes cluster operation less predictable.
-
-The cman init script will not start the groupd daemon unless it is needed,
-i.e. groupd_compat=1 (or 2).
-
-.B Logging
-
-By default, log messages at INFO level and above are sent to both
-\fBsyslog\fP(3) and /var/log/cluster/groupd.log.  For complete logging
-configuration options see \fBcluster.conf\fP(5).
+The upgrading setting, including the groupd_compat setting,
+.B cannot be changed in a running cluster.
+The entire cluster must be taken offline to change these because the new
+cluster3 default modes are not compatible with the old cluster2 modes.
+The upgrading/compat settings cause the new cluster3 daemons to run the
+old cluster2 code and protocols.
 
 .SH OPTIONS
+Command line options override a corresponding setting in cluster.conf.
+
 .TP
-\fB-D\fP
-Enable debugging to stderr and don't fork
-.TP
-\fB-L\fP
-Enable debugging to log file (see above)
-.TP
-\fB-g <num>\fP
-group compatibility mode, 0 off, 1 on, 2 detect
-.br
-0: use libcpg, no backward compat, best performance
-.br
-1: use libgroup for compat with cluster2/rhel5
-.br
-2: detect old, or mode 1, nodes that require compat
+.B \-D
+Enable debugging to stderr and don't fork.
 .TP
-\fB-w <secs>\fP
-seconds to wait for a node's version message before
-assuming an old version requiring compat mode
+.B \-L
+Enable debugging to log file.
 .TP
-\fB-d <secs>\fP
-seconds to delay the mode selection to give time
-for an old version to join and force compat mode
+.BI \-g " num"
+groupd compatibility mode, 0 off, 1 on. Default 0.
 .TP
-\fB-V\fP
-Print the version information and exit.
+.B \-h
+Print a help message describing available options, then exit.
 .TP
-\fB-h\fP 
-Print out a help message describing available options, then exit.
-
-.SH DEBUGGING
-The groupd daemon keeps a circular buffer of debug messages that can be
-dumped with the 'group_tool dump' command.
+.B \-V
+Print program version information, then exit.
 
 .SH SEE ALSO
-group_tool(8)
+.BR cman (5),
+.BR fenced (8),
+.BR dlm_controld (8),
+.BR gfs_controld (8)
 
diff --git a/make/fencebuild.mk b/make/fencebuild.mk
index 5f9915a..30cd26b 100644
--- a/make/fencebuild.mk
+++ b/make/fencebuild.mk
@@ -2,6 +2,9 @@ ifndef FENCEAGENTSLIB
 	ifndef SBINDIRT
 		SBINDIRT=$(TARGET)
 	endif
+	ifndef MANTARGET
+		MANTARGET=$(TARGET).8
+	endif
 endif
 
 all: $(TARGET) $(MANTARGET)
@@ -22,6 +25,7 @@ $(TARGET):
 		-e 's#@SBINDIR@#${sbindir}#g' \
 	> $@
 
+ifdef MAKEMAN
 $(MANTARGET): $(MANTARGET:.8=) ${SRCDIR}/fence/agents/lib/fence2man.xsl
 	set -e && \
 	PYTHONPATH=${OBJDIR}/fence/agents/lib \
@@ -30,3 +34,10 @@ $(MANTARGET): $(MANTARGET:.8=) ${SRCDIR}/fence/agents/lib/fence2man.xsl
 
 clean: generalclean
 	rm -f $(MANTARGET) .$(MANTARGET).tmp
+else
+$(MANTARGET): $(S)/$(MANTARGET)
+	cp $< $@
+
+clean: generalclean
+	if [ "$(OBJDIR)" != "$(SRCDIR)" ]; then rm -f $(MANTARGET); fi
+endif
diff --git a/make/official_release_version b/make/official_release_version
index eb53242..54d95df 100644
--- a/make/official_release_version
+++ b/make/official_release_version
@@ -1,2 +1,2 @@
 SONAME "3.0"
-VERSION "3.0.7"
+VERSION "3.0.9"
diff --git a/make/release.mk b/make/release.mk
index be29aaf..f5b1481 100644
--- a/make/release.mk
+++ b/make/release.mk
@@ -1,157 +1,202 @@
-# NOTE: this make file snippet is only used by the release manager
+# NOTE: this make file snippet is only used by the release managers
 # to build official release tarballs, handle tagging and publish.
 #
+# this script is NOT "make -j" safe
+#
 # do _NOT_ use for anything else!!!!!!!!!
 
-## do sanity checks
-
-ifndef VERSION
-
-all:
-	@echo WARNING: VERSION= is not defined!
-	@exit 1
-
-else ifndef OLDVER
-
-all:
-	@echo WARNING: OLDVER= is not defined!
-	@exit 1
-
-else
-
-## setup stuff
-
-MASTERPROJECT=cluster
-
-ifdef RELEASE
-MASTERPV=$(MASTERPROJECT)-$(VERSION)
-TEST=""
+# setup tons of vars
+
+# signing key
+gpgsignkey=0x6CE95CA7
+
+# project layout
+project=cluster
+projectver=$(project)-$(version)
+projecttar=$(projectver).tar
+projectgz=$(projecttar).gz
+projectbz=$(projecttar).bz2
+
+fenceproject=fence-agents
+fenceprojectver=$(fenceproject)-$(version)
+fenceprojecttar=$(fenceprojectver).tar
+fenceprojectgz=$(fenceprojecttar).gz
+fenceprojectbz=$(fenceprojecttar).bz2
+
+rasproject=resource-agents
+rasprojectver=$(rasproject)-$(version)
+rasprojecttar=$(rasprojectver).tar
+rasprojectgz=$(rasprojecttar).gz
+rasprojectbz=$(rasprojecttar).bz2
+
+rgmproject=rgmanager
+rgmprojectver=$(rgmproject)-$(version)
+rgmprojecttar=$(rgmprojectver).tar
+rgmprojectgz=$(rgmprojecttar).gz
+rgmprojectbz=$(rgmprojecttar).bz2
+
+# temp dirs
+
+ifdef release
+reldir=release
+gitver=$(projectver)
+forceclean=clean
 else
-MASTERPV=HEAD
-TEST="test"
+reldir=release-candidate
+gitver=HEAD
+forceclean=
 endif
-MASTERTGZ=$(TEST)$(MASTERPROJECT)-$(VERSION).tar.gz
 
-# fence-agents
-FENCEPROJECT=fence-agents
-FENCEPV=$(FENCEPROJECT)-$(VERSION)
-FENCETGZ=$(TEST)$(FENCEPV).tar.gz
+releasearea=$(shell pwd)/../$(projectver)-$(reldir)
 
-# resource-agents
-RASPROJECT=resource-agents
-RASPV=$(RASPROJECT)-$(VERSION)
-RASTGZ=$(TEST)$(RASPV).tar.gz
+all: $(forceclean) checks setup tag tarballs changelog sha256 sign
 
-# rgmanager
-RGMPROJECT=rgmanager
-RGMPV=$(RGMPROJECT)-$(VERSION)
-RGMTGZ=$(TEST)$(RGMPV).tar.gz
+checks:
+ifeq (,$(version))
+	@echo ERROR: need to define version=
+	@exit 1
+endif
+ifeq (,$(oldversion))
+	@echo ERROR: need to define oldversion=
+	@exit 1
+endif
+	@if [ ! -d .git ]; then \
+		echo This script needs to be executed from top level cluster git tree; \
+		exit 1; \
+	fi
 
-# gfs-utils
-GFSPROJECT=gfs-utils
-GFSPV=$(GFSPROJECT)-$(VERSION)
-GFSTGZ=$(TEST)$(GFSPV).tar.gz
+setup: checks $(releasearea)
 
-all: tag tarballs
+$(releasearea):
+	mkdir $@
 
-ifdef RELEASE
-tag:
-	git tag -a -m "$(MASTERPV) release" $(MASTERPV) HEAD
+tag: setup $(releasearea)/tag-$(version)
 
+$(releasearea)/tag-$(version):
+ifeq (,$(release))
+	@echo Building test release $(version), no tagging
 else
-tag:
-
+	git tag -a -m "$(projectver) release" $(projectver) HEAD
 endif
-
-tarballs: master-tarball
-tarballs: fence-agents-tarball
-tarballs: resource-agents-tarball
-tarballs: rgmanager-tarball
-tarballs: gfs-tarball
-
-master-tarball:
+	@touch $@
+
+tarballs: tag
+tarballs: $(releasearea)/$(projecttar)
+tarballs: $(releasearea)/$(projectgz)
+tarballs: $(releasearea)/$(projectbz)
+tarballs: $(releasearea)/$(fenceprojecttar)
+tarballs: $(releasearea)/$(fenceprojectgz)
+tarballs: $(releasearea)/$(fenceprojectbz)
+tarballs: $(releasearea)/$(rasprojecttar)
+tarballs: $(releasearea)/$(rasprojectgz)
+tarballs: $(releasearea)/$(rasprojectbz)
+tarballs: $(releasearea)/$(rgmprojecttar)
+tarballs: $(releasearea)/$(rgmprojectgz)
+tarballs: $(releasearea)/$(rgmprojectbz)
+
+$(releasearea)/$(projecttar):
+	@echo Creating $(project) tarball
+	rm -rf $(releasearea)/$(projectver)
 	git archive \
 		--format=tar \
-		--prefix=$(MASTERPROJECT)-$(VERSION)/ \
-		$(MASTERPV) | \
-		tar xp
+		--prefix=$(projectver)/ \
+		$(gitver) | \
+		(cd $(releasearea)/ && tar xf -)
+	cd $(releasearea) && \
 	sed -i -e \
-		's#<CVS>#$(VERSION)#g' \
-		$(MASTERPROJECT)-$(VERSION)/gfs-kernel/src/gfs/gfs.h
-	echo "VERSION \"$(VERSION)\"" \
-		>> $(MASTERPROJECT)-$(VERSION)/make/official_release_version
-	tar cp $(MASTERPROJECT)-$(VERSION) | \
-		gzip -9 \
-		> ../$(MASTERTGZ)
-	rm -rf $(MASTERPROJECT)-$(VERSION)
-
-fence-agents-tarball: master-tarball
-	tar zxpf ../$(MASTERTGZ)
-	mv $(MASTERPROJECT)-$(VERSION) $(FENCEPV)
-	cd $(FENCEPV) && \
-		rm -rf bindings cman common config contrib dlm gfs* group \
-			rgmanager fence/fenced fence/fence_node \
-			fence/fence_tool fence/include fence/libfence \
-			fence/libfenced fence/man/fenced.8 \
-			fence/man/fence_node.8 fence/man/fence_tool.8 && \
-		sed -i -e 's/fence.8//g' -e 's/fenced.8//g' \
-			-e 's/fence_node.8//g' -e 's/fence_tool.8//g' \
-			fence/man/Makefile
-	tar cp $(FENCEPV) | \
-		gzip -9 \
-		> ../$(FENCETGZ)
-	rm -rf $(FENCEPV)
-
-resource-agents-tarball: master-tarball
-	tar zxpf ../$(MASTERTGZ)
-	mv $(MASTERPROJECT)-$(VERSION) $(RASPV)
-	cd $(RASPV) && \
-		rm -rf bindings cman common config contrib dlm fence gfs* \
-			group rgmanager/ChangeLog rgmanager/errors.txt \
-			rgmanager/event-script.txt rgmanager/examples \
-			rgmanager/include rgmanager/init.d rgmanager/man \
-			rgmanager/README rgmanager/src/clulib \
-			rgmanager/src/daemons rgmanager/src/utils
-	tar cp $(RASPV) | \
-		gzip -9 \
-		> ../$(RASTGZ)
-	rm -rf $(RASPV)
-
-rgmanager-tarball: master-tarball
-	tar zxpf ../$(MASTERTGZ)
-	mv $(MASTERPROJECT)-$(VERSION) $(RGMPV)
-	cd $(RGMPV) && \
-		rm -rf bindings cman common config contrib dlm fence gfs* group \
-			rgmanager/src/resources
-	tar cp $(RGMPV) | \
-		gzip -9 \
-		> ../$(RGMTGZ)
-	rm -rf $(RGMPV)
-
-gfs-tarball: master-tarball
-	tar zxpf ../$(MASTERTGZ)
-	mv $(MASTERPROJECT)-$(VERSION) $(GFSPV)
-	cd $(GFSPV) && \
-		rm -rf bindings cman common config contrib dlm fence group \
-			rgmanager gfs2
-	tar cp $(GFSPV) | \
-		gzip -9 \
-		> ../$(GFSTGZ)
-	rm -rf $(GFSPV)
-
-publish:
+		's#<CVS>#$(version)#g' \
+		$(projectver)/gfs-kernel/src/gfs/gfs.h && \
+	echo "VERSION \"$(version)\"" \
+		>> $(projectver)/make/official_release_version && \
+	tar cpf $(projecttar) $(projectver) && \
+	rm -rf $(projectver)
+
+$(releasearea)/$(fenceprojecttar): $(releasearea)/$(projecttar)
+	@echo Creating $(fenceproject) tarball
+	cd $(releasearea) && \
+	rm -rf $(projectver) $(fenceprojectver) && \
+	tar xpf $(projecttar) && \
+	mv $(projectver) $(fenceprojectver) && \
+	cd $(fenceprojectver) && \
+	rm -rf bindings cman common config contrib dlm gfs* group \
+		rgmanager fence/fenced fence/fence_node \
+		fence/fence_tool fence/include fence/libfence \
+		fence/libfenced fence/man && \
+	cd .. && \
+	tar cpf $(fenceprojecttar) $(fenceprojectver) && \
+	rm -rf $(fenceprojectver)
+
+$(releasearea)/$(rasprojecttar): $(releasearea)/$(projecttar)
+	@echo Creating $(rasproject) tarball
+	cd $(releasearea) && \
+	rm -rf $(projectver) $(rasprojectver) && \
+	tar xpf $(projecttar) && \
+	mv $(projectver) $(rasprojectver) && \
+	cd $(rasprojectver) && \
+	rm -rf bindings cman common config contrib dlm fence gfs* \
+		group rgmanager/ChangeLog rgmanager/errors.txt \
+		rgmanager/event-script.txt rgmanager/examples \
+		rgmanager/include rgmanager/init.d rgmanager/man \
+		rgmanager/README rgmanager/src/clulib \
+		rgmanager/src/daemons rgmanager/src/utils && \
+	cd .. && \
+	tar cpf $(rasprojecttar) $(rasprojectver) && \
+	rm -rf $(rasprojectver)
+
+$(releasearea)/$(rgmprojecttar): $(releasearea)/$(projecttar)
+	@echo Creating $(rgmproject) tarball
+	cd $(releasearea) && \
+	rm -rf $(projectver) $(rgmprojectver) && \
+	tar xpf $(projecttar) && \
+	mv $(projectver) $(rgmprojectver) && \
+	cd $(rgmprojectver) && \
+	rm -rf bindings cman common config contrib dlm fence gfs* group \
+		rgmanager/src/resources && \
+	cd .. && \
+	tar cpf $(rgmprojecttar) $(rgmprojectver) && \
+	rm -rf $(rgmprojectver)
+
+$(releasearea)/%.gz: $(releasearea)/%
+	@echo Creating $@
+	cat $< | gzip -9 > $@
+
+$(releasearea)/%.bz2: $(releasearea)/%
+	@echo Creating $@
+	cat $< | bzip2 -c > $@
+
+changelog: checks setup $(releasearea)/Changelog-$(version)
+
+$(releasearea)/Changelog-$(version): $(releasearea)/$(projecttar)
+	git log $(project)-$(oldversion)..$(gitver) | \
+	git shortlog > $@
+	git diff --stat $(project)-$(oldversion)..$(gitver) >> $@
+
+sha256: changelog tarballs $(releasearea)/$(projectver).sha256
+
+$(releasearea)/$(projectver).sha256: $(releasearea)/Changelog-$(version)
+	cd $(releasearea) && \
+	sha256sum Changelog-$(version) *.gz *.bz2 | sort -k2 > $@
+
+sign: sha256 $(releasearea)/$(projectver).sha256.asc
+
+$(releasearea)/$(projectver).sha256.asc: $(releasearea)/$(projectver).sha256
+	cd $(releasearea) && \
+	gpg --default-key $(gpgsignkey) \
+		--detach-sign \
+		--armor \
+		$<
+
+publish: sign
+ifeq (,$(release))
+	@echo Nothing to publish
+else
 	git push --tags origin
-	scp ../$(MASTERTGZ) \
-	    ../$(FENCETGZ) \
-	    ../$(RASTGZ) \
-	    ../$(GFSTGZ) \
-	    ../$(RGMTGZ) \
-		fedorahosted.org:$(MASTERPROJECT)
-	git log $(MASTERPROJECT)-$(OLDVER)..$(MASTERPV) | \
-		git shortlog > ../$(MASTERPV).emaildata
-	git diff --stat $(MASTERPROJECT)-$(OLDVER)..$(MASTERPV) \
-		>> ../$(MASTERPV).emaildata
+	cd $(releasearea) && \
+	scp *.gz *.bz2 Changelog-* *sha256* \
+		fedorahosted.org:$(project)
 	@echo Hey you!.. yeah you looking somewhere else!
 	@echo remember to update the wiki and send the email to cluster-devel and linux-cluster
-
 endif
+
+clean: checks
+	rm -rf $(releasearea)
diff --git a/rgmanager/include/vf.h b/rgmanager/include/vf.h
index 528930c..d57b0cf 100644
--- a/rgmanager/include/vf.h
+++ b/rgmanager/include/vf.h
@@ -152,7 +152,7 @@ typedef struct _key_node {
 /* 
  * VF Stuff.  VF only talks to peers.
  */
-int vf_init(int, uint16_t, vf_vote_cb_t, vf_commit_cb_t);
+int vf_init(int, uint16_t, vf_vote_cb_t, vf_commit_cb_t, int);
 int vf_invalidate(void);
 int vf_shutdown(void);
 
diff --git a/rgmanager/src/clulib/vft.c b/rgmanager/src/clulib/vft.c
index eefba2f..0113bad 100644
--- a/rgmanager/src/clulib/vft.c
+++ b/rgmanager/src/clulib/vft.c
@@ -27,6 +27,7 @@
 static key_node_t *key_list = NULL;	/** List of key nodes. */
 static int _node_id = (int)-1;/** Our node ID, set with vf_init. */
 static uint16_t _port = 0;		/** Our daemon ID, set with vf_init. */
+static int _vf_timeout = 10;
 
 /*
  * TODO: We could make it thread safe, but this might be unnecessary work
@@ -884,7 +885,7 @@ vf_server(void *arg)
  */
 int
 vf_init(int my_node_id, uint16_t my_port, vf_vote_cb_t vcb,
-	vf_commit_cb_t ccb)
+	vf_commit_cb_t ccb, int _cluster_timeout)
 {
 	struct vf_args *args;
 	msgctx_t *ctx;
@@ -911,6 +912,8 @@ vf_init(int my_node_id, uint16_t my_port, vf_vote_cb_t vcb,
 	pthread_mutex_lock(&vf_mutex);
 	_port = my_port;
 	_node_id = my_node_id;
+	if (_cluster_timeout)
+		_vf_timeout = _cluster_timeout;
 	default_vote_cb = vcb;
 	default_commit_cb = ccb;
 	pthread_mutex_unlock(&vf_mutex);
@@ -1223,7 +1226,7 @@ vf_write(cluster_member_list_t *membership, uint32_t flags,
 	 * See if we have a consensus =)
 	 */
 	if ((rv = (vf_unanimous(&everyone, trans, remain,
-				5))) == VFR_OK) {
+				_vf_timeout))) == VFR_OK) {
 		vf_send_commit(&everyone, trans);
 #ifdef DEBUG
 		printf("VF: Consensus reached!\n");
diff --git a/rgmanager/src/daemons/main.c b/rgmanager/src/daemons/main.c
index 885d89d..4e34246 100644
--- a/rgmanager/src/daemons/main.c
+++ b/rgmanager/src/daemons/main.c
@@ -27,7 +27,7 @@
 #ifdef WRAP_THREADS
 void dump_thread_states(FILE *);
 #endif
-static int configure_rgmanager(int ccsfd, int debug);
+static int configure_rgmanager(int ccsfd, int debug, int *cluster_timeout);
 void set_transition_throttling(int);
 
 void flag_shutdown(int sig);
@@ -742,7 +742,7 @@ event_loop(msgctx_t *localctx, msgctx_t *clusterctx)
 
 	if (need_reconfigure) {
 		need_reconfigure = 0;
-		configure_rgmanager(-1, 0);
+		configure_rgmanager(-1, 0, NULL);
 		config_event_q();
 		return 0;
 	}
@@ -788,11 +788,12 @@ statedump(int __attribute__ ((unused)) sig)
  * Configure logging based on data in cluster.conf
  */
 static int
-configure_rgmanager(int ccsfd, int dbg)
+configure_rgmanager(int ccsfd, int dbg, int *token_secs)
 {
 	char *v;
 	char internal = 0;
 	int status_child_max = 0;
+	int tmp;
 
 	if (ccsfd < 0) {
 		internal = 1;
@@ -803,6 +804,16 @@ configure_rgmanager(int ccsfd, int dbg)
 
 	setup_logging(ccsfd);
 
+	if (token_secs && ccs_get(ccsfd, "/cluster/totem/@token", &v) == 0) {
+		tmp = atoi(v);
+		if (tmp >= 1000) {
+			*token_secs = tmp / 1000;
+			if (tmp % 1000)
+				++(*token_secs);
+		}
+		free(v);
+	}
+
 	if (ccs_get(ccsfd, "/cluster/rm/@transition_throttling", &v) == 0) {
 		set_transition_throttling(atoi(v));
 		free(v);
@@ -1018,7 +1029,7 @@ main(int argc, char **argv)
 	   We know we're quorate.  At this point, we need to
 	   read the resource group trees from ccsd.
 	 */
-	configure_rgmanager(-1, debug);
+	configure_rgmanager(-1, debug, &cluster_timeout);
 	logt_print(LOG_NOTICE, "Resource Group Manager Starting\n");
 
 	if (init_resource_groups(0, do_init) != 0) {
@@ -1063,7 +1074,7 @@ main(int argc, char **argv)
 
 	ds_key_init("rg_lockdown", 32, 10);
 #else
-	if (vf_init(me.cn_nodeid, port, NULL, NULL) != 0) {
+	if (vf_init(me.cn_nodeid, port, NULL, NULL, cluster_timeout) != 0) {
 		logt_print(LOG_CRIT, "#11: Couldn't set up VF listen socket\n");
 		goto out_ls;
 	}
diff --git a/rgmanager/src/daemons/rg_state.c b/rgmanager/src/daemons/rg_state.c
index 029100e..d9f02b3 100644
--- a/rgmanager/src/daemons/rg_state.c
+++ b/rgmanager/src/daemons/rg_state.c
@@ -1669,18 +1669,17 @@ handle_relocate_req(char *svcName, int orig_request, int preferred_target,
 		/* TODO: simplify this and don't keep alloc/freeing 
 		   member lists */
 		allowed_nodes = member_list();
-		/* Avoid even bothering the other node if we can */
-		m = memb_id_to_p(allowed_nodes, preferred_target);
-		if (!m) {
-			free_member_list(allowed_nodes);
-			return RG_EINVAL;
-		}
 
-		count_resource_groups_local(m);
-		if (m->cn_svcexcl ||
-	    	    (m->cn_svccount && is_exclusive(svcName))) {
-			free_member_list(allowed_nodes);
-			return RG_EDEPEND;
+		m = memb_id_to_p(allowed_nodes, preferred_target);
+		if (m && m->cn_member) {
+			count_resource_groups_local(m);
+			if (m->cn_svcexcl ||
+			    (m->cn_svccount && is_exclusive(svcName))) {
+				free_member_list(allowed_nodes);
+				return RG_EDEPEND;
+			}
+		} else {
+			target = preferred_target = -1;
 		}
 		free_member_list(allowed_nodes);
 	}
diff --git a/rgmanager/src/resources/SAPDatabase b/rgmanager/src/resources/SAPDatabase
index d6972fd..8ac3005 100644
--- a/rgmanager/src/resources/SAPDatabase
+++ b/rgmanager/src/resources/SAPDatabase
@@ -184,6 +184,19 @@ trap_handler() {
 }
 
 
+do_exit() {
+  # If we've got a tempfile variable and the tempfile exists...
+  #  ... if the return code is 0 *or* the temp file is empty
+  #    remove it.
+  if [ -n "$TEMPFILE" ] && [ -e "$TEMPFILE" ]; then
+    if [ $1 -eq 0 ] || [ "$(stat -c %s $TEMPFILE)" = "0" ]; then
+      rm -f $TEMPFILE
+    fi
+  fi
+  exit $1
+}
+
+
 #
 # listener_start: Start the given listener
 #
@@ -822,10 +835,6 @@ then
   exit $OCF_ERR_ARGS
 fi
 
-# Set a tempfile and make sure to clean it up again
-TEMPFILE="$(mktemp /tmp/SAPDatabase.tmp.XXXXXX)"
-trap trap_handler INT TERM
-
 # These operations don't require OCF instance parameters to be set
 case "$1" in
   meta-data)	meta_data
@@ -840,25 +849,32 @@ case "$1" in
   *);;
 esac
 
+# Set a tempfile and make sure to clean it up again
+TEMPFILE="$(mktemp /tmp/SAPDatabase.tmp.XXXXXX)"
+trap trap_handler INT TERM
+
+# Everything after here must call do_exit to remove temp file
+
+
 US=`id -u -n`
 US=`echo $US`
 if
   [ $US != root  ]
 then
   ocf_log err "$0 must be run as root"
-  exit $OCF_ERR_PERM
+  do_exit $OCF_ERR_PERM
 fi
 
 # mandatory parameter check
 if  [ -z "$OCF_RESKEY_SID" ]; then
   ocf_log err "Please set OCF_RESKEY_SID to the SAP system id!"
-  exit $OCF_ERR_ARGS
+  do_exit $OCF_ERR_ARGS
 fi
 SID=`echo "$OCF_RESKEY_SID"`
 
 if [ -z "$OCF_RESKEY_DBTYPE" ]; then
   ocf_log err "Please set OCF_RESKEY_DBTYPE to the database vendor specific tag (ORA,ADA,DB6)!"
-  exit $OCF_ERR_ARGS
+  do_exit $OCF_ERR_ARGS
 fi
 DBTYPE=`echo "$OCF_RESKEY_DBTYPE" | tr "[a-z]" "[A-Z]"`
 
@@ -877,7 +893,7 @@ else
                         ;;
    0|false|FALSE|no|NO) DBJ2EE_ONLY=0;;
    *) ocf_log err "Parsing parameter DBJ2EE_ONLY: '$DBJ2EE_ONLY' is not a boolean value!"
-      exit $OCF_ERR_ARGS ;;
+      do_exit $OCF_ERR_ARGS ;;
   esac
 fi
 
@@ -897,7 +913,7 @@ else
    1|true|TRUE|yes|YES) OCF_RESKEY_STRICT_MONITORING=1;;
    0|false|FALSE|no|NO) OCF_RESKEY_STRICT_MONITORING=0;;
    *)  ocf_log err "Parsing parameter STRICT_MONITORING: '$OCF_RESKEY_STRICT_MONITORING' is not a boolean value!"
-       exit $OCF_ERR_ARGS ;;
+       do_exit $OCF_ERR_ARGS ;;
   esac
 fi
 
@@ -922,7 +938,7 @@ done
 if [ -z "$DIR_EXECUTABLE" ]
 then
   ocf_log warn "Cannot find $EXESTARTDB,$EXESTOPDB and $EXEDBCONNECT executable, please set DIR_EXECUTABLE parameter!"
-  exit $OCF_NOT_RUNNING
+  do_exit $OCF_NOT_RUNNING
 fi
 
 if [ $DBJ2EE_ONLY -eq 1 ]
@@ -951,7 +967,7 @@ then
       PATH=$JAVA_HOME/bin:$PATH
     else
       ocf_log err "Cannot find JAVA_HOME directory, please set JAVA_HOME parameter!"
-      exit $OCF_NOT_RUNNING
+      do_exit $OCF_NOT_RUNNING
     fi
   fi
 
@@ -985,25 +1001,25 @@ sidadm="`echo $SID | tr [:upper:] [:lower:]`adm"
 case "$1" in
 
   start)	sapdatabase_start
-		exit $?;;
+		do_exit $?;;
 
   stop)		sapdatabase_stop
-		exit $?;;
+		do_exit $?;;
 
   monitor)
           	sapdatabase_monitor $OCF_RESKEY_STRICT_MONITORING
-		exit $?;;
+		do_exit $?;;
 
   status)
                 sapdatabase_status
-                exit $?;;
+                do_exit $?;;
 
   recover)      sapdatabase_recover
-                exit $?;;
+                do_exit $?;;
 
   validate-all)	sapdatabase_validate
-		exit $?;;
+		do_exit $?;;
 
   *)		sapdatabase_methods
-		exit $OCF_ERR_UNIMPLEMENTED;;
+		do_exit $OCF_ERR_UNIMPLEMENTED;;
 esac
diff --git a/rgmanager/src/resources/clusterfs.sh b/rgmanager/src/resources/clusterfs.sh
index 8558d5f..0ccc93d 100644
--- a/rgmanager/src/resources/clusterfs.sh
+++ b/rgmanager/src/resources/clusterfs.sh
@@ -424,8 +424,9 @@ Device $dev is mounted on $tmp_mp instead of $mp"
 # 
 isAlive()
 {
+	declare errcode
 	declare mount_point
-	declare file=".writable_test"
+	declare file=".writable_test.$(hostname)"
 	declare rw
 	
 	if [ $# -ne 1 ]; then
@@ -444,8 +445,10 @@ isAlive()
 	
 	# depth 10 test (read test)
 	ls $mount_point > /dev/null 2> /dev/null
-	if [ $? -ne 0 ]; then
-	       return $NO
+	errcode=$?
+	if [ $errcode -ne 0 ]; then
+		ocf_log err "clusterfs:${OCF_RESKEY_name}: isAlive failed read test on [$mount_point]. Return code: $errcode"
+		return $NO
 	fi
 	
 	[ $OCF_CHECK_LEVEL -lt 20 ] && return $YES
@@ -468,7 +471,11 @@ isAlive()
 			fi
 		done
 		touch $file > /dev/null 2> /dev/null
-		[ $? -ne 0 ] && return $NO
+		errcode=$?
+		if [ $errcode -ne 0 ]; then
+			ocf_log err "clusterfs:${OCF_RESKEY_name}: isAlive failed write test on [$mount_point]. Return code: $errcode"
+			return $NO
+		fi
 		rm -f $file > /dev/null 2> /dev/null
 	fi
 	
diff --git a/rgmanager/src/resources/fs.sh.in b/rgmanager/src/resources/fs.sh.in
index 65cff9c..b037796 100644
--- a/rgmanager/src/resources/fs.sh.in
+++ b/rgmanager/src/resources/fs.sh.in
@@ -620,6 +620,7 @@ isMounted () {
 # 
 isAlive()
 {
+	declare errcode
 	declare mount_point
 	declare file=".writable_test"
 	declare rw
@@ -640,8 +641,10 @@ isAlive()
 	
 	# depth 10 test (read test)
 	ls $mount_point > /dev/null 2> /dev/null
-	if [ $? -ne 0 ]; then
-	       return $NO
+	errcode=$?
+	if [ $errcode -ne 0 ]; then
+		ocf_log err "fs:${OCF_RESKEY_name}: isAlive failed read test on [$mount_point]. Return code: $errcode"
+		return $NO
 	fi
 	
 	[ $OCF_CHECK_LEVEL -lt 20 ] && return $YES
@@ -664,7 +667,11 @@ isAlive()
 			fi
 		done
 		touch $file > /dev/null 2> /dev/null
-		[ $? -ne 0 ] && return $NO
+		errcode=$?
+		if [ $errcode -ne 0 ]; then
+			ocf_log err "fs:${OCF_RESKEY_name}: isAlive failed write test on [$mount_point]. Return code: $errcode"
+			return $NO
+		fi
 		rm -f $file > /dev/null 2> /dev/null
 	fi
 	
@@ -684,6 +691,7 @@ killMountProcesses()
 	typeset have_lsof=""
 	typeset have_fuser=""
 	typeset try
+	typeset command pid user line
 
 	if [ $# -ne 1 ]; then
 		ocf_log err \
@@ -720,30 +728,30 @@ killMountProcesses()
 			#
 			# Use lsof to free up mount point
 			#
-	    		while read command pid user
+			while read line
 			do
-				if [ -z "$pid" ]; then
+				path=$(echo $line | awk '{for (i=9;i<NF+1;i++) {printf "%s ",$i};}')
+				#if [ -z $(echo $path | grep -E "$mp(/| |$)") ]
+				if [ -z $(echo $path | grep -E "^$mp/.*") ]; then
 					continue
 				fi
-
+				command=$(echo $line | awk '{print $1}')
+				pid=$(echo $line | awk '{print $2}')
+				user=$(echo $line | awk '{print $3}')
 				if [ $try -eq 1 ]; then
-					ocf_log warn \
-			 	  "killing process $pid ($user $command $mp)"
+					ocf_log warn "killing process $pid ($user $command $mp)"
 				elif [ $try -eq 3 ]; then
-					ocf_log crit \
-		    		  "Could not clean up mountpoint $mp"
-				ret=$FAIL
+					ocf_log crit "Could not clean up mountpoint $mp"
+					ret=$FAIL
 				fi
-
 				if [ $try -gt 1 ]; then
 					kill -9 $pid
 				else
 					kill -TERM $pid
 				fi
-			done < <(lsof -bn 2>/dev/null | \
-			    grep -E " $mp(/| |$)" | \
-			    awk '{print $1,$2,$3}' | \
-			    sort -u -k 1,3)
+			done< <(lsof -bn 2>/dev/null | \
+				grep -E " $mp(/| |$)" | \
+				sort -u -k 1,3) 
 		elif [ -n "$have_fuser" ]; then
 			#
 			# Use fuser to free up mount point
diff --git a/rgmanager/src/resources/lvm_by_lv.sh b/rgmanager/src/resources/lvm_by_lv.sh
index 2c6262f..54e67bf 100644
--- a/rgmanager/src/resources/lvm_by_lv.sh
+++ b/rgmanager/src/resources/lvm_by_lv.sh
@@ -199,6 +199,15 @@ lv_activate_and_tag()
 			ocf_log err "Unable to delete tag from $lv_path"
 			return $OCF_ERR_GENERIC
 		fi
+
+		if [ `lvs --noheadings -o lv_tags $lv_path` == $tag ]; then
+			ocf_log notice "Removing ownership tag ($tag) from $lv_path"
+			lvchange --deltag $tag $lv_path
+			if [ $? -ne 0 ]; then
+				ocf_log err "Unable to delete tag from $lv_path"
+				return $OCF_ERR_GENERIC
+			fi
+		fi
 	fi
 
 	return $OCF_SUCCESS
diff --git a/rgmanager/src/resources/utils/ra-skelet.sh b/rgmanager/src/resources/utils/ra-skelet.sh
index e892656..5baaaa9 100644
--- a/rgmanager/src/resources/utils/ra-skelet.sh
+++ b/rgmanager/src/resources/utils/ra-skelet.sh
@@ -14,7 +14,13 @@ status_check_pid()
 		return $OCF_ERR_GENERIC
 	fi
 
-	if [ ! -d /proc/`cat "$pid_file"` ]; then
+	read pid < "$pid_file"
+	
+	if [ -z "$pid" ]; then
+		return $OCF_ERR_GENERIC
+	fi
+	
+	if [ ! -d /proc/$pid ]; then
 		return $OCF_ERR_GENERIC
 	fi	
 
diff --git a/rgmanager/src/resources/vm.sh b/rgmanager/src/resources/vm.sh
index 0b5f4b1..35de1d8 100644
--- a/rgmanager/src/resources/vm.sh
+++ b/rgmanager/src/resources/vm.sh
@@ -128,7 +128,7 @@ meta_data()
 		virtual machines instead of xm.  This is required when
 		using non-Xen virtual machines (e.g. qemu / KVM).
 	    </shortdesc>
-            <content type="integer" default="1"/>
+            <content type="integer" default=""/>
         </parameter>
 
 	<parameter name="xmlfile">
diff --git a/scripts/fenceparse b/scripts/fenceparse
index c34cb82..49457a6 100644
--- a/scripts/fenceparse
+++ b/scripts/fenceparse
@@ -10,7 +10,9 @@ infile="$5"
 
 definedata="$(cat $definefile | grep "^\#define $define" | sed -e 's/.*'$define' //')"
 
-realinfile="$(ls $srcdir/$infile.*)"
+realinfile="$(ls $srcdir/$infile.*{py,pl,sh} 2>/dev/null || true)"
+
+[ -z "$realinfile" ] && exit 1
 
 interpreter="$(cat $realinfile | head -n 1 | awk -F "/" '{print $NF}')"
 interpreter="$(echo $interpreter)"

-- 
cluster suite Debian packaging



More information about the Debian-ha-svn-commits mailing list