[Pkg-ceph-commits] [ceph] 01/01: Imported Upstream version 0.80.3

Dmitry Smirnov onlyjob at moszumanska.debian.org
Mon Jul 14 18:12:48 UTC 2014


This is an automated email from the git hooks/post-receive script.

onlyjob pushed a commit to branch upstream
in repository ceph.

commit 09c4b18 (upstream)
Author: Dmitry Smirnov <onlyjob at member.fsf.org>
Date:   Mon Jul 14 18:11:30 2014

    Imported Upstream version 0.80.3
---
 ceph.spec                              | 179 ++++++++++++++++---------
 ceph.spec.in                           | 177 +++++++++++++++---------
 configure                              |  28 ++--
 configure.ac                           |   4 +-
 m4/ac_prog_javac.m4                    |   4 +-
 src/.git_version                       |   4 +-
 src/brag/client/ceph-brag              | 205 ++++++++++++++++++++++++++--
 src/ceph-disk                          |  14 +-
 src/ceph_common.sh                     |  18 +++
 src/crush/CrushWrapper.cc              |  12 ++
 src/crush/CrushWrapper.h               |  32 ++++-
 src/include/ceph_features.h            |   2 +
 src/init-ceph.in                       |   8 +-
 src/java/Makefile.am                   |   4 +-
 src/java/Makefile.in                   |   4 +-
 src/java/native/libcephfs_jni.cc       |   2 +-
 src/librados/librados.cc               |   6 +-
 src/messages/MForward.h                |   2 +-
 src/mon/ConfigKeyService.h             |   8 +-
 src/mon/DataHealthService.cc           |  25 ++--
 src/mon/DataHealthService.h            |   3 +-
 src/mon/HealthMonitor.cc               |  12 +-
 src/mon/HealthMonitor.h                |   5 +-
 src/mon/HealthService.h                |   5 +-
 src/mon/MonCommands.h                  |   8 +-
 src/mon/Monitor.cc                     |   6 +-
 src/mon/OSDMonitor.cc                  | 120 +++++++++++------
 src/mon/OSDMonitor.h                   |  11 +-
 src/mon/QuorumService.h                |   3 +-
 src/msg/Pipe.cc                        |  14 +-
 src/os/FileStore.cc                    |  20 +++
 src/os/HashIndex.cc                    |   6 +-
 src/os/XfsFileStoreBackend.cc          |   8 ++
 src/osd/OSD.cc                         |  72 ++++++----
 src/osd/OSDMap.cc                      |  30 +++--
 src/osd/OSDMap.h                       |   3 +-
 src/osd/PG.cc                          |  30 ++++-
 src/osd/PG.h                           |   7 +
 src/osd/ReplicatedPG.cc                |  40 +++++-
 src/osd/ReplicatedPG.h                 |   2 +
 src/osd/osd_types.cc                   |  20 ++-
 src/osd/osd_types.h                    |   7 +-
 src/osdc/ObjectCacher.cc               |   3 +
 src/osdc/Objecter.cc                   |  21 ++-
 src/pybind/ceph_rest_api.py            |   2 +-
 src/rgw/rgw_json_enc.cc                |   1 +
 src/rgw/rgw_op.cc                      | 237 +++++++++++++++++++++++++++------
 src/rgw/rgw_op.h                       |  28 ++--
 src/rgw/rgw_rados.cc                   | 100 +++++++++-----
 src/rgw/rgw_rados.h                    |  35 ++---
 src/test/cli/osdmaptool/clobber.t      |  12 +-
 src/test/cli/osdmaptool/create-print.t |  20 +--
 src/test/cli/osdmaptool/create-racks.t |  20 +--
 src/test/librados/pool.cc              |  12 +-
 src/test/librados/tier.cc              |   3 +
 src/tools/rados/rados.cc               |   9 ++
 56 files changed, 1218 insertions(+), 455 deletions(-)

diff --git a/ceph.spec b/ceph.spec
index fca0c34..20c0dad 100644
--- a/ceph.spec
+++ b/ceph.spec
@@ -9,7 +9,7 @@
 # common
 #################################################################################
 Name:		ceph
-Version:	0.80.1
+Version:	0.80.3
 Release:	0%{?dist}
 Summary:	User space components of the Ceph file system
 License:	GPL-2.0
@@ -19,16 +19,16 @@ Source0:	http://ceph.com/download/%{name}-%{version}.tar.bz2
 Requires:	librbd1 = %{version}-%{release}
 Requires:	librados2 = %{version}-%{release}
 Requires:	libcephfs1 = %{version}-%{release}
+Requires:	ceph-common = %{version}-%{release}
 Requires:	python
 Requires:	python-argparse
 Requires:	python-ceph
 Requires:	python-requests
-Requires:       xfsprogs
+Requires:	xfsprogs
 Requires:	cryptsetup
 Requires:	parted
 Requires:	util-linux
 Requires:	hdparm
-Requires:       redhat-lsb-core
 Requires(post):	binutils
 BuildRoot:      %{_tmppath}/%{name}-%{version}-build
 BuildRequires:	make
@@ -42,41 +42,41 @@ BuildRequires:	pkgconfig
 BuildRequires:	python
 BuildRequires:	python-nose
 BuildRequires:	python-argparse
-BuildRequires:  libaio-devel
-BuildRequires:  libcurl-devel
-BuildRequires:  libxml2-devel
-BuildRequires:  libuuid-devel
-BuildRequires:  libblkid-devel >= 2.17
-BuildRequires:  leveldb-devel > 1.2
-BuildRequires:  xfsprogs-devel
-BuildRequires:  yasm
+BuildRequires:	libaio-devel
+BuildRequires:	libcurl-devel
+BuildRequires:	libxml2-devel
+BuildRequires:	libuuid-devel
+BuildRequires:	libblkid-devel >= 2.17
+BuildRequires:	leveldb-devel > 1.2
+BuildRequires:	xfsprogs-devel
+BuildRequires:	yasm
 %if 0%{?rhel_version} || 0%{?centos_version} || 0%{?fedora}
-BuildRequires:  snappy-devel
+BuildRequires:	snappy-devel
 %endif
 
 #################################################################################
 # specific
 #################################################################################
 %if ! 0%{?rhel}
-BuildRequires:  sharutils
+BuildRequires:	sharutils
 %endif
 
 %if 0%{defined suse_version}
 %if 0%{?suse_version} > 1210
-Requires:       gptfdisk
-BuildRequires:  gperftools-devel
+Requires:	gptfdisk
+BuildRequires:	gperftools-devel
 %else
-Requires:       scsirastools
-BuildRequires:  google-perftools-devel
+Requires:	scsirastools
+BuildRequires:	google-perftools-devel
 %endif
 Recommends:	logrotate
 BuildRequires:	%insserv_prereq
 BuildRequires:	mozilla-nss-devel
 BuildRequires:	keyutils-devel
 BuildRequires:	libatomic-ops-devel
-BuildRequires:  fdupes
+BuildRequires:	fdupes
 %else
-Requires:       gdisk
+Requires:	gdisk
 BuildRequires:	nss-devel
 BuildRequires:	keyutils-libs-devel
 BuildRequires:	libatomic_ops-devel
@@ -84,7 +84,7 @@ Requires:	gdisk
 Requires(post):	chkconfig
 Requires(preun):chkconfig
 Requires(preun):initscripts
-BuildRequires:  gperftools-devel
+BuildRequires:	gperftools-devel
 %endif
 
 %description
@@ -96,6 +96,17 @@ block and file system storage.
 #################################################################################
 # packages
 #################################################################################
+%package -n ceph-common
+Summary:	Ceph Common
+Group:		System Environment/Base
+Requires:	librbd1 = %{version}-%{release}
+Requires:	librados2 = %{version}-%{release}
+Requires:	python-ceph = %{version}-%{release}
+Requires:	python-requests
+Requires:	redhat-lsb-core
+%description -n ceph-common
+common utilities to mount and interact with a ceph storage cluster
+
 %package fuse
 Summary:	Ceph fuse-based client
 Group:		System Environment/Base
@@ -108,6 +119,8 @@ FUSE based client for Ceph distributed network file system
 Summary:	Ceph fuse-based client
 Group:		System Environment/Base
 Requires:	%{name}
+Requires:	librados2 = %{version}-%{release}
+Requires:	librbd1 = %{version}-%{release}
 BuildRequires:	fuse-devel
 %description -n rbd-fuse
 FUSE based client to map Ceph rbd images to files
@@ -117,9 +130,10 @@ Summary:	Ceph headers
 Group:		Development/Libraries
 License:	LGPL-2.0
 Requires:	%{name} = %{version}-%{release}
-Requires:	librados2 = %{version}
-Requires:	librbd1 = %{version}
-Requires:	libcephfs1 = %{version}
+Requires:	librados2 = %{version}-%{release}
+Requires:	librbd1 = %{version}-%{release}
+Requires:	libcephfs1 = %{version}-%{release}
+Requires:	libcephfs_jni1 = %{version}-%{release}
 %description devel
 This package contains libraries and headers needed to develop programs
 that use Ceph.
@@ -127,6 +141,7 @@ that use Ceph.
 %package radosgw
 Summary:	Rados REST gateway
 Group:		Development/Libraries
+Requires:	ceph-common = %{version}-%{release}
 Requires:	librados2 = %{version}-%{release}
 %if 0%{defined suse_version}
 BuildRequires:	libexpat-devel
@@ -171,6 +186,7 @@ store using a simple file-like interface.
 Summary:	RADOS block device client library
 Group:		System Environment/Libraries
 License:	LGPL-2.0
+Requires:	librados2 = %{version}-%{release}
 %if 0%{?rhel_version} || 0%{?centos_version} || 0%{?fedora}
 Obsoletes:	ceph-libs
 %endif
@@ -199,7 +215,6 @@ Group:		System Environment/Libraries
 License:	LGPL-2.0
 Requires:	librados2 = %{version}-%{release}
 Requires:	librbd1 = %{version}-%{release}
-Requires:	libcephfs1 = %{version}-%{release}
 Requires:	python-flask
 %if 0%{defined suse_version}
 %py_requires
@@ -212,9 +227,7 @@ object storage.
 Summary:	RESTful benchmark
 Group:		System Environment/Libraries
 License:	LGPL-2.0
-Requires:	librados2 = %{version}-%{release}
-Requires:	librbd1 = %{version}-%{release}
-Requires:	libcephfs1 = %{version}-%{release}
+Requires:	ceph-common = %{version}-%{release}
 %description -n rest-bench
 RESTful bencher that can be used to benchmark radosgw performance.
 
@@ -234,7 +247,7 @@ Group:		System Environment/Libraries
 License:	LGPL-2.0
 Requires:	java
 Requires:	libcephfs1 = %{version}-%{release}
-BuildRequires:  java-devel
+BuildRequires:	java-devel
 %description -n libcephfs_jni1
 This package contains the Java Native Interface library for CephFS Java
 bindings.
@@ -245,9 +258,9 @@ Group:		System Environment/Libraries
 License:	LGPL-2.0
 Requires:	java
 Requires:	libcephfs_jni1 = %{version}-%{release}
-BuildRequires:  java-devel
-Requires:       junit4
-BuildRequires:  junit4
+BuildRequires:	java-devel
+Requires:	junit4
+BuildRequires:	junit4
 %description -n cephfs-java
 This package contains the Java libraries for the Ceph File System.
 
@@ -316,8 +329,13 @@ chmod 0644 $RPM_BUILD_ROOT%{_docdir}/ceph/sample.ceph.conf
 chmod 0644 $RPM_BUILD_ROOT%{_docdir}/ceph/sample.fetch_config
 
 # udev rules
+%if 0%{?rhel} >= 7
+install -m 0644 -D udev/50-rbd.rules $RPM_BUILD_ROOT/usr/lib/udev/rules.d/50-rbd.rules
+install -m 0644 -D udev/60-ceph-partuuid-workaround.rules $RPM_BUILD_ROOT/usr/lib/udev/rules.d/60-ceph-partuuid-workaround.rules
+%else
 install -m 0644 -D udev/50-rbd.rules $RPM_BUILD_ROOT/lib/udev/rules.d/50-rbd.rules
 install -m 0644 -D udev/60-ceph-partuuid-workaround.rules $RPM_BUILD_ROOT/lib/udev/rules.d/60-ceph-partuuid-workaround.rules
+%endif
 
 %if (0%{?rhel} || 0%{?rhel} < 7)
 install -m 0644 -D udev/95-ceph-osd-alt.rules $RPM_BUILD_ROOT/lib/udev/rules.d/95-ceph-osd.rules
@@ -325,6 +343,13 @@ install -m 0644 -D udev/95-ceph-osd-alt.rules $RPM_BUILD_ROOT/lib/udev/rules.d/9
 install -m 0644 -D udev/95-ceph-osd.rules $RPM_BUILD_ROOT/lib/udev/rules.d/95-ceph-osd.rules
 %endif
 
+%if 0%{?rhel} >= 7
+mv $RPM_BUILD_ROOT/lib/udev/rules.d/95-ceph-osd.rules $RPM_BUILD_ROOT/usr/lib/udev/rules.d/95-ceph-osd.rules
+mv $RPM_BUILD_ROOT/sbin/mkcephfs $RPM_BUILD_ROOT/usr/sbin/mkcephfs
+mv $RPM_BUILD_ROOT/sbin/mount.ceph $RPM_BUILD_ROOT/usr/sbin/mount.ceph
+mv $RPM_BUILD_ROOT/sbin/mount.fuse.ceph $RPM_BUILD_ROOT/usr/sbin/mount.fuse.ceph
+%endif
+
 #set up placeholder directories
 mkdir -p $RPM_BUILD_ROOT%{_sysconfdir}/ceph
 mkdir -p $RPM_BUILD_ROOT%{_localstatedir}/run/ceph
@@ -369,11 +394,7 @@ fi
 %restart_on_update ceph
 %insserv_cleanup
 %endif
-# Package removal cleanup
-if [ "$1" -eq "0" ] ; then
-    rm -rf /var/log/ceph
-    rm -rf /etc/ceph
-fi
+
 
 #################################################################################
 # files
@@ -384,31 +405,18 @@ fi
 %dir %{_docdir}/ceph
 %{_docdir}/ceph/sample.ceph.conf
 %{_docdir}/ceph/sample.fetch_config
-%{_datadir}/ceph/known_hosts_drop.ceph.com
-%{_datadir}/ceph/id_dsa_drop.ceph.com
-%{_datadir}/ceph/id_dsa_drop.ceph.com.pub
-%{_bindir}/ceph
 %{_bindir}/cephfs
-%{_bindir}/ceph-conf
 %{_bindir}/ceph-clsinfo
 %{_bindir}/ceph-rest-api
 %{_bindir}/crushtool
 %{_bindir}/monmaptool
 %{_bindir}/osdmaptool
-%{_bindir}/ceph-authtool
-%{_bindir}/ceph-syn
-%{_bindir}/ceph-post-file
-%{_bindir}/ceph-brag
-%{_bindir}/ceph-crush-location
 %{_bindir}/ceph-run
 %{_bindir}/ceph-mon
 %{_bindir}/ceph-mds
 %{_bindir}/ceph-osd
 %{_bindir}/ceph-rbdnamer
-%{_bindir}/ceph-dencoder
 %{_bindir}/librados-config
-%{_bindir}/rados
-%{_bindir}/rbd
 %{_bindir}/ceph-client-debug
 %{_bindir}/ceph-debugpack
 %{_bindir}/ceph-coverage
@@ -420,8 +428,11 @@ fi
 %{_sbindir}/ceph-disk-udev
 %{_sbindir}/ceph-create-keys
 %{_sbindir}/rcceph
-/sbin/mkcephfs
+%if 0%{?rhel} >= 7
+%{_sbindir}/mount.ceph
+%else
 /sbin/mount.ceph
+%endif
 %dir %{_libdir}/ceph
 %{_libdir}/ceph/ceph_common.sh
 %dir %{_libdir}/rados-classes
@@ -444,16 +455,14 @@ fi
 %{_libdir}/ceph/erasure-code/libec_jerasure*.so*
 %{_libdir}/ceph/erasure-code/libec_test_jerasure*.so*
 %{_libdir}/ceph/erasure-code/libec_missing_entry_point.so*
-/lib/udev/rules.d/50-rbd.rules
+%if 0%{?rhel} >= 7
+/usr/lib/udev/rules.d/60-ceph-partuuid-workaround.rules
+/usr/lib/udev/rules.d/95-ceph-osd.rules
+%else
 /lib/udev/rules.d/60-ceph-partuuid-workaround.rules
 /lib/udev/rules.d/95-ceph-osd.rules
-%dir %{_sysconfdir}/ceph/
+%endif
 %config %{_sysconfdir}/bash_completion.d/ceph
-%config %{_sysconfdir}/bash_completion.d/rados
-%config %{_sysconfdir}/bash_completion.d/radosgw-admin
-%config %{_sysconfdir}/bash_completion.d/rbd
-%{_initrddir}/rbdmap
-%config(noreplace) %{_sysconfdir}/ceph/rbdmap
 %config(noreplace) %{_sysconfdir}/logrotate.d/ceph
 %config(noreplace) %{_sysconfdir}/logrotate.d/radosgw
 %{_mandir}/man8/ceph-mon.8*
@@ -461,21 +470,13 @@ fi
 %{_mandir}/man8/ceph-osd.8*
 %{_mandir}/man8/mkcephfs.8*
 %{_mandir}/man8/ceph-run.8*
-%{_mandir}/man8/ceph-syn.8*
-%{_mandir}/man8/ceph-post-file.8*
-%{_mandir}/man8/ceph-dencoder.8*
 %{_mandir}/man8/ceph-rest-api.8*
 %{_mandir}/man8/crushtool.8*
 %{_mandir}/man8/osdmaptool.8*
 %{_mandir}/man8/monmaptool.8*
-%{_mandir}/man8/ceph-conf.8*
-%{_mandir}/man8/ceph.8*
 %{_mandir}/man8/cephfs.8*
 %{_mandir}/man8/mount.ceph.8*
-%{_mandir}/man8/rados.8*
-%{_mandir}/man8/rbd.8*
 %{_mandir}/man8/ceph-rbdnamer.8*
-%{_mandir}/man8/ceph-authtool.8*
 %{_mandir}/man8/ceph-debugpack.8*
 %{_mandir}/man8/ceph-clsinfo.8.gz
 %{_mandir}/man8/librados-config.8.gz
@@ -487,16 +488,56 @@ fi
 %dir %{_localstatedir}/lib/ceph/mds
 %dir %{_localstatedir}/lib/ceph/bootstrap-osd
 %dir %{_localstatedir}/lib/ceph/bootstrap-mds
-%dir %{_localstatedir}/log/ceph/
 %ghost %dir %{_localstatedir}/run/ceph/
 
+#################################################################################
+%files -n ceph-common
+%defattr(-,root,root,-)
+%{_bindir}/ceph
+%{_bindir}/ceph-authtool
+%{_bindir}/ceph-conf
+%{_bindir}/ceph-dencoder
+%{_bindir}/ceph-syn
+%{_bindir}/ceph-crush-location
+%{_bindir}/rados
+%{_bindir}/rbd
+%{_bindir}/ceph-post-file
+%{_bindir}/ceph-brag
+%{_mandir}/man8/ceph-authtool.8*
+%{_mandir}/man8/ceph-conf.8*
+%{_mandir}/man8/ceph-dencoder.8*
+%{_mandir}/man8/ceph-syn.8*
+%{_mandir}/man8/ceph-post-file.8*
+%{_mandir}/man8/ceph.8*
+%{_mandir}/man8/rados.8*
+%{_mandir}/man8/rbd.8*
+%{_datadir}/ceph/known_hosts_drop.ceph.com
+%{_datadir}/ceph/id_dsa_drop.ceph.com
+%{_datadir}/ceph/id_dsa_drop.ceph.com.pub
+%dir %{_sysconfdir}/ceph/
+%dir %{_localstatedir}/log/ceph/
+%config %{_sysconfdir}/bash_completion.d/rados
+%config %{_sysconfdir}/bash_completion.d/rbd
+%config(noreplace) %{_sysconfdir}/ceph/rbdmap
+%{_initrddir}/rbdmap
+
+%postun -n ceph-common
+# Package removal cleanup
+if [ "$1" -eq "0" ] ; then
+    rm -rf /var/log/ceph
+    rm -rf /etc/ceph
+fi
 
 #################################################################################
 %files fuse
 %defattr(-,root,root,-)
 %{_bindir}/ceph-fuse
 %{_mandir}/man8/ceph-fuse.8*
+%if 0%{?rhel} >= 7
+%{_sbindir}/mount.fuse.ceph
+%else
 /sbin/mount.fuse.ceph
+%endif
 
 #################################################################################
 %files -n rbd-fuse
@@ -536,6 +577,7 @@ fi
 %{_mandir}/man8/radosgw.8*
 %{_mandir}/man8/radosgw-admin.8*
 %{_sbindir}/rcceph-radosgw
+%config %{_sysconfdir}/bash_completion.d/radosgw-admin
 %dir %{_localstatedir}/log/radosgw/
 
 %post radosgw
@@ -586,6 +628,11 @@ fi
 %files -n librbd1
 %defattr(-,root,root,-)
 %{_libdir}/librbd.so.*
+%if 0%{?rhel} >= 7
+/usr/lib/udev/rules.d/50-rbd.rules
+%else
+/lib/udev/rules.d/50-rbd.rules
+%endif
 
 %post -n librbd1
 /sbin/ldconfig
diff --git a/ceph.spec.in b/ceph.spec.in
index ab30883..70f804d 100644
--- a/ceph.spec.in
+++ b/ceph.spec.in
@@ -19,16 +19,16 @@ Source0:	http://ceph.com/download/%{name}-%{version}.tar.bz2
 Requires:	librbd1 = %{version}-%{release}
 Requires:	librados2 = %{version}-%{release}
 Requires:	libcephfs1 = %{version}-%{release}
+Requires:	ceph-common = %{version}-%{release}
 Requires:	python
 Requires:	python-argparse
 Requires:	python-ceph
 Requires:	python-requests
-Requires:       xfsprogs
+Requires:	xfsprogs
 Requires:	cryptsetup
 Requires:	parted
 Requires:	util-linux
 Requires:	hdparm
-Requires:       redhat-lsb-core
 Requires(post):	binutils
 BuildRoot:      %{_tmppath}/%{name}-%{version}-build
 BuildRequires:	make
@@ -42,41 +42,41 @@ BuildRequires:	pkgconfig
 BuildRequires:	python
 BuildRequires:	python-nose
 BuildRequires:	python-argparse
-BuildRequires:  libaio-devel
-BuildRequires:  libcurl-devel
-BuildRequires:  libxml2-devel
-BuildRequires:  libuuid-devel
-BuildRequires:  libblkid-devel >= 2.17
-BuildRequires:  leveldb-devel > 1.2
-BuildRequires:  xfsprogs-devel
-BuildRequires:  yasm
+BuildRequires:	libaio-devel
+BuildRequires:	libcurl-devel
+BuildRequires:	libxml2-devel
+BuildRequires:	libuuid-devel
+BuildRequires:	libblkid-devel >= 2.17
+BuildRequires:	leveldb-devel > 1.2
+BuildRequires:	xfsprogs-devel
+BuildRequires:	yasm
 %if 0%{?rhel_version} || 0%{?centos_version} || 0%{?fedora}
-BuildRequires:  snappy-devel
+BuildRequires:	snappy-devel
 %endif
 
 #################################################################################
 # specific
 #################################################################################
 %if ! 0%{?rhel}
-BuildRequires:  sharutils
+BuildRequires:	sharutils
 %endif
 
 %if 0%{defined suse_version}
 %if 0%{?suse_version} > 1210
-Requires:       gptfdisk
-BuildRequires:  gperftools-devel
+Requires:	gptfdisk
+BuildRequires:	gperftools-devel
 %else
-Requires:       scsirastools
-BuildRequires:  google-perftools-devel
+Requires:	scsirastools
+BuildRequires:	google-perftools-devel
 %endif
 Recommends:	logrotate
 BuildRequires:	%insserv_prereq
 BuildRequires:	mozilla-nss-devel
 BuildRequires:	keyutils-devel
 BuildRequires:	libatomic-ops-devel
-BuildRequires:  fdupes
+BuildRequires:	fdupes
 %else
-Requires:       gdisk
+Requires:	gdisk
 BuildRequires:	nss-devel
 BuildRequires:	keyutils-libs-devel
 BuildRequires:	libatomic_ops-devel
@@ -84,7 +84,7 @@ Requires:	gdisk
 Requires(post):	chkconfig
 Requires(preun):chkconfig
 Requires(preun):initscripts
-BuildRequires:  gperftools-devel
+BuildRequires:	gperftools-devel
 %endif
 
 %description
@@ -96,6 +96,17 @@ block and file system storage.
 #################################################################################
 # packages
 #################################################################################
+%package -n ceph-common
+Summary:	Ceph Common
+Group:		System Environment/Base
+Requires:	librbd1 = %{version}-%{release}
+Requires:	librados2 = %{version}-%{release}
+Requires:	python-ceph = %{version}-%{release}
+Requires:	python-requests
+Requires:	redhat-lsb-core
+%description -n ceph-common
+common utilities to mount and interact with a ceph storage cluster
+
 %package fuse
 Summary:	Ceph fuse-based client
 Group:		System Environment/Base
@@ -108,6 +119,8 @@ FUSE based client for Ceph distributed network file system
 Summary:	Ceph fuse-based client
 Group:		System Environment/Base
 Requires:	%{name}
+Requires:	librados2 = %{version}-%{release}
+Requires:	librbd1 = %{version}-%{release}
 BuildRequires:	fuse-devel
 %description -n rbd-fuse
 FUSE based client to map Ceph rbd images to files
@@ -117,9 +130,10 @@ Summary:	Ceph headers
 Group:		Development/Libraries
 License:	LGPL-2.0
 Requires:	%{name} = %{version}-%{release}
-Requires:	librados2 = %{version}
-Requires:	librbd1 = %{version}
-Requires:	libcephfs1 = %{version}
+Requires:	librados2 = %{version}-%{release}
+Requires:	librbd1 = %{version}-%{release}
+Requires:	libcephfs1 = %{version}-%{release}
+Requires:	libcephfs_jni1 = %{version}-%{release}
 %description devel
 This package contains libraries and headers needed to develop programs
 that use Ceph.
@@ -127,6 +141,7 @@ that use Ceph.
 %package radosgw
 Summary:	Rados REST gateway
 Group:		Development/Libraries
+Requires:	ceph-common = %{version}-%{release}
 Requires:	librados2 = %{version}-%{release}
 %if 0%{defined suse_version}
 BuildRequires:	libexpat-devel
@@ -171,6 +186,7 @@ store using a simple file-like interface.
 Summary:	RADOS block device client library
 Group:		System Environment/Libraries
 License:	LGPL-2.0
+Requires:	librados2 = %{version}-%{release}
 %if 0%{?rhel_version} || 0%{?centos_version} || 0%{?fedora}
 Obsoletes:	ceph-libs
 %endif
@@ -199,7 +215,6 @@ Group:		System Environment/Libraries
 License:	LGPL-2.0
 Requires:	librados2 = %{version}-%{release}
 Requires:	librbd1 = %{version}-%{release}
-Requires:	libcephfs1 = %{version}-%{release}
 Requires:	python-flask
 %if 0%{defined suse_version}
 %py_requires
@@ -212,9 +227,7 @@ object storage.
 Summary:	RESTful benchmark
 Group:		System Environment/Libraries
 License:	LGPL-2.0
-Requires:	librados2 = %{version}-%{release}
-Requires:	librbd1 = %{version}-%{release}
-Requires:	libcephfs1 = %{version}-%{release}
+Requires:	ceph-common = %{version}-%{release}
 %description -n rest-bench
 RESTful bencher that can be used to benchmark radosgw performance.
 
@@ -234,7 +247,7 @@ Group:		System Environment/Libraries
 License:	LGPL-2.0
 Requires:	java
 Requires:	libcephfs1 = %{version}-%{release}
-BuildRequires:  java-devel
+BuildRequires:	java-devel
 %description -n libcephfs_jni1
 This package contains the Java Native Interface library for CephFS Java
 bindings.
@@ -245,9 +258,9 @@ Group:		System Environment/Libraries
 License:	LGPL-2.0
 Requires:	java
 Requires:	libcephfs_jni1 = %{version}-%{release}
-BuildRequires:  java-devel
-Requires:       junit4
-BuildRequires:  junit4
+BuildRequires:	java-devel
+Requires:	junit4
+BuildRequires:	junit4
 %description -n cephfs-java
 This package contains the Java libraries for the Ceph File System.
 
@@ -316,8 +329,13 @@ chmod 0644 $RPM_BUILD_ROOT%{_docdir}/ceph/sample.ceph.conf
 chmod 0644 $RPM_BUILD_ROOT%{_docdir}/ceph/sample.fetch_config
 
 # udev rules
+%if 0%{?rhel} >= 7
+install -m 0644 -D udev/50-rbd.rules $RPM_BUILD_ROOT/usr/lib/udev/rules.d/50-rbd.rules
+install -m 0644 -D udev/60-ceph-partuuid-workaround.rules $RPM_BUILD_ROOT/usr/lib/udev/rules.d/60-ceph-partuuid-workaround.rules
+%else
 install -m 0644 -D udev/50-rbd.rules $RPM_BUILD_ROOT/lib/udev/rules.d/50-rbd.rules
 install -m 0644 -D udev/60-ceph-partuuid-workaround.rules $RPM_BUILD_ROOT/lib/udev/rules.d/60-ceph-partuuid-workaround.rules
+%endif
 
 %if (0%{?rhel} || 0%{?rhel} < 7)
 install -m 0644 -D udev/95-ceph-osd-alt.rules $RPM_BUILD_ROOT/lib/udev/rules.d/95-ceph-osd.rules
@@ -325,6 +343,13 @@ install -m 0644 -D udev/95-ceph-osd-alt.rules $RPM_BUILD_ROOT/lib/udev/rules.d/9
 install -m 0644 -D udev/95-ceph-osd.rules $RPM_BUILD_ROOT/lib/udev/rules.d/95-ceph-osd.rules
 %endif
 
+%if 0%{?rhel} >= 7
+mv $RPM_BUILD_ROOT/lib/udev/rules.d/95-ceph-osd.rules $RPM_BUILD_ROOT/usr/lib/udev/rules.d/95-ceph-osd.rules
+mv $RPM_BUILD_ROOT/sbin/mkcephfs $RPM_BUILD_ROOT/usr/sbin/mkcephfs
+mv $RPM_BUILD_ROOT/sbin/mount.ceph $RPM_BUILD_ROOT/usr/sbin/mount.ceph
+mv $RPM_BUILD_ROOT/sbin/mount.fuse.ceph $RPM_BUILD_ROOT/usr/sbin/mount.fuse.ceph
+%endif
+
 #set up placeholder directories
 mkdir -p $RPM_BUILD_ROOT%{_sysconfdir}/ceph
 mkdir -p $RPM_BUILD_ROOT%{_localstatedir}/run/ceph
@@ -369,11 +394,7 @@ fi
 %restart_on_update ceph
 %insserv_cleanup
 %endif
-# Package removal cleanup
-if [ "$1" -eq "0" ] ; then
-    rm -rf /var/log/ceph
-    rm -rf /etc/ceph
-fi
+
 
 #################################################################################
 # files
@@ -384,31 +405,18 @@ fi
 %dir %{_docdir}/ceph
 %{_docdir}/ceph/sample.ceph.conf
 %{_docdir}/ceph/sample.fetch_config
-%{_datadir}/ceph/known_hosts_drop.ceph.com
-%{_datadir}/ceph/id_dsa_drop.ceph.com
-%{_datadir}/ceph/id_dsa_drop.ceph.com.pub
-%{_bindir}/ceph
 %{_bindir}/cephfs
-%{_bindir}/ceph-conf
 %{_bindir}/ceph-clsinfo
 %{_bindir}/ceph-rest-api
 %{_bindir}/crushtool
 %{_bindir}/monmaptool
 %{_bindir}/osdmaptool
-%{_bindir}/ceph-authtool
-%{_bindir}/ceph-syn
-%{_bindir}/ceph-post-file
-%{_bindir}/ceph-brag
-%{_bindir}/ceph-crush-location
 %{_bindir}/ceph-run
 %{_bindir}/ceph-mon
 %{_bindir}/ceph-mds
 %{_bindir}/ceph-osd
 %{_bindir}/ceph-rbdnamer
-%{_bindir}/ceph-dencoder
 %{_bindir}/librados-config
-%{_bindir}/rados
-%{_bindir}/rbd
 %{_bindir}/ceph-client-debug
 %{_bindir}/ceph-debugpack
 %{_bindir}/ceph-coverage
@@ -420,8 +428,11 @@ fi
 %{_sbindir}/ceph-disk-udev
 %{_sbindir}/ceph-create-keys
 %{_sbindir}/rcceph
-/sbin/mkcephfs
+%if 0%{?rhel} >= 7
+%{_sbindir}/mount.ceph
+%else
 /sbin/mount.ceph
+%endif
 %dir %{_libdir}/ceph
 %{_libdir}/ceph/ceph_common.sh
 %dir %{_libdir}/rados-classes
@@ -444,16 +455,14 @@ fi
 %{_libdir}/ceph/erasure-code/libec_jerasure*.so*
 %{_libdir}/ceph/erasure-code/libec_test_jerasure*.so*
 %{_libdir}/ceph/erasure-code/libec_missing_entry_point.so*
-/lib/udev/rules.d/50-rbd.rules
+%if 0%{?rhel} >= 7
+/usr/lib/udev/rules.d/60-ceph-partuuid-workaround.rules
+/usr/lib/udev/rules.d/95-ceph-osd.rules
+%else
 /lib/udev/rules.d/60-ceph-partuuid-workaround.rules
 /lib/udev/rules.d/95-ceph-osd.rules
-%dir %{_sysconfdir}/ceph/
+%endif
 %config %{_sysconfdir}/bash_completion.d/ceph
-%config %{_sysconfdir}/bash_completion.d/rados
-%config %{_sysconfdir}/bash_completion.d/radosgw-admin
-%config %{_sysconfdir}/bash_completion.d/rbd
-%{_initrddir}/rbdmap
-%config(noreplace) %{_sysconfdir}/ceph/rbdmap
 %config(noreplace) %{_sysconfdir}/logrotate.d/ceph
 %config(noreplace) %{_sysconfdir}/logrotate.d/radosgw
 %{_mandir}/man8/ceph-mon.8*
@@ -461,21 +470,13 @@ fi
 %{_mandir}/man8/ceph-osd.8*
 %{_mandir}/man8/mkcephfs.8*
 %{_mandir}/man8/ceph-run.8*
-%{_mandir}/man8/ceph-syn.8*
-%{_mandir}/man8/ceph-post-file.8*
-%{_mandir}/man8/ceph-dencoder.8*
 %{_mandir}/man8/ceph-rest-api.8*
 %{_mandir}/man8/crushtool.8*
 %{_mandir}/man8/osdmaptool.8*
 %{_mandir}/man8/monmaptool.8*
-%{_mandir}/man8/ceph-conf.8*
-%{_mandir}/man8/ceph.8*
 %{_mandir}/man8/cephfs.8*
 %{_mandir}/man8/mount.ceph.8*
-%{_mandir}/man8/rados.8*
-%{_mandir}/man8/rbd.8*
 %{_mandir}/man8/ceph-rbdnamer.8*
-%{_mandir}/man8/ceph-authtool.8*
 %{_mandir}/man8/ceph-debugpack.8*
 %{_mandir}/man8/ceph-clsinfo.8.gz
 %{_mandir}/man8/librados-config.8.gz
@@ -487,16 +488,56 @@ fi
 %dir %{_localstatedir}/lib/ceph/mds
 %dir %{_localstatedir}/lib/ceph/bootstrap-osd
 %dir %{_localstatedir}/lib/ceph/bootstrap-mds
-%dir %{_localstatedir}/log/ceph/
 %ghost %dir %{_localstatedir}/run/ceph/
 
+#################################################################################
+%files -n ceph-common
+%defattr(-,root,root,-)
+%{_bindir}/ceph
+%{_bindir}/ceph-authtool
+%{_bindir}/ceph-conf
+%{_bindir}/ceph-dencoder
+%{_bindir}/ceph-syn
+%{_bindir}/ceph-crush-location
+%{_bindir}/rados
+%{_bindir}/rbd
+%{_bindir}/ceph-post-file
+%{_bindir}/ceph-brag
+%{_mandir}/man8/ceph-authtool.8*
+%{_mandir}/man8/ceph-conf.8*
+%{_mandir}/man8/ceph-dencoder.8*
+%{_mandir}/man8/ceph-syn.8*
+%{_mandir}/man8/ceph-post-file.8*
+%{_mandir}/man8/ceph.8*
+%{_mandir}/man8/rados.8*
+%{_mandir}/man8/rbd.8*
+%{_datadir}/ceph/known_hosts_drop.ceph.com
+%{_datadir}/ceph/id_dsa_drop.ceph.com
+%{_datadir}/ceph/id_dsa_drop.ceph.com.pub
+%dir %{_sysconfdir}/ceph/
+%dir %{_localstatedir}/log/ceph/
+%config %{_sysconfdir}/bash_completion.d/rados
+%config %{_sysconfdir}/bash_completion.d/rbd
+%config(noreplace) %{_sysconfdir}/ceph/rbdmap
+%{_initrddir}/rbdmap
+
+%postun -n ceph-common
+# Package removal cleanup
+if [ "$1" -eq "0" ] ; then
+    rm -rf /var/log/ceph
+    rm -rf /etc/ceph
+fi
 
 #################################################################################
 %files fuse
 %defattr(-,root,root,-)
 %{_bindir}/ceph-fuse
 %{_mandir}/man8/ceph-fuse.8*
+%if 0%{?rhel} >= 7
+%{_sbindir}/mount.fuse.ceph
+%else
 /sbin/mount.fuse.ceph
+%endif
 
 #################################################################################
 %files -n rbd-fuse
@@ -536,6 +577,7 @@ fi
 %{_mandir}/man8/radosgw.8*
 %{_mandir}/man8/radosgw-admin.8*
 %{_sbindir}/rcceph-radosgw
+%config %{_sysconfdir}/bash_completion.d/radosgw-admin
 %dir %{_localstatedir}/log/radosgw/
 
 %post radosgw
@@ -586,6 +628,11 @@ fi
 %files -n librbd1
 %defattr(-,root,root,-)
 %{_libdir}/librbd.so.*
+%if 0%{?rhel} >= 7
+/usr/lib/udev/rules.d/50-rbd.rules
+%else
+/lib/udev/rules.d/50-rbd.rules
+%endif
 
 %post -n librbd1
 /sbin/ldconfig
diff --git a/configure b/configure
index 961026d..e476e1c 100755
--- a/configure
+++ b/configure
@@ -1,6 +1,6 @@
 #! /bin/sh
 # Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.68 for ceph 0.80.1.
+# Generated by GNU Autoconf 2.68 for ceph 0.80.3.
 #
 # Report bugs to <ceph-devel at vger.kernel.org>.
 #
@@ -570,8 +570,8 @@ MAKEFLAGS=
 # Identity of this package.
 PACKAGE_NAME='ceph'
 PACKAGE_TARNAME='ceph'
-PACKAGE_VERSION='0.80.1'
-PACKAGE_STRING='ceph 0.80.1'
+PACKAGE_VERSION='0.80.3'
+PACKAGE_STRING='ceph 0.80.3'
 PACKAGE_BUGREPORT='ceph-devel at vger.kernel.org'
 PACKAGE_URL=''
 
@@ -1441,7 +1441,7 @@ if test "$ac_init_help" = "long"; then
   # Omit some internal or obsolete options to make the list less imposing.
   # This message is too long to be a string in the A/UX 3.1 sh.
   cat <<_ACEOF
-\`configure' configures ceph 0.80.1 to adapt to many kinds of systems.
+\`configure' configures ceph 0.80.3 to adapt to many kinds of systems.
 
 Usage: $0 [OPTION]... [VAR=VALUE]...
 
@@ -1512,7 +1512,7 @@ fi
 
 if test -n "$ac_init_help"; then
   case $ac_init_help in
-     short | recursive ) echo "Configuration of ceph 0.80.1:";;
+     short | recursive ) echo "Configuration of ceph 0.80.3:";;
    esac
   cat <<\_ACEOF
 
@@ -1657,7 +1657,7 @@ fi
 test -n "$ac_init_help" && exit $ac_status
 if $ac_init_version; then
   cat <<\_ACEOF
-ceph configure 0.80.1
+ceph configure 0.80.3
 generated by GNU Autoconf 2.68
 
 Copyright (C) 2010 Free Software Foundation, Inc.
@@ -2504,7 +2504,7 @@ cat >config.log <<_ACEOF
 This file contains any messages produced by compilers while
 running configure, to aid debugging if configure makes a mistake.
 
-It was created by ceph $as_me 0.80.1, which was
+It was created by ceph $as_me 0.80.3, which was
 generated by GNU Autoconf 2.68.  Invocation command line was
 
   $ $0 $@
@@ -4504,7 +4504,7 @@ fi
 
 # Define the identity of the package.
  PACKAGE='ceph'
- VERSION='0.80.1'
+ VERSION='0.80.3'
 
 
 cat >>confdefs.h <<_ACEOF
@@ -12482,7 +12482,7 @@ fi
 
 # Define the identity of the package.
  PACKAGE='ceph'
- VERSION='0.80.1'
+ VERSION='0.80.3'
 
 
 cat >>confdefs.h <<_ACEOF
@@ -18754,7 +18754,7 @@ fi
 	if test -z "$with_jdk_dir"; then :
 
 		   # This works with Debian's and CentOS' default-jdk package
-       for dir in '/usr/lib/jvm/default-java/' '/usr/lib/jvm/java/' ; do
+       for dir in '/usr/lib/jvm/default-java/' '/usr/lib/jvm/java/' '/usr/lib/jvm/java-gcj/'; do
           # only test if a suitable path has not yet been found
           if test "$EXTRA_JDK_BIN_DIR" == ""; then :
 
@@ -18808,7 +18808,7 @@ fi
 
 
 if test "x$JAVAPREFIX" = x; then
-        test "x$JAVAC" = x && for ac_prog in "gcj$EXEEXT -C" guavac$EXEEXT jikes$EXEEXT javac$EXEEXT
+        test "x$JAVAC" = x && for ac_prog in javac$EXEEXT "gcj$EXEEXT -C" guavac$EXEEXT jikes$EXEEXT
 do
   # Extract the first word of "$ac_prog", so it can be a program name with args.
 set dummy $ac_prog; ac_word=$2
@@ -18851,7 +18851,7 @@ fi
 done
 
 else
-        test "x$JAVAC" = x && for ac_prog in "gcj$EXEEXT -C" guavac$EXEEXT jikes$EXEEXT javac$EXEEXT
+        test "x$JAVAC" = x && for ac_prog in javac$EXEEXT "gcj$EXEEXT -C" guavac$EXEEXT jikes$EXEEXT
 do
   # Extract the first word of "$ac_prog", so it can be a program name with args.
 set dummy $ac_prog; ac_word=$2
@@ -22258,7 +22258,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
 # report actual input values of CONFIG_FILES etc. instead of their
 # values after options handling.
 ac_log="
-This file was extended by ceph $as_me 0.80.1, which was
+This file was extended by ceph $as_me 0.80.3, which was
 generated by GNU Autoconf 2.68.  Invocation command line was
 
   CONFIG_FILES    = $CONFIG_FILES
@@ -22324,7 +22324,7 @@ _ACEOF
 cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
 ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
 ac_cs_version="\\
-ceph config.status 0.80.1
+ceph config.status 0.80.3
 configured by $0, generated by GNU Autoconf 2.68,
   with options \\"\$ac_cs_config\\"
 
diff --git a/configure.ac b/configure.ac
index 36312cf..cf8bd38 100644
--- a/configure.ac
+++ b/configure.ac
@@ -8,7 +8,7 @@ AC_PREREQ(2.59)
 # VERSION define is not used by the code.  It gets a version string
 # from 'git describe'; see src/ceph_ver.[ch]
 
-AC_INIT([ceph], [0.80.1], [ceph-devel at vger.kernel.org])
+AC_INIT([ceph], [0.80.3], [ceph-devel at vger.kernel.org])
 
 # Create release string.  Used with VERSION for RPMs.
 RPM_RELEASE=0
@@ -392,7 +392,7 @@ if test "x$enable_cephfs_java" = "xyes"; then
 	# setup defaults for Debian default-jdk package (without --with-jdk-dir)
 	AS_IF([test -z "$with_jdk_dir"], [
 		   # This works with Debian's and CentOS' default-jdk package
-       for dir in '/usr/lib/jvm/default-java/' '/usr/lib/jvm/java/' ; do
+       for dir in '/usr/lib/jvm/default-java/' '/usr/lib/jvm/java/' '/usr/lib/jvm/java-gcj/'; do
           # only test if a suitable path has not yet been found
           AS_IF([test "$EXTRA_JDK_BIN_DIR" == ""], [
 		          AS_IF([test -x "$javac_prog"], [
diff --git a/m4/ac_prog_javac.m4 b/m4/ac_prog_javac.m4
index 91463e8..f6a2fb2 100644
--- a/m4/ac_prog_javac.m4
+++ b/m4/ac_prog_javac.m4
@@ -35,9 +35,9 @@ dnl @license GPLWithACException
 AC_DEFUN([AC_PROG_JAVAC],[
 AC_REQUIRE([AC_EXEEXT])dnl
 if test "x$JAVAPREFIX" = x; then
-        test "x$JAVAC" = x && AC_CHECK_PROGS(JAVAC, "gcj$EXEEXT -C" guavac$EXEEXT jikes$EXEEXT javac$EXEEXT)
+        test "x$JAVAC" = x && AC_CHECK_PROGS(JAVAC, javac$EXEEXT "gcj$EXEEXT -C" guavac$EXEEXT jikes$EXEEXT)
 else
-        test "x$JAVAC" = x && AC_CHECK_PROGS(JAVAC, "gcj$EXEEXT -C" guavac$EXEEXT jikes$EXEEXT javac$EXEEXT, $JAVAPREFIX)
+        test "x$JAVAC" = x && AC_CHECK_PROGS(JAVAC, javac$EXEEXT "gcj$EXEEXT -C" guavac$EXEEXT jikes$EXEEXT, $JAVAPREFIX)
 fi
 test "x$JAVAC" = x && AC_MSG_ERROR([no acceptable Java compiler found in \$PATH])
 AC_PROG_JAVAC_WORKS
diff --git a/src/.git_version b/src/.git_version
index 2ca1a25..6c56b55 100644
--- a/src/.git_version
+++ b/src/.git_version
@@ -1,2 +1,2 @@
-a38fe1169b6d2ac98b427334c12d7cf81f809b74
-v0.80.1
+a129e85cdc3446a427fffffe4c31617d49024946
+v0.80.3
diff --git a/src/brag/client/ceph-brag b/src/brag/client/ceph-brag
index 91981e5..9ae8e5a 100755
--- a/src/brag/client/ceph-brag
+++ b/src/brag/client/ceph-brag
@@ -7,17 +7,204 @@ import json
 import sys
 import ast
 import requests
-from collections import Counter
+from operator import itemgetter
+from heapq import nlargest
+from itertools import repeat, ifilter
+
 
 CLUSTER_UUID_NAME='cluster-uuid'
 CLUSTER_OWNERSHIP_NAME='cluster-ownership'
 
+
+# Local stand-in for collections.Counter, which this script previously
+# imported.  The implementation relies on Python 2 idioms (dict.iteritems,
+# itertools.ifilter).
+# NOTE(review): presumably added so the script also runs on Python < 2.7,
+# where collections.Counter is unavailable -- confirm.
+class Counter(dict):
+    '''Dict subclass for counting hashable objects.  Sometimes called a bag
+    or multiset.  Elements are stored as dictionary keys and their counts
+    are stored as dictionary values.
+
+    >>> Counter('zyzygy')
+    Counter({'y': 3, 'z': 2, 'g': 1})
+
+    '''
+
+    def __init__(self, iterable=None, **kwds):
+        '''Create a new, empty Counter object.  And if given, count elements
+        from an input iterable.  Or, initialize the count from another mapping
+        of elements to their counts.
+
+        >>> c = Counter()                           # a new, empty counter
+        >>> c = Counter('gallahad')                 # a new counter from an iterable
+        >>> c = Counter({'a': 4, 'b': 2})           # a new counter from a mapping
+        >>> c = Counter(a=4, b=2)                   # a new counter from keyword args
+
+        '''
+        self.update(iterable, **kwds)
+
+    # Reading an absent key yields a count of 0; the key itself is not stored.
+    def __missing__(self, key):
+        return 0
+
+    def most_common(self, n=None):
+        '''List the n most common elements and their counts from the most
+        common to the least.  If n is None, then list all element counts.
+
+        >>> Counter('abracadabra').most_common(3)
+        [('a', 5), ('r', 2), ('b', 2)]
+
+        '''
+        if n is None:
+            return sorted(self.iteritems(), key=itemgetter(1), reverse=True)
+        return nlargest(n, self.iteritems(), key=itemgetter(1))
+
+    def elements(self):
+        '''Iterator over elements repeating each as many times as its count.
+
+        >>> c = Counter('ABCABC')
+        >>> sorted(c.elements())
+        ['A', 'A', 'B', 'B', 'C', 'C']
+
+        If an element's count has been set to zero or is a negative number,
+        elements() will ignore it.
+
+        '''
+        for elem, count in self.iteritems():
+            for _ in repeat(None, count):
+                yield elem
+
+    # Override dict methods where the meaning changes for Counter objects.
+
+    @classmethod
+    def fromkeys(cls, iterable, v=None):
+        raise NotImplementedError(
+            'Counter.fromkeys() is undefined.  Use Counter(iterable) instead.')
+
+    def update(self, iterable=None, **kwds):
+        '''Like dict.update() but add counts instead of replacing them.
+
+        Source can be an iterable, a dictionary, or another Counter instance.
+
+        >>> c = Counter('which')
+        >>> c.update('witch')           # add elements from another iterable
+        >>> d = Counter('watch')
+        >>> c.update(d)                 # add elements from another counter
+        >>> c['h']                      # four 'h' in which, witch, and watch
+        4
+
+        '''
+        if iterable is not None:
+            # Anything exposing iteritems() is treated as element -> count.
+            if hasattr(iterable, 'iteritems'):
+                if self:
+                    self_get = self.get
+                    for elem, count in iterable.iteritems():
+                        self[elem] = self_get(elem, 0) + count
+                else:
+                    dict.update(self, iterable) # fast path when counter is empty
+            else:
+                # Plain iterable: every occurrence adds one to its element.
+                self_get = self.get
+                for elem in iterable:
+                    self[elem] = self_get(elem, 0) + 1
+        # Keyword arguments are folded in as one more element -> count mapping.
+        if kwds:
+            self.update(kwds)
+
+    # Note: always builds a plain Counter, even if self is a subclass instance.
+    def copy(self):
+        'Like dict.copy() but returns a Counter instance instead of a dict.'
+        return Counter(self)
+
+    def __delitem__(self, elem):
+        'Like dict.__delitem__() but does not raise KeyError for missing values.'
+        if elem in self:
+            dict.__delitem__(self, elem)
+
+    def __repr__(self):
+        if not self:
+            return '%s()' % self.__class__.__name__
+        items = ', '.join(map('%r: %r'.__mod__, self.most_common()))
+        return '%s({%s})' % (self.__class__.__name__, items)
+
+    # Multiset-style mathematical operations discussed in:
+    #       Knuth TAOCP Volume II section 4.6.3 exercise 19
+    #       and at http://en.wikipedia.org/wiki/Multiset
+    #
+    # Outputs guaranteed to only include positive counts.
+    #
+    # To strip negative and zero counts, add-in an empty counter:
+    #       c += Counter()
+
+    def __add__(self, other):
+        '''Add counts from two counters.
+
+        >>> Counter('abbb') + Counter('bcc')
+        Counter({'b': 4, 'c': 2, 'a': 1})
+
+
+        '''
+        if not isinstance(other, Counter):
+            return NotImplemented
+        result = Counter()
+        for elem in set(self) | set(other):
+            newcount = self[elem] + other[elem]
+            if newcount > 0:
+                result[elem] = newcount
+        return result
+
+    def __sub__(self, other):
+        ''' Subtract count, but keep only results with positive counts.
+
+        >>> Counter('abbbc') - Counter('bccd')
+        Counter({'b': 2, 'a': 1})
+
+        '''
+        if not isinstance(other, Counter):
+            return NotImplemented
+        result = Counter()
+        for elem in set(self) | set(other):
+            newcount = self[elem] - other[elem]
+            if newcount > 0:
+                result[elem] = newcount
+        return result
+
+    def __or__(self, other):
+        '''Union is the maximum of value in either of the input counters.
+
+        >>> Counter('abbb') | Counter('bcc')
+        Counter({'b': 3, 'c': 2, 'a': 1})
+
+        '''
+        if not isinstance(other, Counter):
+            return NotImplemented
+        _max = max
+        result = Counter()
+        for elem in set(self) | set(other):
+            newcount = _max(self[elem], other[elem])
+            if newcount > 0:
+                result[elem] = newcount
+        return result
+
+    def __and__(self, other):
+        ''' Intersection is the minimum of corresponding counts.
+
+        >>> Counter('abbb') & Counter('bcc')
+        Counter({'b': 1})
+
+        '''
+        if not isinstance(other, Counter):
+            return NotImplemented
+        _min = min
+        result = Counter()
+        # Make `other` the smaller counter: it is the one iterated below,
+        # while the larger one is only probed for membership.
+        if len(self) < len(other):
+            self, other = other, self
+        for elem in ifilter(self.__contains__, other):
+            newcount = _min(self[elem], other[elem])
+            if newcount > 0:
+                result[elem] = newcount
+        return result
+
+
 def run_command(cmd):
   child = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                        stderr=subprocess.PIPE)
   (o, e) = child.communicate()
   return (child.returncode, o, e)
 
+
 def get_uuid():
   (rc,uid,e) = run_command(['ceph', 'config-key', 'get', CLUSTER_UUID_NAME])
   if rc is not 0:
@@ -43,7 +230,7 @@ def bytes_pretty_to_raw(byte_count, byte_scale):
     return byte_count >> 50
   if byte_scale == 'EB':
     return byte_count >> 60
-  
+
   return byte_count
 
 def get_nums():
@@ -64,7 +251,7 @@ def get_nums():
   (rc, o, e) = run_command(['ceph', 'pg', 'dump', 'pools', '-f', 'json-pretty'])
   if rc is not 0:
     raise RuntimeError("\'ceph pg dump pools\' failed - " + e)
- 
+
   pools = json.loads(o)
   num_pools = len(pools)
   num_objs = 0
@@ -126,7 +313,7 @@ def get_osd_dump_info():
 def get_sysinfo(max_osds):
   count = 0
   osd_metadata_available = False
-  
+
   os = {}
   kern_version = {}
   kern_description = {}
@@ -165,10 +352,10 @@ def get_sysinfo(max_osds):
         distro[dstr] = incr(distro, dstr)
       except KeyError as ke:
         pass
-  
+
       cpu[jmeta['cpu']] = incr(cpu, jmeta['cpu'])
       arch[jmeta['arch']] = incr(arch, jmeta['arch'])
-  
+
     count = count + 1
 
   sysinfo = {}
@@ -202,7 +389,7 @@ def get_ownership_info():
 def output_json():
   out = {}
   url = None
-  
+
   out['uuid'] = get_uuid()
   nums = get_nums()
   num_osds = int(nums['num_osds'])
@@ -305,12 +492,12 @@ def unpublish():
     return 1
 
   uuid = get_uuid()
-  
+
   params = {'uuid':uuid}
   req = requests.delete(url, params=params)
   if req.status_code is not 200:
     print >> sys.stderr, "Failed to unpublish, server responsed with code " + str(req.status_code)
-    return 1 
+    return 1
 
   return 0
 
diff --git a/src/ceph-disk b/src/ceph-disk
index f79e341..9c3732a 100755
--- a/src/ceph-disk
+++ b/src/ceph-disk
@@ -280,7 +280,7 @@ def command(arguments, **kwargs):
     process in a tuple: (output, returncode).
     """
     arguments = _get_command_executable(arguments)
-
+    LOG.info('Running command: %s' % ' '.join(arguments))
     process = subprocess.Popen(
         arguments,
         stdout=subprocess.PIPE,
@@ -300,6 +300,7 @@ def command_check_call(arguments):
     otherwise.
     """
     arguments = _get_command_executable(arguments)
+    LOG.info('Running command: %s' % ' '.join(arguments))
     return subprocess.check_call(arguments)
 
 
@@ -1182,6 +1183,9 @@ def prepare_dev(
         LOG.debug('OSD data device %s is a partition', data)
         rawdev = data
     else:
+        if journal_dmcrypt is not None:
+            dmcrypt_unmap(journal)
+
         LOG.debug('Creating osd partition on %s', data)
         try:
             command_check_call(
@@ -1199,6 +1203,12 @@ def prepare_dev(
             )
             command(
                 [
+                    'partprobe',
+                    data,
+                    ],
+                )
+            command(
+                [
                     # wait for udev event queue to clear
                     'udevadm',
                     'settle',
@@ -1257,6 +1267,8 @@ def prepare_dev(
     finally:
         if rawdev != dev:
             dmcrypt_unmap(osd_uuid)
+        if journal_dmcrypt is not None:
+            dmcrypt_unmap(journal)
 
     if not is_partition(data):
         try:
diff --git a/src/ceph_common.sh b/src/ceph_common.sh
index 01781b7..d78f831 100644
--- a/src/ceph_common.sh
+++ b/src/ceph_common.sh
@@ -137,6 +137,24 @@ do_root_cmd() {
     fi
 }
 
+# do_root_cmd_okfail CMD [HOST] [IGNORE_FAILURE]
+#
+# Run CMD ($1) as root and report the outcome through $ERR and the return
+# status:
+#   - when $ssh is empty, CMD runs locally via "bash -c" (through sudo
+#     unless whoami already reports root);
+#   - otherwise CMD runs on HOST ($2) through $rootssh, after changing
+#     into $sshdir (created if missing).
+# ERR is reset to 0 on entry.  If CMD fails and $3 is empty, "failed: ..."
+# is printed, ERR is set to 1 and the function returns 1.  If $3 is
+# non-empty a failure is ignored: ERR stays 0 and the function returns 0.
+do_root_cmd_okfail() {
+    ERR=0
+    if [ -z "$ssh" ]; then
+	# echo the command first when verbose mode is on
+	[ $verbose -eq 1 ] && echo "--- $host# $1"
+	ulimit -c unlimited
+	whoami=`whoami`
+	if [ "$whoami" = "root" ]; then
+	    bash -c "$1" || { [ -z "$3" ] && echo "failed: '$1'" && ERR=1 && return 1; }
+	else
+	    sudo bash -c "$1" || { [ -z "$3" ] && echo "failed: '$1'" && ERR=1 && return 1; }
+	fi
+    else
+	# remote case: same command line is echoed (if verbose) and then run
+	[ $verbose -eq 1 ] && echo "--- $rootssh $2 \"if [ ! -d $sshdir ]; then mkdir -p $sshdir; fi; cd $sshdir ; ulimit -c unlimited ; $1\""
+	$rootssh $2 "if [ ! -d $sshdir ]; then mkdir -p $sshdir; fi; cd $sshdir ; ulimit -c unlimited ; $1" || { [ -z "$3" ] && echo "failed: '$rootssh $1'" && ERR=1 && return 1; }
+    fi
+    return 0
+}
+
 get_local_daemon_list() {
     type=$1
     if [ -d "/var/lib/ceph/$type" ]; then
diff --git a/src/crush/CrushWrapper.cc b/src/crush/CrushWrapper.cc
index b5ecbc6..4ed3fa9 100644
--- a/src/crush/CrushWrapper.cc
+++ b/src/crush/CrushWrapper.cc
@@ -1,4 +1,5 @@
 
+#include "osd/osd_types.h"
 #include "common/debug.h"
 #include "common/Formatter.h"
 #include "common/errno.h"
@@ -1374,6 +1375,12 @@ void CrushWrapper::generate_test_instances(list<CrushWrapper*>& o)
   // fixme
 }
 
+/**
+ * Determine the default CRUSH ruleset ID to be used with
+ * newly created replicated pools.
+ *
+ * @returns a ruleset ID (>=0) or an error (<0)
+ */
 int CrushWrapper::get_osd_pool_default_crush_replicated_ruleset(CephContext *cct)
 {
   int crush_ruleset = cct->_conf->osd_pool_default_crush_replicated_ruleset;
@@ -1388,6 +1395,11 @@ int CrushWrapper::get_osd_pool_default_crush_replicated_ruleset(CephContext *cct
                   << dendl;
     crush_ruleset = cct->_conf->osd_pool_default_crush_rule;
   }
+
+  if (crush_ruleset == CEPH_DEFAULT_CRUSH_REPLICATED_RULESET) {
+    crush_ruleset = find_first_ruleset(pg_pool_t::TYPE_REPLICATED);
+  }
+
   return crush_ruleset;
 }
 
diff --git a/src/crush/CrushWrapper.h b/src/crush/CrushWrapper.h
index 3b2e6e6..282cbeb 100644
--- a/src/crush/CrushWrapper.h
+++ b/src/crush/CrushWrapper.h
@@ -863,6 +863,36 @@ public:
     if (!crush) return -1;
     return crush_find_rule(crush, ruleset, type, size);
   }
+
+  /// True if any rule present in the crush map belongs to ruleset `ruleset`.
+  bool ruleset_exists(int ruleset) const {
+    for (size_t i = 0; i < crush->max_rules; ++i) {
+      // rules[] can hold NULL slots (find_first_ruleset() guards the same
+      // way); dereferencing one here would crash.
+      if (crush->rules[i] && crush->rules[i]->mask.ruleset == ruleset)
+        return true;
+    }
+    return false;
+  }
+
+  /**
+   * Scan every rule slot and pick the smallest ruleset ID among the
+   * rules whose mask type equals `type`.  Empty (NULL) slots are skipped.
+   *
+   * @param type rule type to match against each rule's mask
+   * @returns the lowest matching ruleset ID, or -1 when no rule matches.
+   */
+  int find_first_ruleset(int type) const {
+    int lowest = -1;
+
+    for (size_t idx = 0; idx < crush->max_rules; ++idx) {
+      if (!crush->rules[idx])
+        continue;
+      if (crush->rules[idx]->mask.type != type)
+        continue;
+      // first match, or a smaller ruleset ID than the best so far
+      if (lowest == -1 || crush->rules[idx]->mask.ruleset < lowest)
+        lowest = crush->rules[idx]->mask.ruleset;
+    }
+
+    return lowest;
+  }
+
   void do_rule(int rule, int x, vector<int>& out, int maxout,
 	       const vector<__u32>& weight) const {
     Mutex::Locker l(mapper_lock);
@@ -902,7 +932,7 @@ public:
   void dump_tree(const vector<__u32>& w, ostream *out, Formatter *f) const;
   static void generate_test_instances(list<CrushWrapper*>& o);
 
-  static int get_osd_pool_default_crush_replicated_ruleset(CephContext *cct);
+  int get_osd_pool_default_crush_replicated_ruleset(CephContext *cct);
 
   static bool is_valid_crush_name(const string& s);
   static bool is_valid_crush_loc(CephContext *cct,
diff --git a/src/include/ceph_features.h b/src/include/ceph_features.h
index d6c7d6f..6b2a5fb 100644
--- a/src/include/ceph_features.h
+++ b/src/include/ceph_features.h
@@ -51,6 +51,7 @@
 #define CEPH_FEATURE_CRUSH_TUNABLES3     (1ULL<<41)
 #define CEPH_FEATURE_OSD_PRIMARY_AFFINITY (1ULL<<41)  /* overlap w/ tunables3 */
 #define CEPH_FEATURE_MSGR_KEEPALIVE2   (1ULL<<42)
+#define CEPH_FEATURE_OSD_POOLRESEND    (1ULL<<43)
 
 /*
  * The introduction of CEPH_FEATURE_OSD_SNAPMAPPER caused the feature
@@ -122,6 +123,7 @@ static inline unsigned long long ceph_sanitize_features(unsigned long long f) {
 	 CEPH_FEATURE_CRUSH_TUNABLES3 |	    \
 	 CEPH_FEATURE_OSD_PRIMARY_AFFINITY |	\
 	 CEPH_FEATURE_MSGR_KEEPALIVE2 |	\
+	 CEPH_FEATURE_OSD_POOLRESEND |	\
 	 0ULL)
 
 #define CEPH_FEATURES_SUPPORTED_DEFAULT  CEPH_FEATURES_ALL
diff --git a/src/init-ceph.in b/src/init-ceph.in
index 846bd57..1effb56 100644
--- a/src/init-ceph.in
+++ b/src/init-ceph.in
@@ -311,10 +311,14 @@ for name in $what; do
 
 		if [ "$fs_type" = "btrfs" ]; then
 		    echo Mounting Btrfs on $host:$fs_path
-		    do_root_cmd "modprobe btrfs ; btrfs device scan || btrfsctl -a ; egrep -q '^[^ ]+ $fs_path' /proc/mounts || mount -t btrfs $fs_opt $first_dev $fs_path"
+		    do_root_cmd_okfail "modprobe btrfs ; btrfs device scan || btrfsctl -a ; egrep -q '^[^ ]+ $fs_path' /proc/mounts || mount -t btrfs $fs_opt $first_dev $fs_path"
 		else
 		    echo Mounting $fs_type on $host:$fs_path
-		    do_root_cmd "modprobe $fs_type ; egrep -q '^[^ ]+ $fs_path' /proc/mounts || mount -t $fs_type $fs_opt $first_dev $fs_path"
+		    do_root_cmd_okfail "modprobe $fs_type ; egrep -q '^[^ ]+ $fs_path' /proc/mounts || mount -t $fs_type $fs_opt $first_dev $fs_path"
+		fi
+		if [ "$ERR" != "0" ]; then
+		    EXIT_STATUS=$ERR
+		    continue
 		fi
 	    fi
 
diff --git a/src/java/Makefile.am b/src/java/Makefile.am
index 8b28f83..4b3d1fb 100644
--- a/src/java/Makefile.am
+++ b/src/java/Makefile.am
@@ -44,11 +44,11 @@ CEPH_PROXY=java/com/ceph/fs/CephMount.class
 
 $(CEPH_PROXY): $(JAVA_SRC)
 	export CLASSPATH=java/ ; \
-	$(JAVAC) -source 1.5 -target 1.5 -Xlint:-options java/com/ceph/fs/*.java
+	$(JAVAC) -classpath java -source 1.5 -target 1.5 -Xlint:-options java/com/ceph/fs/*.java
 
 $(JAVA_H): $(CEPH_PROXY)
 	export CLASSPATH=java/ ; \
-	$(JAVAH) -jni -o $@ com.ceph.fs.CephMount
+	$(JAVAH) -classpath java -jni -o $@ com.ceph.fs.CephMount
 
 libcephfs.jar: $(CEPH_PROXY)
 	$(JAR) cf $@ $(JAVA_CLASSES:%=-C java %) 
diff --git a/src/java/Makefile.in b/src/java/Makefile.in
index 0e967f6..013bb6c 100644
--- a/src/java/Makefile.in
+++ b/src/java/Makefile.in
@@ -543,11 +543,11 @@ uninstall-am: uninstall-javaDATA
 
 @ENABLE_CEPHFS_JAVA_TRUE@$(CEPH_PROXY): $(JAVA_SRC)
 @ENABLE_CEPHFS_JAVA_TRUE@	export CLASSPATH=java/ ; \
-@ENABLE_CEPHFS_JAVA_TRUE@	$(JAVAC) -source 1.5 -target 1.5 -Xlint:-options java/com/ceph/fs/*.java
+@ENABLE_CEPHFS_JAVA_TRUE@	$(JAVAC) -classpath java -source 1.5 -target 1.5 -Xlint:-options java/com/ceph/fs/*.java
 
 @ENABLE_CEPHFS_JAVA_TRUE@$(JAVA_H): $(CEPH_PROXY)
 @ENABLE_CEPHFS_JAVA_TRUE@	export CLASSPATH=java/ ; \
-@ENABLE_CEPHFS_JAVA_TRUE@	$(JAVAH) -jni -o $@ com.ceph.fs.CephMount
+@ENABLE_CEPHFS_JAVA_TRUE@	$(JAVAH) -classpath java -jni -o $@ com.ceph.fs.CephMount
 
 @ENABLE_CEPHFS_JAVA_TRUE@libcephfs.jar: $(CEPH_PROXY)
 @ENABLE_CEPHFS_JAVA_TRUE@	$(JAR) cf $@ $(JAVA_CLASSES:%=-C java %) 
diff --git a/src/java/native/libcephfs_jni.cc b/src/java/native/libcephfs_jni.cc
index 189743d..a96b20d 100644
--- a/src/java/native/libcephfs_jni.cc
+++ b/src/java/native/libcephfs_jni.cc
@@ -2875,7 +2875,7 @@ jobject sockaddrToInetAddress(JNIEnv* env, const sockaddr_storage& ss, jint* por
         return NULL;
     }
     env->SetByteArrayRegion(byteArray.get(), 0, addressLength,
-            reinterpret_cast<const jbyte*>(rawAddress));
+			    reinterpret_cast<jbyte*>(const_cast<void*>(rawAddress)));
 
     if (ss.ss_family == AF_UNIX) {
         // Note that we get here for AF_UNIX sockets on accept(2). The unix(7) man page claims
diff --git a/src/librados/librados.cc b/src/librados/librados.cc
index 2358fb4..26b94bd 100644
--- a/src/librados/librados.cc
+++ b/src/librados/librados.cc
@@ -2058,10 +2058,10 @@ extern "C" int rados_pool_list(rados_t cluster, char *buf, size_t len)
   std::list<std::string>::const_iterator i = pools.begin();
   std::list<std::string>::const_iterator p_end = pools.end();
   for (; i != p_end; ++i) {
-    if (len == 0)
-      break;
     int rl = i->length() + 1;
-    strncat(b, i->c_str(), len - 2); // leave space for two NULLs
+    if (len < (unsigned)rl)
+      break;
+    strncat(b, i->c_str(), rl);
     needed += rl;
     len -= rl;
     b += rl;
diff --git a/src/messages/MForward.h b/src/messages/MForward.h
index 3128197..6a0e2bf 100644
--- a/src/messages/MForward.h
+++ b/src/messages/MForward.h
@@ -31,7 +31,7 @@ struct MForward : public Message {
   uint64_t con_features;
 
   static const int HEAD_VERSION = 2;
-  static const int COMPAT_VERSION = 0;
+  static const int COMPAT_VERSION = 1;
 
   MForward() : Message(MSG_FORWARD, HEAD_VERSION, COMPAT_VERSION),
                tid(0), msg(NULL), con_features(0) {}
diff --git a/src/mon/ConfigKeyService.h b/src/mon/ConfigKeyService.h
index e379af6..e33070b 100644
--- a/src/mon/ConfigKeyService.h
+++ b/src/mon/ConfigKeyService.h
@@ -52,11 +52,9 @@ public:
    * @{
    */
   virtual void init() { }
-  virtual health_status_t get_health(
-                          Formatter *f,
-                          list<pair<health_status_t,string> > *detail) {
-    return HEALTH_OK;
-  }
+  // No-op override: leaves summary and detail untouched and writes nothing
+  // to the formatter.
+  virtual void get_health(Formatter *f,
+			  list<pair<health_status_t,string> >& summary,
+                          list<pair<health_status_t,string> > *detail) { }
   virtual bool service_dispatch(Message *m);
 
   virtual void start_epoch() { }
diff --git a/src/mon/DataHealthService.cc b/src/mon/DataHealthService.cc
index 8dda929..78732ac 100644
--- a/src/mon/DataHealthService.cc
+++ b/src/mon/DataHealthService.cc
@@ -68,8 +68,9 @@ void DataHealthService::start_epoch()
   last_warned_percent = 0;
 }
 
-health_status_t DataHealthService::get_health(
+void DataHealthService::get_health(
     Formatter *f,
+    list<pair<health_status_t,string> >& summary,
     list<pair<health_status_t,string> > *detail)
 {
   dout(10) << __func__ << dendl;
@@ -78,8 +79,6 @@ health_status_t DataHealthService::get_health(
     f->open_array_section("mons");
   }
 
-  health_status_t overall_status = HEALTH_OK;
-
   for (map<entity_inst_t,DataStats>::iterator it = stats.begin();
        it != stats.end(); ++it) {
     string mon_name = mon->monmap->get_name(it->first.addr);
@@ -89,10 +88,10 @@ health_status_t DataHealthService::get_health(
     string health_detail;
     if (stats.latest_avail_percent <= g_conf->mon_data_avail_crit) {
       health_status = HEALTH_ERR;
-      health_detail = "shutdown iminent!";
+      health_detail = "low disk space, shutdown imminent";
     } else if (stats.latest_avail_percent <= g_conf->mon_data_avail_warn) {
       health_status = HEALTH_WARN;
-      health_detail = "low disk space!";
+      health_detail = "low disk space";
     }
 
     if (stats.store_stats.bytes_total >= g_conf->mon_leveldb_size_warn) {
@@ -107,15 +106,13 @@ health_status_t DataHealthService::get_health(
       health_detail.append(ss.str());
     }
 
-    if (overall_status > health_status)
-      overall_status = health_status;
-
-    if (detail && health_status != HEALTH_OK) {
+    if (health_status != HEALTH_OK) {
       stringstream ss;
-      ss << "mon." << mon_name << " addr " << it->first.addr
-          << " has " << stats.latest_avail_percent
-          << "\% avail disk space -- " << health_detail;
-      detail->push_back(make_pair(health_status, ss.str()));
+      ss << "mon." << mon_name << " " << health_detail;
+      summary.push_back(make_pair(health_status, ss.str()));
+      ss << " -- " <<  stats.latest_avail_percent << "% avail";
+      if (detail)
+	detail->push_back(make_pair(health_status, ss.str()));
     }
 
     if (f) {
@@ -134,8 +131,6 @@ health_status_t DataHealthService::get_health(
     f->close_section(); // mons
     f->close_section(); // data_health
   }
-
-  return overall_status;
 }
 
 int DataHealthService::update_store_stats(DataStats &ours)
diff --git a/src/mon/DataHealthService.h b/src/mon/DataHealthService.h
index 750c58e..221e179 100644
--- a/src/mon/DataHealthService.h
+++ b/src/mon/DataHealthService.h
@@ -70,7 +70,8 @@ public:
     start_tick();
   }
 
-  virtual health_status_t get_health(Formatter *f,
+  virtual void get_health(Formatter *f,
+                          list<pair<health_status_t,string> >& summary,
                           list<pair<health_status_t,string> > *detail);
 
   virtual int get_type() {
diff --git a/src/mon/HealthMonitor.cc b/src/mon/HealthMonitor.cc
index c6ab6f4..7cba39b 100644
--- a/src/mon/HealthMonitor.cc
+++ b/src/mon/HealthMonitor.cc
@@ -80,10 +80,10 @@ void HealthMonitor::service_shutdown()
   services.clear();
 }
 
-health_status_t HealthMonitor::get_health(Formatter *f,
-					  list<pair<health_status_t,string> > *detail)
+void HealthMonitor::get_health(Formatter *f,
+			       list<pair<health_status_t,string> >& summary,
+			       list<pair<health_status_t,string> > *detail)
 {
-  health_status_t overall = HEALTH_OK;
   if (f) {
     f->open_object_section("health");
     f->open_array_section("health_services");
@@ -92,16 +92,12 @@ health_status_t HealthMonitor::get_health(Formatter *f,
   for (map<int,HealthService*>::iterator it = services.begin();
        it != services.end();
        ++it) {
-    health_status_t h = it->second->get_health(f, detail);
-    if (overall > h)
-      overall = h;
+    it->second->get_health(f, summary, detail);
   }
 
   if (f) {
     f->close_section(); // health_services
     f->close_section(); // health
   }
-
-  return overall;
 }
 
diff --git a/src/mon/HealthMonitor.h b/src/mon/HealthMonitor.h
index a1c98a9..3d84261 100644
--- a/src/mon/HealthMonitor.h
+++ b/src/mon/HealthMonitor.h
@@ -42,8 +42,9 @@ public:
    * @{
    */
   virtual void init();
-  virtual health_status_t get_health(Formatter *f,
-                          list<pair<health_status_t,string> > *detail);
+  virtual void get_health(Formatter *f,
+		     list<pair<health_status_t,string> >& summary,
+		     list<pair<health_status_t,string> > *detail);
   virtual bool service_dispatch(Message *m);
 
   virtual void start_epoch() {
diff --git a/src/mon/HealthService.h b/src/mon/HealthService.h
index 11d6e48..2a46f88 100644
--- a/src/mon/HealthService.h
+++ b/src/mon/HealthService.h
@@ -37,8 +37,9 @@ struct HealthService : public QuorumService
   virtual bool service_dispatch(MMonHealth *m) = 0;
 
 public:
-  virtual health_status_t get_health(Formatter *f,
-                          list<pair<health_status_t,string> > *detail) = 0;
+  virtual void get_health(Formatter *f,
+			  list<pair<health_status_t,string> >& summary,
+			  list<pair<health_status_t,string> > *detail) = 0;
   virtual int get_type() = 0;
   virtual string get_name() const = 0;
 };
diff --git a/src/mon/MonCommands.h b/src/mon/MonCommands.h
index 36f1e9f..eb63303 100644
--- a/src/mon/MonCommands.h
+++ b/src/mon/MonCommands.h
@@ -585,11 +585,13 @@ COMMAND("osd tier add " \
 	"name=pool,type=CephPoolname " \
 	"name=tierpool,type=CephPoolname " \
 	"name=force_nonempty,type=CephChoices,strings=--force-nonempty,req=false",
-	"add the tier <tierpool> to base pool <pool>", "osd", "rw", "cli,rest")
+	"add the tier <tierpool> (the second one) to base pool <pool> (the first one)", \
+	"osd", "rw", "cli,rest")
 COMMAND("osd tier remove " \
 	"name=pool,type=CephPoolname " \
 	"name=tierpool,type=CephPoolname",
-	"remove the tier <tierpool> from base pool <pool>", "osd", "rw", "cli,rest")
+	"remove the tier <tierpool> (the second one) from base pool <pool> (the first one)", \
+	"osd", "rw", "cli,rest")
 COMMAND("osd tier cache-mode " \
 	"name=pool,type=CephPoolname " \
 	"name=mode,type=CephChoices,strings=none|writeback|forward|readonly", \
@@ -606,7 +608,7 @@ COMMAND("osd tier add-cache " \
 	"name=pool,type=CephPoolname " \
 	"name=tierpool,type=CephPoolname " \
 	"name=size,type=CephInt,range=0", \
-	"add a cache <tierpool> of size <size> to existing pool <pool>", \
+	"add a cache <tierpool> (the second one) of size <size> to existing pool <pool> (the first one)", \
 	"osd", "rw", "cli,rest")
 
 /*
diff --git a/src/mon/Monitor.cc b/src/mon/Monitor.cc
index 0982327..cd447e7 100644
--- a/src/mon/Monitor.cc
+++ b/src/mon/Monitor.cc
@@ -1889,6 +1889,8 @@ void Monitor::get_health(string& status, bufferlist *detailbl, Formatter *f)
     s->get_health(summary, detailbl ? &detail : NULL);
   }
 
+  health_monitor->get_health(f, summary, (detailbl ? &detail : NULL));
+
   if (f)
     f->open_array_section("summary");
   stringstream ss;
@@ -1974,10 +1976,6 @@ void Monitor::get_health(string& status, bufferlist *detailbl, Formatter *f)
   if (f)
     f->close_section();
 
-  health_status_t hmstatus = health_monitor->get_health(f, (detailbl ? &detail : NULL));
-  if (overall > hmstatus)
-    overall = hmstatus;
-
   stringstream fss;
   fss << overall;
   status = fss.str() + ss.str();
diff --git a/src/mon/OSDMonitor.cc b/src/mon/OSDMonitor.cc
index dd027b2..eab5122 100644
--- a/src/mon/OSDMonitor.cc
+++ b/src/mon/OSDMonitor.cc
@@ -283,15 +283,14 @@ void OSDMonitor::update_from_paxos(bool *need_bootstrap)
 
 void OSDMonitor::update_msgr_features()
 {
-  uint64_t mask;
-  uint64_t features = osdmap.get_features(&mask);
-
   set<int> types;
   types.insert((int)entity_name_t::TYPE_OSD);
   types.insert((int)entity_name_t::TYPE_CLIENT);
   types.insert((int)entity_name_t::TYPE_MDS);
   types.insert((int)entity_name_t::TYPE_MON);
   for (set<int>::iterator q = types.begin(); q != types.end(); ++q) {
+    uint64_t mask;
+    uint64_t features = osdmap.get_features(*q, &mask);
     if ((mon->messenger->get_policy(*q).features_required & mask) != features) {
       dout(0) << "crush map has features " << features << ", adjusting msgr requires" << dendl;
       Messenger::Policy p = mon->messenger->get_policy(*q);
@@ -1179,7 +1178,8 @@ bool OSDMonitor::preprocess_boot(MOSDBoot *m)
   assert(m->get_orig_source_inst().name.is_osd());
 
   // check if osd has required features to boot
-  if ((osdmap.get_features(NULL) & CEPH_FEATURE_OSD_ERASURE_CODES) &&
+  if ((osdmap.get_features(CEPH_ENTITY_TYPE_OSD, NULL) &
+       CEPH_FEATURE_OSD_ERASURE_CODES) &&
       !(m->get_connection()->get_features() & CEPH_FEATURE_OSD_ERASURE_CODES)) {
     dout(0) << __func__ << " osdmap requires Erasure Codes but osd at "
             << m->get_orig_source_inst()
@@ -2007,29 +2007,33 @@ void OSDMonitor::get_health(list<pair<health_status_t,string> >& summary,
 			    list<pair<health_status_t,string> > *detail) const
 {
   int num_osds = osdmap.get_num_osds();
-  int num_up_osds = osdmap.get_num_up_osds();
-  int num_in_osds = osdmap.get_num_in_osds();
 
   if (num_osds == 0) {
     summary.push_back(make_pair(HEALTH_ERR, "no osds"));
   } else {
-    if (num_up_osds < num_in_osds) {
-      ostringstream ss;
-      ss << (num_in_osds - num_up_osds) << "/" << num_in_osds << " in osds are down";
-      summary.push_back(make_pair(HEALTH_WARN, ss.str()));
-
-      if (detail) {
-	for (int i = 0; i < osdmap.get_max_osd(); i++) {
-	  if (osdmap.exists(i) && !osdmap.is_up(i)) {
-	    const osd_info_t& info = osdmap.get_info(i);
-	    ostringstream ss;
-	    ss << "osd." << i << " is down since epoch " << info.down_at
-	       << ", last address " << osdmap.get_addr(i);
-	    detail->push_back(make_pair(HEALTH_WARN, ss.str()));
-	  }
+    int num_in_osds = 0;
+    int num_down_in_osds = 0;
+    for (int i = 0; i < osdmap.get_max_osd(); i++) {
+      if (!osdmap.exists(i) || osdmap.is_out(i))
+	continue;
+      ++num_in_osds;
+      if (!osdmap.is_up(i)) {
+	++num_down_in_osds;
+	if (detail) {
+	  const osd_info_t& info = osdmap.get_info(i);
+	  ostringstream ss;
+	  ss << "osd." << i << " is down since epoch " << info.down_at
+	     << ", last address " << osdmap.get_addr(i);
+	  detail->push_back(make_pair(HEALTH_WARN, ss.str()));
 	}
       }
     }
+    assert(num_down_in_osds <= num_in_osds);
+    if (num_down_in_osds > 0) {
+      ostringstream ss;
+      ss << num_down_in_osds << "/" << num_in_osds << " in osds are down";
+      summary.push_back(make_pair(HEALTH_WARN, ss.str()));
+    }
 
     // warn about flags
     if (osdmap.test_flag(CEPH_OSDMAP_PAUSERD |
@@ -2952,7 +2956,7 @@ int OSDMonitor::crush_ruleset_create_erasure(const string &name,
 
 int OSDMonitor::get_erasure_code(const string &erasure_code_profile,
 				 ErasureCodeInterfaceRef *erasure_code,
-				 stringstream &ss)
+				 stringstream &ss) const
 {
   if (pending_inc.has_erasure_code_profile(erasure_code_profile))
     return -EAGAIN;
@@ -2975,7 +2979,7 @@ int OSDMonitor::check_cluster_features(uint64_t features,
 {
   stringstream unsupported_ss;
   int unsupported_count = 0;
-  if (!(mon->get_quorum_features() & features)) {
+  if ((mon->get_quorum_features() & features) != features) {
     unsupported_ss << "the monitor cluster";
     ++unsupported_count;
   }
@@ -3014,6 +3018,27 @@ int OSDMonitor::check_cluster_features(uint64_t features,
   return 0;
 }
 
+bool OSDMonitor::validate_crush_against_features(const CrushWrapper *newcrush,
+                                                 stringstream& ss)
+{
+  OSDMap::Incremental new_pending = pending_inc;
+  ::encode(*newcrush, new_pending.crush);
+  OSDMap newmap;
+  newmap.deepish_copy_from(osdmap);
+  newmap.apply_incremental(new_pending);
+  uint64_t features = newmap.get_features(CEPH_ENTITY_TYPE_MON, NULL);
+
+  stringstream features_ss;
+
+  int r = check_cluster_features(features, features_ss);
+
+  if (!r)
+    return true;
+
+  ss << "Could not change CRUSH: " << features_ss.str();
+  return false;
+}
+
 bool OSDMonitor::erasure_code_profile_in_use(const map<int64_t, pg_pool_t> &pools,
 					     const string &profile,
 					     ostream &ss)
@@ -3126,8 +3151,12 @@ int OSDMonitor::prepare_pool_crush_ruleset(const unsigned pool_type,
   if (*crush_ruleset < 0) {
     switch (pool_type) {
     case pg_pool_t::TYPE_REPLICATED:
-      *crush_ruleset =
-	CrushWrapper::get_osd_pool_default_crush_replicated_ruleset(g_ceph_context);
+      *crush_ruleset = osdmap.crush->get_osd_pool_default_crush_replicated_ruleset(g_ceph_context);
+      if (*crush_ruleset < 0) {
+        // Errors may happen e.g. if no valid ruleset is available
+        ss << "No suitable CRUSH ruleset exists";
+        return *crush_ruleset;
+      }
       break;
     case pg_pool_t::TYPE_ERASURE:
       {
@@ -3155,6 +3184,11 @@ int OSDMonitor::prepare_pool_crush_ruleset(const unsigned pool_type,
       return -EINVAL;
       break;
     }
+  } else {
+    if (!osdmap.crush->ruleset_exists(*crush_ruleset)) {
+      ss << "CRUSH ruleset " << *crush_ruleset << " not found";
+      return -ENOENT;
+    }
   }
 
   return 0;
@@ -3411,7 +3445,7 @@ int OSDMonitor::prepare_command_pool_set(map<string,cmd_vartype> &cmdmap,
       ss << "error parsing integer value '" << val << "': " << interr;
       return -EINVAL;
     }
-    if (!osdmap.crush->rule_exists(n)) {
+    if (!osdmap.crush->ruleset_exists(n)) {
       ss << "crush ruleset " << n << " does not exist";
       return -ENOENT;
     }
@@ -3504,7 +3538,7 @@ int OSDMonitor::prepare_command_pool_set(map<string,cmd_vartype> &cmdmap,
       ss << "value must be in the range 0..1";
       return -ERANGE;
     }
-    p.cache_target_full_ratio_micro = n;
+    p.cache_target_full_ratio_micro = f * 1000000;
   } else if (var == "cache_min_flush_age") {
     if (interr.length()) {
       ss << "error parsing int '" << val << "': " << interr;
@@ -3616,6 +3650,11 @@ bool OSDMonitor::prepare_command_impl(MMonCommand *m,
       goto reply;
     }
 
+    if (!validate_crush_against_features(&crush, ss)) {
+      err = -EINVAL;
+      goto reply;
+    }
+
     // sanity check: test some inputs to make sure this map isn't totally broken
     dout(10) << " testing map" << dendl;
     stringstream ess;
@@ -3997,6 +4036,12 @@ bool OSDMonitor::prepare_command_impl(MMonCommand *m,
       err = -EINVAL;
       goto reply;
     }
+
+    if (!validate_crush_against_features(&newcrush, ss)) {
+      err = -EINVAL;
+      goto reply;
+    }
+
     pending_inc.crush.clear();
     newcrush.encode(pending_inc.crush);
     ss << "adjusted tunables profile to " << profile;
@@ -4490,7 +4535,8 @@ bool OSDMonitor::prepare_command_impl(MMonCommand *m,
       pending_inc.new_primary_affinity[id] = ww;
       ss << "set osd." << id << " primary-affinity to " << w << " (" << ios::hex << ww << ios::dec << ")";
       getline(ss, rs);
-      wait_for_finished_proposal(new Monitor::C_Command(mon, m, 0, rs, get_last_committed()));
+      wait_for_finished_proposal(new Monitor::C_Command(mon, m, 0, rs,
+                                                get_last_committed() + 1));
       return true;
     }
   } else if (prefix == "osd reweight") {
@@ -5074,8 +5120,10 @@ done:
       goto reply;
     }
     // go
-    pending_inc.get_new_pool(pool_id, p)->read_tier = overlaypool_id;
-    pending_inc.get_new_pool(pool_id, p)->write_tier = overlaypool_id;
+    pg_pool_t *np = pending_inc.get_new_pool(pool_id, p);
+    np->read_tier = overlaypool_id;
+    np->write_tier = overlaypool_id;
+    np->last_force_op_resend = pending_inc.epoch;
     ss << "overlay for '" << poolstr << "' is now (or already was) '" << overlaypoolstr << "'";
     wait_for_finished_proposal(new Monitor::C_Command(mon, m, 0, ss.str(),
 					      get_last_committed() + 1));
@@ -5097,8 +5145,10 @@ done:
       goto reply;
     }
     // go
-    pending_inc.get_new_pool(pool_id, p)->clear_read_tier();
-    pending_inc.get_new_pool(pool_id, p)->clear_write_tier();
+    pg_pool_t *np = pending_inc.get_new_pool(pool_id, p);
+    np->clear_read_tier();
+    np->clear_write_tier();
+    np->last_force_op_resend = pending_inc.epoch;
     ss << "there is now (or already was) no overlay for '" << poolstr << "'";
     wait_for_finished_proposal(new Monitor::C_Command(mon, m, 0, ss.str(),
 					      get_last_committed() + 1));
@@ -5567,14 +5617,6 @@ int OSDMonitor::_check_remove_pool(int64_t pool, const pg_pool_t *p,
 {
   string poolstr = osdmap.get_pool_name(pool);
 
-  // If the Pool is in use by CephFS, refuse to delete it
-  MDSMap const &pending_mdsmap = mon->mdsmon()->pending_mdsmap;
-  if (pending_mdsmap.is_data_pool(pool) ||
-      pending_mdsmap.get_metadata_pool() == pool) {
-    *ss << "pool '" << poolstr << "' is in use by CephFS";
-    return -EBUSY;
-  }
-
   if (p->tier_of >= 0) {
     *ss << "pool '" << poolstr << "' is a tier of '"
 	<< osdmap.get_pool_name(p->tier_of) << "'";
diff --git a/src/mon/OSDMonitor.h b/src/mon/OSDMonitor.h
index afc08f0..2d4f379 100644
--- a/src/mon/OSDMonitor.h
+++ b/src/mon/OSDMonitor.h
@@ -186,6 +186,15 @@ private:
 
   void update_msgr_features();
   int check_cluster_features(uint64_t features, stringstream &ss);
+  /**
+   * check if the cluster supports the features required by the
+   * given crush map. Outputs the daemons which don't support it
+   * to the stringstream.
+   *
+   * @returns true if the map is passable, false otherwise
+   */
+  bool validate_crush_against_features(const CrushWrapper *newcrush,
+                                      stringstream &ss);
 
   void share_map_with_random_osd();
 
@@ -249,7 +258,7 @@ private:
 				   stringstream &ss);
   int get_erasure_code(const string &erasure_code_profile,
 		       ErasureCodeInterfaceRef *erasure_code,
-		       stringstream &ss);
+		       stringstream &ss) const;
   int prepare_pool_crush_ruleset(const unsigned pool_type,
 				 const string &erasure_code_profile,
 				 const string &ruleset_name,
diff --git a/src/mon/QuorumService.h b/src/mon/QuorumService.h
index 27971b7..ef9dcdc 100644
--- a/src/mon/QuorumService.h
+++ b/src/mon/QuorumService.h
@@ -124,7 +124,8 @@ public:
 
   virtual void init() { }
 
-  virtual health_status_t get_health(Formatter *f,
+  virtual void get_health(Formatter *f,
+			  list<pair<health_status_t,string> >& summary,
                           list<pair<health_status_t,string> > *detail) = 0;
   virtual int get_type() = 0;
   virtual string get_name() const = 0;
diff --git a/src/msg/Pipe.cc b/src/msg/Pipe.cc
index 0693c09..54ce679 100644
--- a/src/msg/Pipe.cc
+++ b/src/msg/Pipe.cc
@@ -249,6 +249,10 @@ int Pipe::accept()
   bool authorizer_valid;
   uint64_t feat_missing;
   bool replaced = false;
+  // this variable denotes if the connection attempt from peer is a hard 
+  // reset or not, it is true if there is an existing connection and the
+  // connection sequence from peer is equal to zero
+  bool is_reset_from_peer = false;
   CryptoKey session_key;
   int removed; // single-use down below
 
@@ -462,6 +466,8 @@ int Pipe::accept()
 
       if (connect.connect_seq == 0 && existing->connect_seq > 0) {
 	ldout(msgr->cct,0) << "accept peer reset, then tried to connect to us, replacing" << dendl;
+        // this is a hard reset from peer
+        is_reset_from_peer = true;
 	if (policy.resetcheck)
 	  existing->was_session_reset(); // this resets out_queue, msg_ and connect_seq #'s
 	goto replace;
@@ -584,7 +590,8 @@ int Pipe::accept()
  replace:
   assert(existing->pipe_lock.is_locked());
   assert(pipe_lock.is_locked());
-  if (connect.features & CEPH_FEATURE_RECONNECT_SEQ) {
+  // if it is a hard reset from peer, we don't need a round-trip to negotiate in/out sequence
+  if ((connect.features & CEPH_FEATURE_RECONNECT_SEQ) && !is_reset_from_peer) {
     reply_tag = CEPH_MSGR_TAG_SEQ;
     existing_seq = existing->in_seq;
   }
@@ -618,7 +625,10 @@ int Pipe::accept()
     uint64_t replaced_conn_id = conn_id;
     conn_id = existing->conn_id;
     existing->conn_id = replaced_conn_id;
-    in_seq = existing->in_seq;
+
+    // reset the in_seq if this is a hard reset from peer,
+    // otherwise we respect our original connection's value
+    in_seq = is_reset_from_peer ? 0 : existing->in_seq;
     in_seq_acked = in_seq;
 
     // steal outgoing queue and out_seq
diff --git a/src/os/FileStore.cc b/src/os/FileStore.cc
index 9d6252c..f73d930 100644
--- a/src/os/FileStore.cc
+++ b/src/os/FileStore.cc
@@ -275,6 +275,14 @@ int FileStore::lfn_open(coll_t cid,
 	     << ") in index: " << cpp_strerror(-r) << dendl;
 	goto fail;
       }
+      r = chain_fsetxattr(fd, XATTR_SPILL_OUT_NAME,
+                          XATTR_NO_SPILL_OUT, sizeof(XATTR_NO_SPILL_OUT));
+      if (r < 0) {
+        VOID_TEMP_FAILURE_RETRY(::close(fd));
+        derr << "error setting spillout xattr for oid " << oid << " (" << (*path)->path()
+                       << "):" << cpp_strerror(-r) << dendl;
+        goto fail;
+      }
     }
   }
 
@@ -2905,11 +2913,23 @@ int FileStore::_clone(coll_t cid, const ghobject_t& oldoid, const ghobject_t& ne
   }
 
   {
+    char buf[2];
     map<string, bufferptr> aset;
     r = _fgetattrs(**o, aset, false);
     if (r < 0)
       goto out3;
 
+    r = chain_fgetxattr(**o, XATTR_SPILL_OUT_NAME, buf, sizeof(buf));
+    if (r >= 0 && !strncmp(buf, XATTR_NO_SPILL_OUT, sizeof(XATTR_NO_SPILL_OUT))) {
+      r = chain_fsetxattr(**n, XATTR_SPILL_OUT_NAME, XATTR_NO_SPILL_OUT,
+                          sizeof(XATTR_NO_SPILL_OUT));
+    } else {
+      r = chain_fsetxattr(**n, XATTR_SPILL_OUT_NAME, XATTR_SPILL_OUT,
+                          sizeof(XATTR_SPILL_OUT));
+    }
+    if (r < 0)
+      goto out3;
+
     r = _fsetattrs(**n, aset);
     if (r < 0)
       goto out3;
diff --git a/src/os/HashIndex.cc b/src/os/HashIndex.cc
index d556cbc..35cb49b 100644
--- a/src/os/HashIndex.cc
+++ b/src/os/HashIndex.cc
@@ -36,8 +36,12 @@ int HashIndex::cleanup() {
   InProgressOp in_progress(i);
   subdir_info_s info;
   r = get_info(in_progress.path, &info);
-  if (r < 0)
+  if (r == -ENOENT) {
+    return end_split_or_merge(in_progress.path);
+  } else if (r < 0) {
     return r;
+  }
+
   if (in_progress.is_split())
     return complete_split(in_progress.path, info);
   else if (in_progress.is_merge())
diff --git a/src/os/XfsFileStoreBackend.cc b/src/os/XfsFileStoreBackend.cc
index 7b632d8..ba51e6b 100644
--- a/src/os/XfsFileStoreBackend.cc
+++ b/src/os/XfsFileStoreBackend.cc
@@ -61,6 +61,14 @@ int XfsFileStoreBackend::set_extsize(int fd, unsigned int val)
     goto out;
   }
 
+  // already set?
+  if ((fsx.fsx_xflags & XFS_XFLAG_EXTSIZE) && fsx.fsx_extsize == val)
+    return 0;
+
+  // xfs won't change extent size if any extents are allocated
+  if (fsx.fsx_nextents != 0)
+    return 0;
+
   fsx.fsx_xflags |= XFS_XFLAG_EXTSIZE;
   fsx.fsx_extsize = val;
 
diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc
index 4240ba8..418c188 100644
--- a/src/osd/OSD.cc
+++ b/src/osd/OSD.cc
@@ -2505,24 +2505,31 @@ void OSD::calc_priors_during(
     oldmap->pg_to_acting_osds(pgid.pgid, acting);
     dout(20) << "  " << pgid << " in epoch " << e << " was " << acting << dendl;
     int up = 0;
-    for (unsigned i=0; i<acting.size(); i++)
-      if (osdmap->is_up(acting[i])) {
-	if (acting[i] != whoami) {
-	  pset.insert(
-	    pg_shard_t(
-	      acting[i],
-	      osdmap->pg_is_ec(pgid.pgid) ? i : ghobject_t::NO_SHARD));
+    int actual_osds = 0;
+    for (unsigned i=0; i<acting.size(); i++) {
+      if (acting[i] != CRUSH_ITEM_NONE) {
+	if (osdmap->is_up(acting[i])) {
+	  if (acting[i] != whoami) {
+	    pset.insert(
+	      pg_shard_t(
+		acting[i],
+		osdmap->pg_is_ec(pgid.pgid) ? shard_id_t(i) : ghobject_t::NO_SHARD));
+	  }
+	  up++;
 	}
-	up++;
+	actual_osds++;
       }
-    if (!up && !acting.empty()) {
+    }
+    if (!up && actual_osds) {
       // sucky.  add down osds, even tho we can't reach them right now.
-      for (unsigned i=0; i<acting.size(); i++)
-	if (acting[i] != whoami)
+      for (unsigned i=0; i<acting.size(); i++) {
+	if (acting[i] != whoami && acting[i] != CRUSH_ITEM_NONE) {
 	  pset.insert(
 	    pg_shard_t(
 	      acting[i],
 	      osdmap->pg_is_ec(pgid.pgid) ? i : ghobject_t::NO_SHARD));
+	}
+      }
     }
   }
   dout(10) << "calc_priors_during " << pgid
@@ -3659,7 +3666,7 @@ void OSD::_maybe_boot(epoch_t oldest, epoch_t newest)
   }
   
   // get all the latest maps
-  if (osdmap->get_epoch() > oldest)
+  if (osdmap->get_epoch() + 1 >= oldest)
     osdmap_subscribe(osdmap->get_epoch() + 1, true);
   else
     osdmap_subscribe(oldest - 1, true);
@@ -3977,7 +3984,7 @@ void OSD::send_pg_stats(const utime_t &now)
   osd_stat_t cur_stat = osd_stat;
   stat_lock.Unlock();
 
-  osd_stat.fs_perf_stat = store->get_cur_stats();
+  cur_stat.fs_perf_stat = store->get_cur_stats();
    
   pg_stat_queue_lock.Lock();
 
@@ -5571,7 +5578,7 @@ void OSD::handle_osd_map(MOSDMap *m)
 	       (osdmap->get_hb_front_addr(whoami) != entity_addr_t() &&
                 !osdmap->get_hb_front_addr(whoami).probably_equals(hb_front_server_messenger->get_myaddr()))) {
       if (!osdmap->is_up(whoami)) {
-	if (service.is_preparing_to_stop()) {
+	if (service.is_preparing_to_stop() || service.is_stopping()) {
 	  service.got_stop_ack();
 	} else {
 	  clog.warn() << "map e" << osdmap->get_epoch()
@@ -5687,11 +5694,10 @@ void OSD::check_osdmap_features(ObjectStore *fs)
   // current memory location, and setting or clearing bits in integer
   // fields, and we are the only writer, this is not a problem.
 
-  uint64_t mask;
-  uint64_t features = osdmap->get_features(&mask);
-
   {
     Messenger::Policy p = client_messenger->get_default_policy();
+    uint64_t mask;
+    uint64_t features = osdmap->get_features(entity_name_t::TYPE_CLIENT, &mask);
     if ((p.features_required & mask) != features) {
       dout(0) << "crush map has features " << features
 	      << ", adjusting msgr requires for clients" << dendl;
@@ -5700,24 +5706,38 @@ void OSD::check_osdmap_features(ObjectStore *fs)
     }
   }
   {
+    Messenger::Policy p = cluster_messenger->get_policy(entity_name_t::TYPE_MON);
+    uint64_t mask;
+    uint64_t features = osdmap->get_features(entity_name_t::TYPE_MON, &mask);
+    if ((p.features_required & mask) != features) {
+      dout(0) << "crush map has features " << features
+	      << ", adjusting msgr requires for mons" << dendl;
+      p.features_required = (p.features_required & ~mask) | features;
+      client_messenger->set_policy(entity_name_t::TYPE_MON, p);
+    }
+  }
+  {
     Messenger::Policy p = cluster_messenger->get_policy(entity_name_t::TYPE_OSD);
+    uint64_t mask;
+    uint64_t features = osdmap->get_features(entity_name_t::TYPE_OSD, &mask);
+
     if ((p.features_required & mask) != features) {
       dout(0) << "crush map has features " << features
 	      << ", adjusting msgr requires for osds" << dendl;
       p.features_required = (p.features_required & ~mask) | features;
       cluster_messenger->set_policy(entity_name_t::TYPE_OSD, p);
     }
-  }
 
-  if ((features & CEPH_FEATURE_OSD_ERASURE_CODES) &&
+    if ((features & CEPH_FEATURE_OSD_ERASURE_CODES) &&
 	!fs->get_allow_sharded_objects()) {
-    dout(0) << __func__ << " enabling on-disk ERASURE CODES compat feature" << dendl;
-    superblock.compat_features.incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_SHARDS);
-    ObjectStore::Transaction *t = new ObjectStore::Transaction;
-    write_superblock(*t);
-    int err = store->queue_transaction_and_cleanup(NULL, t);
-    assert(err == 0);
-    fs->set_allow_sharded_objects();
+      dout(0) << __func__ << " enabling on-disk ERASURE CODES compat feature" << dendl;
+      superblock.compat_features.incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_SHARDS);
+      ObjectStore::Transaction *t = new ObjectStore::Transaction;
+      write_superblock(*t);
+      int err = store->queue_transaction_and_cleanup(NULL, t);
+      assert(err == 0);
+      fs->set_allow_sharded_objects();
+    }
   }
 }
 
diff --git a/src/osd/OSDMap.cc b/src/osd/OSDMap.cc
index fc61d43..8c66c76 100644
--- a/src/osd/OSDMap.cc
+++ b/src/osd/OSDMap.cc
@@ -950,7 +950,7 @@ bool OSDMap::find_osd_on_ip(const entity_addr_t& ip) const
 }
 
 
-uint64_t OSDMap::get_features(uint64_t *pmask) const
+uint64_t OSDMap::get_features(int entity_type, uint64_t *pmask) const
 {
   uint64_t features = 0;  // things we actually have
   uint64_t mask = 0;      // things we could have
@@ -970,7 +970,8 @@ uint64_t OSDMap::get_features(uint64_t *pmask) const
     if (p->second.flags & pg_pool_t::FLAG_HASHPSPOOL) {
       features |= CEPH_FEATURE_OSDHASHPSPOOL;
     }
-    if (p->second.is_erasure()) {
+    if (p->second.is_erasure() &&
+	entity_type != CEPH_ENTITY_TYPE_CLIENT) { // not for clients
       features |= CEPH_FEATURE_OSD_ERASURE_CODES;
     }
     if (!p->second.tiers.empty() ||
@@ -978,8 +979,9 @@ uint64_t OSDMap::get_features(uint64_t *pmask) const
       features |= CEPH_FEATURE_OSD_CACHEPOOL;
     }
   }
-  mask |= CEPH_FEATURE_OSDHASHPSPOOL | CEPH_FEATURE_OSD_CACHEPOOL |
-          CEPH_FEATURE_OSD_ERASURE_CODES;
+  mask |= CEPH_FEATURE_OSDHASHPSPOOL | CEPH_FEATURE_OSD_CACHEPOOL;
+  if (entity_type != CEPH_ENTITY_TYPE_CLIENT)
+    mask |= CEPH_FEATURE_OSD_ERASURE_CODES;
 
   if (osd_primary_affinity) {
     for (int i = 0; i < max_osd; ++i) {
@@ -2509,8 +2511,18 @@ int OSDMap::build_simple(CephContext *cct, epoch_t e, uuid_d &fsid,
   pool_names.push_back("metadata");
   pool_names.push_back("rbd");
 
+  stringstream ss;
+  int r;
+  if (nosd >= 0)
+    r = build_simple_crush_map(cct, *crush, nosd, &ss);
+  else
+    r = build_simple_crush_map_from_conf(cct, *crush, &ss);
+
   int poolbase = get_max_osd() ? get_max_osd() : 1;
 
+  int const default_replicated_ruleset = crush->get_osd_pool_default_crush_replicated_ruleset(cct);
+  assert(default_replicated_ruleset >= 0);
+
   for (vector<string>::iterator p = pool_names.begin();
        p != pool_names.end(); ++p) {
     int64_t pool = ++pool_max;
@@ -2520,8 +2532,7 @@ int OSDMap::build_simple(CephContext *cct, epoch_t e, uuid_d &fsid,
       pools[pool].flags |= pg_pool_t::FLAG_HASHPSPOOL;
     pools[pool].size = cct->_conf->osd_pool_default_size;
     pools[pool].min_size = cct->_conf->get_osd_pool_default_min_size();
-    pools[pool].crush_ruleset =
-      CrushWrapper::get_osd_pool_default_crush_replicated_ruleset(cct);
+    pools[pool].crush_ruleset = default_replicated_ruleset;
     pools[pool].object_hash = CEPH_STR_HASH_RJENKINS;
     pools[pool].set_pg_num(poolbase << pg_bits);
     pools[pool].set_pgp_num(poolbase << pgp_bits);
@@ -2532,13 +2543,6 @@ int OSDMap::build_simple(CephContext *cct, epoch_t e, uuid_d &fsid,
     name_pool[*p] = pool;
   }
 
-  stringstream ss;
-  int r;
-  if (nosd >= 0)
-    r = build_simple_crush_map(cct, *crush, nosd, &ss);
-  else
-    r = build_simple_crush_map_from_conf(cct, *crush, &ss);
-
   if (r < 0)
     lderr(cct) << ss.str() << dendl;
   
diff --git a/src/osd/OSDMap.h b/src/osd/OSDMap.h
index 650957d..08064f8 100644
--- a/src/osd/OSDMap.h
+++ b/src/osd/OSDMap.h
@@ -533,10 +533,11 @@ public:
   /**
    * get feature bits required by the current structure
    *
+   * @param entity_type [in] what entity type we are asking about
    * @param mask [out] set of all possible map-related features we could set
    * @return feature bits used by this map
    */
-  uint64_t get_features(uint64_t *mask) const;
+  uint64_t get_features(int entity_type, uint64_t *mask) const;
 
   /**
    * get intersection of features supported by up osds
diff --git a/src/osd/PG.cc b/src/osd/PG.cc
index 6deb099..42099fb 100644
--- a/src/osd/PG.cc
+++ b/src/osd/PG.cc
@@ -397,7 +397,8 @@ bool PG::search_for_missing(
   if (found_missing && num_unfound_before != missing_loc.num_unfound())
     publish_stats_to_osd();
   if (found_missing &&
-    (get_osdmap()->get_features(NULL) & CEPH_FEATURE_OSD_ERASURE_CODES)) {
+      (get_osdmap()->get_features(CEPH_ENTITY_TYPE_OSD, NULL) &
+       CEPH_FEATURE_OSD_ERASURE_CODES)) {
     pg_info_t tinfo(oinfo);
     tinfo.pgid.shard = pg_whoami.shard;
     (*(ctx->info_map))[from.osd].push_back(
@@ -3880,6 +3881,7 @@ void PG::chunky_scrub(ThreadPool::TPHandle &handle)
         scrubber.received_maps.clear();
 
         {
+	  hobject_t candidate_end;
 
           // get the start and end of our scrub chunk
           //
@@ -3898,11 +3900,11 @@ void PG::chunky_scrub(ThreadPool::TPHandle &handle)
 	      cct->_conf->osd_scrub_chunk_max,
 	      0,
 	      &objects,
-	      &scrubber.end);
+	      &candidate_end);
             assert(ret >= 0);
 
             // in case we don't find a boundary: start again at the end
-            start = scrubber.end;
+            start = candidate_end;
 
             // special case: reached end of file store, implicitly a boundary
             if (objects.empty()) {
@@ -3910,19 +3912,28 @@ void PG::chunky_scrub(ThreadPool::TPHandle &handle)
             }
 
             // search backward from the end looking for a boundary
-            objects.push_back(scrubber.end);
+            objects.push_back(candidate_end);
             while (!boundary_found && objects.size() > 1) {
               hobject_t end = objects.back().get_boundary();
               objects.pop_back();
 
               if (objects.back().get_filestore_key() != end.get_filestore_key()) {
-                scrubber.end = end;
+                candidate_end = end;
                 boundary_found = true;
               }
             }
           }
-        }
 
+	  if (!_range_available_for_scrub(scrubber.start, candidate_end)) {
+	    // we'll be requeued by whatever made us unavailable for scrub
+	    dout(10) << __func__ << ": scrub blocked somewhere in range "
+		     << "[" << scrubber.start << ", " << candidate_end << ")"
+		     << dendl;
+	    done = true;
+	    break;
+	  }
+	  scrubber.end = candidate_end;
+        }
         scrubber.block_writes = true;
 
         // walk the log to find the latest update that affects our chunk
@@ -4938,6 +4949,13 @@ bool PG::can_discard_op(OpRequestRef op)
     return true;
   }
 
+  if (m->get_map_epoch() < pool.info.last_force_op_resend &&
+      m->get_connection()->has_feature(CEPH_FEATURE_OSD_POOLRESEND)) {
+    dout(7) << __func__ << " sent before last_force_op_resend "
+	    << pool.info.last_force_op_resend << ", dropping" << *m << dendl;
+    return true;
+  }
+
   if ((m->get_flags() & (CEPH_OSD_FLAG_BALANCE_READS |
 			 CEPH_OSD_FLAG_LOCALIZE_READS)) &&
       op->may_read() &&
diff --git a/src/osd/PG.h b/src/osd/PG.h
index 1fce297..e9f3981 100644
--- a/src/osd/PG.h
+++ b/src/osd/PG.h
@@ -1118,6 +1118,13 @@ public:
   void build_scrub_map(ScrubMap &map, ThreadPool::TPHandle &handle);
   void build_inc_scrub_map(
     ScrubMap &map, eversion_t v, ThreadPool::TPHandle &handle);
+  /**
+   * returns true if [begin, end) is good to scrub at this time
+   * a false return value obliges the implementer to requeue scrub when the
+   * condition preventing scrub clears
+   */
+  virtual bool _range_available_for_scrub(
+    const hobject_t &begin, const hobject_t &end) = 0;
   virtual void _scrub(ScrubMap &map) { }
   virtual void _scrub_clear_state() { }
   virtual void _scrub_finish() { }
diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc
index 94eec05..6892208 100644
--- a/src/osd/ReplicatedPG.cc
+++ b/src/osd/ReplicatedPG.cc
@@ -1838,7 +1838,8 @@ void ReplicatedPG::execute_ctx(OpContext *ctx)
   calc_trim_to();
 
   // verify that we are doing this in order?
-  if (cct->_conf->osd_debug_op_order && m->get_source().is_client()) {
+  if (cct->_conf->osd_debug_op_order && m->get_source().is_client() &&
+      !pool.info.is_tier() && !pool.info.has_tiers()) {
     map<client_t,ceph_tid_t>& cm = debug_op_order[obc->obs.oi.soid];
     ceph_tid_t t = m->get_tid();
     client_t n = m->get_source().num();
@@ -6151,7 +6152,8 @@ int ReplicatedPG::start_flush(
       // nonblocking can join anything
       // blocking can only join a blocking flush
       dout(20) << __func__ << " piggybacking on existing flush " << dendl;
-      fop->dup_ops.push_back(op);
+      if (op)
+	fop->dup_ops.push_back(op);
       return -EAGAIN;   // clean up this ctx; op will retry later
     }
 
@@ -6169,6 +6171,7 @@ int ReplicatedPG::start_flush(
 
   // construct a SnapContext appropriate for this clone/head
   SnapContext dsnapc;
+  dsnapc.seq = 0;
   SnapContext snapc;
   if (soid.snap == CEPH_NOSNAP) {
     snapc.seq = snapset.seq;
@@ -6195,14 +6198,20 @@ int ReplicatedPG::start_flush(
       ++p;
     snapc.snaps = vector<snapid_t>(p, snapset.snaps.end());
 
+    while (p != snapset.snaps.end() && *p >= oi.snaps.back())
+      ++p;
+    vector<snapid_t>::iterator dnewest = p;
+
     // we may need to send a delete first
     while (p != snapset.snaps.end() && *p > prev_snapc)
       ++p;
     dsnapc.snaps = vector<snapid_t>(p, snapset.snaps.end());
 
-    if (dsnapc.snaps.empty()) {
+    if (p == dnewest) {
+      // no snaps between the oldest in this clone and prev_snapc
       snapc.seq = prev_snapc;
     } else {
+      // snaps between oldest in this clone and prev_snapc, send delete
       dsnapc.seq = prev_snapc;
       snapc.seq = oi.snaps.back() - 1;
     }
@@ -6211,7 +6220,7 @@ int ReplicatedPG::start_flush(
   object_locator_t base_oloc(soid);
   base_oloc.pool = pool.info.tier_of;
 
-  if (!dsnapc.snaps.empty()) {
+  if (dsnapc.seq > 0) {
     ObjectOperation o;
     o.remove();
     osd->objecter_lock.Lock();
@@ -7439,6 +7448,9 @@ void ReplicatedPG::kick_object_context_blocked(ObjectContextRef obc)
   dout(10) << __func__ << " " << soid << " requeuing " << ls.size() << " requests" << dendl;
   requeue_ops(ls);
   waiting_for_blocked_object.erase(p);
+
+  if (obc->requeue_scrub_on_unblock)
+    osd->queue_for_scrub(this);
 }
 
 SnapSetContext *ReplicatedPG::create_snapset_context(const hobject_t& oid)
@@ -11580,6 +11592,26 @@ void ReplicatedPG::agent_estimate_atime_temp(const hobject_t& oid,
 // SCRUB
 
 
+bool ReplicatedPG::_range_available_for_scrub(
+  const hobject_t &begin, const hobject_t &end)
+{
+  pair<hobject_t, ObjectContextRef> next;
+  next.second = object_contexts.lookup(begin);
+  next.first = begin;
+  bool more = true;
+  while (more && next.first < end) {
+    if (next.second && next.second->is_blocked()) {
+      next.second->requeue_scrub_on_unblock = true;
+      dout(10) << __func__ << ": scrub delayed, "
+	       << next.first << " is blocked"
+	       << dendl;
+      return false;
+    }
+    more = object_contexts.get_next(next.first, &next);
+  }
+  return true;
+}
+
 void ReplicatedPG::_scrub(ScrubMap& scrubmap)
 {
   dout(10) << "_scrub" << dendl;
diff --git a/src/osd/ReplicatedPG.h b/src/osd/ReplicatedPG.h
index 3ea4721..562cb06 100644
--- a/src/osd/ReplicatedPG.h
+++ b/src/osd/ReplicatedPG.h
@@ -1243,6 +1243,8 @@ protected:
   friend struct C_Flush;
 
   // -- scrub --
+  virtual bool _range_available_for_scrub(
+    const hobject_t &begin, const hobject_t &end);
   virtual void _scrub(ScrubMap& map);
   virtual void _scrub_clear_state();
   virtual void _scrub_finish();
diff --git a/src/osd/osd_types.cc b/src/osd/osd_types.cc
index 15dd661..c57ee86 100644
--- a/src/osd/osd_types.cc
+++ b/src/osd/osd_types.cc
@@ -770,6 +770,7 @@ void pg_pool_t::dump(Formatter *f) const
   f->dump_int("pg_placement_num", get_pgp_num());
   f->dump_unsigned("crash_replay_interval", get_crash_replay_interval());
   f->dump_stream("last_change") << get_last_change();
+  f->dump_stream("last_force_op_resend") << get_last_force_op_resend();
   f->dump_unsigned("auid", get_auid());
   f->dump_string("snap_mode", is_pool_snaps_mode() ? "pool" : "selfmanaged");
   f->dump_unsigned("snap_seq", get_snap_seq());
@@ -1058,7 +1059,7 @@ void pg_pool_t::encode(bufferlist& bl, uint64_t features) const
   }
 
   __u8 encode_compat = 5;
-  ENCODE_START(14, encode_compat, bl);
+  ENCODE_START(15, encode_compat, bl);
   ::encode(type, bl);
   ::encode(size, bl);
   ::encode(crush_ruleset, bl);
@@ -1097,12 +1098,13 @@ void pg_pool_t::encode(bufferlist& bl, uint64_t features) const
   ::encode(cache_min_flush_age, bl);
   ::encode(cache_min_evict_age, bl);
   ::encode(erasure_code_profile, bl);
+  ::encode(last_force_op_resend, bl);
   ENCODE_FINISH(bl);
 }
 
 void pg_pool_t::decode(bufferlist::iterator& bl)
 {
-  DECODE_START_LEGACY_COMPAT_LEN(14, 5, 5, bl);
+  DECODE_START_LEGACY_COMPAT_LEN(15, 5, 5, bl);
   ::decode(type, bl);
   ::decode(size, bl);
   ::decode(crush_ruleset, bl);
@@ -1199,7 +1201,11 @@ void pg_pool_t::decode(bufferlist::iterator& bl)
   if (struct_v >= 14) {
     ::decode(erasure_code_profile, bl);
   }
-
+  if (struct_v >= 15) {
+    ::decode(last_force_op_resend, bl);
+  } else {
+    last_force_op_resend = 0;
+  }
   DECODE_FINISH(bl);
   calc_pg_masks();
 }
@@ -1216,6 +1222,7 @@ void pg_pool_t::generate_test_instances(list<pg_pool_t*>& o)
   a.pg_num = 6;
   a.pgp_num = 5;
   a.last_change = 9;
+  a.last_force_op_resend = 123823;
   a.snap_seq = 10;
   a.snap_epoch = 11;
   a.auid = 12;
@@ -1264,8 +1271,11 @@ ostream& operator<<(ostream& out, const pg_pool_t& p)
       << " object_hash " << p.get_object_hash_name()
       << " pg_num " << p.get_pg_num()
       << " pgp_num " << p.get_pgp_num()
-      << " last_change " << p.get_last_change()
-      << " owner " << p.get_auid();
+      << " last_change " << p.get_last_change();
+  if (p.get_last_force_op_resend())
+    out << " lfor " << p.get_last_force_op_resend();
+  if (p.get_auid())
+    out << " owner " << p.get_auid();
   if (p.flags)
     out << " flags " << p.get_flags_string();
   if (p.crash_replay_interval)
diff --git a/src/osd/osd_types.h b/src/osd/osd_types.h
index 092d6cc..b70951c 100644
--- a/src/osd/osd_types.h
+++ b/src/osd/osd_types.h
@@ -882,6 +882,7 @@ public:
   map<string,string> properties;  ///< OBSOLETE
   string erasure_code_profile; ///< name of the erasure code profile in OSDMap
   epoch_t last_change;      ///< most recent epoch changed, exclusing snapshot changes
+  epoch_t last_force_op_resend; ///< last epoch that forced clients to resend
   snapid_t snap_seq;        ///< seq for per-pool snapshot
   epoch_t snap_epoch;       ///< osdmap epoch of last snap
   uint64_t auid;            ///< who owns the pg
@@ -914,6 +915,7 @@ public:
   cache_mode_t cache_mode;  ///< cache pool mode
 
   bool is_tier() const { return tier_of >= 0; }
+  bool has_tiers() const { return !tiers.empty(); }
   void clear_tier() { tier_of = -1; }
   bool has_read_tier() const { return read_tier >= 0; }
   void clear_read_tier() { read_tier = -1; }
@@ -940,6 +942,7 @@ public:
       crush_ruleset(0), object_hash(0),
       pg_num(0), pgp_num(0),
       last_change(0),
+      last_force_op_resend(0),
       snap_seq(0), snap_epoch(0),
       auid(0),
       crash_replay_interval(0),
@@ -979,6 +982,7 @@ public:
     return ceph_str_hash_name(get_object_hash());
   }
   epoch_t get_last_change() const { return last_change; }
+  epoch_t get_last_force_op_resend() const { return last_force_op_resend; }
   epoch_t get_snap_epoch() const { return snap_epoch; }
   snapid_t get_snap_seq() const { return snap_seq; }
   uint64_t get_auid() const { return auid; }
@@ -2690,6 +2694,7 @@ public:
   // set if writes for this object are blocked on another objects recovery
   ObjectContextRef blocked_by;      // object blocking our writes
   set<ObjectContextRef> blocking;   // objects whose writes we block
+  bool requeue_scrub_on_unblock;    // true if we need to requeue scrub on unblock
 
   // any entity in obs.oi.watchers MUST be in either watchers or unconnected_watchers.
   map<pair<uint64_t, entity_name_t>, WatchRef> watchers;
@@ -2862,7 +2867,7 @@ public:
       destructor_callback(0),
       lock("ReplicatedPG::ObjectContext::lock"),
       unstable_writes(0), readers(0), writers_waiting(0), readers_waiting(0),
-      blocked(false) {}
+      blocked(false), requeue_scrub_on_unblock(false) {}
 
   ~ObjectContext() {
     assert(rwstate.empty());
diff --git a/src/osdc/ObjectCacher.cc b/src/osdc/ObjectCacher.cc
index 3e41a5c..e1499b4 100644
--- a/src/osdc/ObjectCacher.cc
+++ b/src/osdc/ObjectCacher.cc
@@ -1619,6 +1619,9 @@ bool ObjectCacher::flush_set(ObjectSet *oset, Context *onfinish)
        !i.end(); ++i) {
     Object *ob = *i;
 
+    if (ob->dirty_or_tx == 0)
+      continue;
+
     if (!flush(ob, 0, 0)) {
       // we'll need to gather...
       ldout(cct, 10) << "flush_set " << oset << " will wait for ack tid " 
diff --git a/src/osdc/Objecter.cc b/src/osdc/Objecter.cc
index 9da65b0..e165266 100644
--- a/src/osdc/Objecter.cc
+++ b/src/osdc/Objecter.cc
@@ -1243,9 +1243,11 @@ ceph_tid_t Objecter::op_submit(Op *op)
 
 ceph_tid_t Objecter::_op_submit(Op *op)
 {
-  // pick tid
-  ceph_tid_t mytid = ++last_tid;
-  op->tid = mytid;
+  // pick tid if we haven't got one yet
+  if (op->tid == ceph_tid_t(0)) {
+    ceph_tid_t mytid = ++last_tid;
+    op->tid = mytid;
+  }
   assert(client_inc >= 0);
 
   // pick target
@@ -1437,19 +1439,23 @@ int Objecter::calc_target(op_target_t *t)
   bool is_read = t->flags & CEPH_OSD_FLAG_READ;
   bool is_write = t->flags & CEPH_OSD_FLAG_WRITE;
 
+  const pg_pool_t *pi = osdmap->get_pg_pool(t->base_oloc.pool);
+  bool force_resend = false;
   bool need_check_tiering = false;
-  if (t->target_oid.name.empty()) {
+  if (pi && osdmap->get_epoch() == pi->last_force_op_resend) {
+    force_resend = true;
+  }
+  if (t->target_oid.name.empty() || force_resend) {
     t->target_oid = t->base_oid;
     need_check_tiering = true;
   }
-  if (t->target_oloc.empty()) {
+  if (t->target_oloc.empty() || force_resend) {
     t->target_oloc = t->base_oloc;
     need_check_tiering = true;
   }
   
   if (need_check_tiering &&
       (t->flags & CEPH_OSD_FLAG_IGNORE_OVERLAY) == 0) {
-    const pg_pool_t *pi = osdmap->get_pg_pool(t->base_oloc.pool);
     if (pi) {
       if (is_read && pi->has_read_tier())
 	t->target_oloc.pool = pi->read_tier;
@@ -1485,7 +1491,8 @@ int Objecter::calc_target(op_target_t *t)
   }
 
   if (t->pgid != pgid ||
-      is_pg_changed(t->primary, t->acting, primary, acting, t->used_replica)) {
+      is_pg_changed(t->primary, t->acting, primary, acting, t->used_replica) ||
+      force_resend) {
     t->pgid = pgid;
     t->acting = acting;
     t->primary = primary;
diff --git a/src/pybind/ceph_rest_api.py b/src/pybind/ceph_rest_api.py
index 46d971e..d940db6 100755
--- a/src/pybind/ceph_rest_api.py
+++ b/src/pybind/ceph_rest_api.py
@@ -274,7 +274,7 @@ def show_human_help(prefix):
     # XXX There ought to be a better discovery mechanism than an HTML table
     s = '<html><body><table border=1><th>Possible commands:</th><th>Method</th><th>Description</th>'
 
-    permmap = {'r':'GET', 'rw':'PUT'}
+    permmap = {'r':'GET', 'rw':'PUT', 'rx':'GET', 'rwx':'PUT'}
     line = ''
     for cmdsig in sorted(app.ceph_sigdict.itervalues(), cmp=descsort):
         concise = concise_sig(cmdsig['sig'])
diff --git a/src/rgw/rgw_json_enc.cc b/src/rgw/rgw_json_enc.cc
index 4df7daf..a198678 100644
--- a/src/rgw/rgw_json_enc.cc
+++ b/src/rgw/rgw_json_enc.cc
@@ -46,6 +46,7 @@ void RGWObjManifestRule::dump(Formatter *f) const
   encode_json("start_ofs", start_ofs, f);
   encode_json("part_size", part_size, f);
   encode_json("stripe_max_size", stripe_max_size, f);
+  encode_json("override_prefix", override_prefix, f);
 }
 
 void RGWObjManifest::dump(Formatter *f) const
diff --git a/src/rgw/rgw_op.cc b/src/rgw/rgw_op.cc
index f3501fb..7894927 100644
--- a/src/rgw/rgw_op.cc
+++ b/src/rgw/rgw_op.cc
@@ -685,8 +685,11 @@ done_err:
   return ret;
 }
 
-int RGWGetObj::iterate_user_manifest_parts(rgw_bucket& bucket, string& obj_prefix, RGWAccessControlPolicy *bucket_policy,
-                                           uint64_t *ptotal_len, bool read_data)
+static int iterate_user_manifest_parts(CephContext *cct, RGWRados *store, off_t ofs, off_t end,
+                                       rgw_bucket& bucket, string& obj_prefix, RGWAccessControlPolicy *bucket_policy,
+                                       uint64_t *ptotal_len,
+                                       int (*cb)(rgw_bucket& bucket, RGWObjEnt& ent, RGWAccessControlPolicy *bucket_policy,
+                                                 off_t start_ofs, off_t end_ofs, void *param), void *cb_param)
 {
   uint64_t obj_ofs = 0, len_count = 0;
   bool found_start = false, found_end = false;
@@ -697,7 +700,7 @@ int RGWGetObj::iterate_user_manifest_parts(rgw_bucket& bucket, string& obj_prefi
   map<string, bool> common_prefixes;
   vector<RGWObjEnt> objs;
 
-  utime_t start_time = ceph_clock_now(s->cct);
+  utime_t start_time = ceph_clock_now(cct);
 
   do {
 #define MAX_LIST_OBJS 100
@@ -727,20 +730,20 @@ int RGWGetObj::iterate_user_manifest_parts(rgw_bucket& bucket, string& obj_prefi
       }
 
       perfcounter->tinc(l_rgw_get_lat,
-                       (ceph_clock_now(s->cct) - start_time));
+                       (ceph_clock_now(cct) - start_time));
 
       if (found_start) {
         len_count += end_ofs - start_ofs;
 
-        if (read_data) {
-          r = read_user_manifest_part(bucket, ent, bucket_policy, start_ofs, end_ofs);
+        if (cb) {
+          r = cb(bucket, ent, bucket_policy, start_ofs, end_ofs, cb_param);
           if (r < 0)
             return r;
         }
       }
       marker = ent.name;
 
-      start_time = ceph_clock_now(s->cct);
+      start_time = ceph_clock_now(cct);
     }
   } while (is_truncated && !found_end);
 
@@ -750,6 +753,13 @@ int RGWGetObj::iterate_user_manifest_parts(rgw_bucket& bucket, string& obj_prefi
   return 0;
 }
 
+static int get_obj_user_manifest_iterate_cb(rgw_bucket& bucket, RGWObjEnt& ent, RGWAccessControlPolicy *bucket_policy, off_t start_ofs, off_t end_ofs,
+                                       void *param)
+{
+  RGWGetObj *op = (RGWGetObj *)param;
+  return op->read_user_manifest_part(bucket, ent, bucket_policy, start_ofs, end_ofs);
+}
+
 int RGWGetObj::handle_user_manifest(const char *prefix)
 {
   ldout(s->cct, 2) << "RGWGetObj::handle_user_manifest() prefix=" << prefix << dendl;
@@ -789,13 +799,13 @@ int RGWGetObj::handle_user_manifest(const char *prefix)
   }
 
   /* dry run to find out total length */
-  int r = iterate_user_manifest_parts(bucket, obj_prefix, bucket_policy, &total_len, false);
+  int r = iterate_user_manifest_parts(s->cct, store, ofs, end, bucket, obj_prefix, bucket_policy, &total_len, NULL, NULL);
   if (r < 0)
     return r;
 
   s->obj_size = total_len;
 
-  r = iterate_user_manifest_parts(bucket, obj_prefix, bucket_policy, NULL, true);
+  r = iterate_user_manifest_parts(s->cct, store, ofs, end, bucket, obj_prefix, bucket_policy, NULL, get_obj_user_manifest_iterate_cb, (void *)this);
   if (r < 0)
     return r;
 
@@ -1357,22 +1367,26 @@ class RGWPutObjProcessor_Multipart : public RGWPutObjProcessor_Atomic
   string upload_id;
 
 protected:
-  bool immutable_head() { return true; }
-  int prepare(RGWRados *store, void *obj_ctx);
+  int prepare(RGWRados *store, void *obj_ctx, string *oid_rand);
   int do_complete(string& etag, time_t *mtime, time_t set_mtime, map<string, bufferlist>& attrs);
 
 public:
+  bool immutable_head() { return true; }
   RGWPutObjProcessor_Multipart(const string& bucket_owner, uint64_t _p, req_state *_s) :
                    RGWPutObjProcessor_Atomic(bucket_owner, _s->bucket, _s->object_str, _p, _s->req_id), s(_s) {}
 };
 
-int RGWPutObjProcessor_Multipart::prepare(RGWRados *store, void *obj_ctx)
+int RGWPutObjProcessor_Multipart::prepare(RGWRados *store, void *obj_ctx, string *oid_rand)
 {
-  RGWPutObjProcessor::prepare(store, obj_ctx);
+  RGWPutObjProcessor::prepare(store, obj_ctx, NULL);
 
   string oid = obj_str;
   upload_id = s->info.args.get("uploadId");
-  mp.init(oid, upload_id);
+  if (!oid_rand) {
+    mp.init(oid, upload_id);
+  } else {
+    mp.init(oid, upload_id, *oid_rand);
+  }
 
   part_num = s->info.args.get("partNumber");
   if (part_num.empty()) {
@@ -1388,7 +1402,13 @@ int RGWPutObjProcessor_Multipart::prepare(RGWRados *store, void *obj_ctx)
     return -EINVAL;
   }
 
-  string upload_prefix = oid + "." + upload_id;
+  string upload_prefix = oid + ".";
+
+  if (!oid_rand) {
+    upload_prefix.append(upload_id);
+  } else {
+    upload_prefix.append(*oid_rand);
+  }
 
   rgw_obj target_obj;
   target_obj.init(bucket, oid);
@@ -1466,7 +1486,7 @@ int RGWPutObjProcessor_Multipart::do_complete(string& etag, time_t *mtime, time_
 }
 
 
-RGWPutObjProcessor *RGWPutObj::select_processor()
+RGWPutObjProcessor *RGWPutObj::select_processor(bool *is_multipart)
 {
   RGWPutObjProcessor *processor;
 
@@ -1482,6 +1502,10 @@ RGWPutObjProcessor *RGWPutObj::select_processor()
     processor = new RGWPutObjProcessor_Multipart(bucket_owner, part_size, s);
   }
 
+  if (is_multipart) {
+    *is_multipart = multipart;
+  }
+
   return processor;
 }
 
@@ -1495,6 +1519,45 @@ void RGWPutObj::pre_exec()
   rgw_bucket_object_pre_exec(s);
 }
 
+static int put_obj_user_manifest_iterate_cb(rgw_bucket& bucket, RGWObjEnt& ent, RGWAccessControlPolicy *bucket_policy, off_t start_ofs, off_t end_ofs,
+                                       void *param)
+{
+  RGWPutObj *op = (RGWPutObj *)param;
+  return op->user_manifest_iterate_cb(bucket, ent, bucket_policy, start_ofs, end_ofs);
+}
+
+int RGWPutObj::user_manifest_iterate_cb(rgw_bucket& bucket, RGWObjEnt& ent, RGWAccessControlPolicy *bucket_policy, off_t start_ofs, off_t end_ofs)
+{
+  rgw_obj part(bucket, ent.name);
+
+  map<string, bufferlist> attrs;
+
+  int ret = get_obj_attrs(store, s, part, attrs, NULL, NULL);
+  if (ret < 0) {
+    return ret;
+  }
+  map<string, bufferlist>::iterator iter = attrs.find(RGW_ATTR_ETAG);
+  if (iter == attrs.end()) {
+    return 0;
+  }
+  bufferlist& bl = iter->second;
+  const char *buf = bl.c_str();
+  int len = bl.length();
+  while (len > 0 && buf[len - 1] == '\0') {
+    len--;
+  }
+  if (len > 0) {
+    user_manifest_parts_hash->Update((const byte *)bl.c_str(), len);
+  }
+
+  if (s->cct->_conf->subsys.should_gather(ceph_subsys_rgw, 20)) {
+    string e(bl.c_str(), bl.length());
+    ldout(s->cct, 20) << __func__ << ": appending user manifest etag: " << e << dendl;
+  }
+
+  return 0;
+}
+
 void RGWPutObj::execute()
 {
   RGWPutObjProcessor *processor = NULL;
@@ -1507,6 +1570,9 @@ void RGWPutObj::execute()
   map<string, bufferlist> attrs;
   int len;
   map<string, string>::iterator iter;
+  bool multipart;
+
+  bool need_calc_md5 = (obj_manifest == NULL);
 
 
   perfcounter->inc(l_rgw_put);
@@ -1520,6 +1586,8 @@ void RGWPutObj::execute()
     goto done;
 
   if (supplied_md5_b64) {
+    need_calc_md5 = true;
+
     ldout(s->cct, 15) << "supplied_md5_b64=" << supplied_md5_b64 << dendl;
     ret = ceph_unarmor(supplied_md5_bin, &supplied_md5_bin[CEPH_CRYPTO_MD5_DIGESTSIZE + 1],
                        supplied_md5_b64, supplied_md5_b64 + strlen(supplied_md5_b64));
@@ -1547,9 +1615,9 @@ void RGWPutObj::execute()
     supplied_md5[sizeof(supplied_md5) - 1] = '\0';
   }
 
-  processor = select_processor();
+  processor = select_processor(&multipart);
 
-  ret = processor->prepare(store, s->obj_ctx);
+  ret = processor->prepare(store, s->obj_ctx, NULL);
   if (ret < 0)
     goto done;
 
@@ -1570,11 +1638,52 @@ void RGWPutObj::execute()
     if (ret < 0)
       goto done;
 
-    hash.Update(data_ptr, len);
+    if (need_calc_md5) {
+      hash.Update(data_ptr, len);
+    }
 
-    ret = processor->throttle_data(handle);
-    if (ret < 0)
-      goto done;
+    /* do we need this operation to be synchronous? if we're dealing with an object with immutable
+     * head, e.g., multipart object we need to make sure we're the first one writing to this object
+     */
+    bool need_to_wait = (ofs == 0) && multipart;
+
+    ret = processor->throttle_data(handle, need_to_wait);
+    if (ret < 0) {
+      if (!need_to_wait || ret != -EEXIST) {
+        ldout(s->cct, 20) << "processor->thottle_data() returned ret=" << ret << dendl;
+        goto done;
+      }
+
+      ldout(s->cct, 5) << "NOTICE: processor->throttle_data() returned -EEXIST, need to restart write" << dendl;
+
+      /* restart processing with different oid suffix */
+
+      dispose_processor(processor);
+      processor = select_processor(&multipart);
+
+      string oid_rand;
+      char buf[33];
+      gen_rand_alphanumeric(store->ctx(), buf, sizeof(buf) - 1);
+      oid_rand.append(buf);
+
+      ret = processor->prepare(store, s->obj_ctx, &oid_rand);
+      if (ret < 0) {
+        ldout(s->cct, 0) << "ERROR: processor->prepare() returned " << ret << dendl;
+        goto done;
+      }
+
+      ret = processor->handle_data(data, ofs, &handle);
+      if (ret < 0) {
+        ldout(s->cct, 0) << "ERROR: processor->handle_data() returned " << ret << dendl;
+        goto done;
+      }
+
+      ret = processor->throttle_data(handle, false);
+      if (ret < 0) {
+        ldout(s->cct, 0) << "ERROR: processor->throttle_data() returned " << ret << dendl;
+        goto done;
+      }
+    }
 
     ofs += len;
   } while (len > 0);
@@ -1592,30 +1701,66 @@ void RGWPutObj::execute()
     goto done;
   }
 
-  hash.Final(m);
+  if (need_calc_md5) {
+    hash.Final(m);
 
-  buf_to_hex(m, CEPH_CRYPTO_MD5_DIGESTSIZE, calc_md5);
+    buf_to_hex(m, CEPH_CRYPTO_MD5_DIGESTSIZE, calc_md5);
+    etag = calc_md5;
 
-  if (supplied_md5_b64 && strcmp(calc_md5, supplied_md5)) {
-     ret = -ERR_BAD_DIGEST;
-     goto done;
+    if (supplied_md5_b64 && strcmp(calc_md5, supplied_md5)) {
+      ret = -ERR_BAD_DIGEST;
+      goto done;
+    }
   }
+
   policy.encode(aclbl);
 
-  etag = calc_md5;
+  attrs[RGW_ATTR_ACL] = aclbl;
+  if (obj_manifest) {
+    bufferlist manifest_bl;
+    string manifest_obj_prefix;
+    string manifest_bucket;
+    RGWBucketInfo bucket_info;
+
+    char etag_buf[CEPH_CRYPTO_MD5_DIGESTSIZE];
+    char etag_buf_str[CEPH_CRYPTO_MD5_DIGESTSIZE * 2 + 16];
+
+    manifest_bl.append(obj_manifest, strlen(obj_manifest) + 1);
+    attrs[RGW_ATTR_USER_MANIFEST] = manifest_bl;
+    user_manifest_parts_hash = &hash;
+    string prefix_str = obj_manifest;
+    int pos = prefix_str.find('/');
+    if (pos < 0) {
+      ldout(s->cct, 0) << "bad user manifest, missing slash separator: " << obj_manifest << dendl;
+      goto done;
+    }
 
+    manifest_bucket = prefix_str.substr(0, pos);
+    manifest_obj_prefix = prefix_str.substr(pos + 1);
+
+    ret = store->get_bucket_info(NULL, manifest_bucket, bucket_info, NULL, NULL);
+    if (ret < 0) {
+      ldout(s->cct, 0) << "could not get bucket info for bucket=" << manifest_bucket << dendl;
+    }
+    ret = iterate_user_manifest_parts(s->cct, store, 0, -1, bucket_info.bucket, manifest_obj_prefix,
+                                      NULL, NULL, put_obj_user_manifest_iterate_cb, (void *)this);
+    if (ret < 0) {
+      goto done;
+    }
+
+    hash.Final((byte *)etag_buf);
+    buf_to_hex((const unsigned char *)etag_buf, CEPH_CRYPTO_MD5_DIGESTSIZE, etag_buf_str);
+
+    ldout(s->cct, 0) << __func__ << ": calculated md5 for user manifest: " << etag_buf_str << dendl;
+
+    etag = etag_buf_str;
+  }
   if (supplied_etag && etag.compare(supplied_etag) != 0) {
     ret = -ERR_UNPROCESSABLE_ENTITY;
     goto done;
   }
   bl.append(etag.c_str(), etag.size() + 1);
   attrs[RGW_ATTR_ETAG] = bl;
-  attrs[RGW_ATTR_ACL] = aclbl;
-  if (obj_manifest) {
-    bufferlist manifest_bl;
-    manifest_bl.append(obj_manifest, strlen(obj_manifest) + 1);
-    attrs[RGW_ATTR_USER_MANIFEST] = manifest_bl;
-  }
 
   for (iter = s->generic_attrs.begin(); iter != s->generic_attrs.end(); ++iter) {
     bufferlist& attrbl = attrs[iter->first];
@@ -1683,7 +1828,7 @@ void RGWPostObj::execute()
 
   processor = select_processor();
 
-  ret = processor->prepare(store, s->obj_ctx);
+  ret = processor->prepare(store, s->obj_ctx, NULL);
   if (ret < 0)
     goto done;
 
@@ -1708,7 +1853,7 @@ void RGWPostObj::execute()
 
      hash.Update(data_ptr, len);
 
-     ret = processor->throttle_data(handle);
+     ret = processor->throttle_data(handle, false);
      if (ret < 0)
        goto done;
 
@@ -1752,8 +1897,13 @@ done:
 
 int RGWPutMetadata::verify_permission()
 {
-  if (!verify_object_permission(s, RGW_PERM_WRITE))
-    return -EACCES;
+  if (s->object) {
+    if (!verify_object_permission(s, RGW_PERM_WRITE))
+      return -EACCES;
+  } else {
+    if (!verify_bucket_permission(s, RGW_PERM_WRITE))
+      return -EACCES;
+  }
 
   return 0;
 }
@@ -2597,6 +2747,15 @@ void RGWCompleteMultipart::execute()
 
   iter = parts->parts.begin();
 
+  meta_obj.init_ns(s->bucket, meta_oid, mp_ns);
+  meta_obj.set_in_extra_data(true);
+
+  ret = get_obj_attrs(store, s, meta_obj, attrs, NULL, NULL);
+  if (ret < 0) {
+    ldout(s->cct, 0) << "ERROR: failed to get obj attrs, obj=" << meta_obj << " ret=" << ret << dendl;
+    return;
+  }
+
   do {
     ret = list_multipart_parts(store, s, upload_id, meta_oid, max_parts, marker, obj_parts, &marker, &truncated);
     if (ret == -ENOENT) {
@@ -2686,8 +2845,6 @@ void RGWCompleteMultipart::execute()
     return;
 
   // remove the upload obj
-  meta_obj.init_ns(s->bucket, meta_oid, mp_ns);
-  meta_obj.set_in_extra_data(true);
   store->delete_obj(s->obj_ctx, s->bucket_owner.get_id(), meta_obj);
 }
 
diff --git a/src/rgw/rgw_op.h b/src/rgw/rgw_op.h
index 9c1fa53..95a34ed 100644
--- a/src/rgw/rgw_op.h
+++ b/src/rgw/rgw_op.h
@@ -132,8 +132,6 @@ public:
   void pre_exec();
   void execute();
   int read_user_manifest_part(rgw_bucket& bucket, RGWObjEnt& ent, RGWAccessControlPolicy *bucket_policy, off_t start_ofs, off_t end_ofs);
-  int iterate_user_manifest_parts(rgw_bucket& bucket, string& obj_prefix, RGWAccessControlPolicy *bucket_policy,
-                                  uint64_t *ptotal_len, bool read_data);
   int handle_user_manifest(const char *prefix);
 
   int get_data_cb(bufferlist& bl, off_t ofs, off_t len);
@@ -324,6 +322,8 @@ protected:
   const char *obj_manifest;
   time_t mtime;
 
+  MD5 *user_manifest_parts_hash;
+
 public:
   RGWPutObj() {
     ret = 0;
@@ -333,6 +333,7 @@ public:
     chunked_upload = false;
     obj_manifest = NULL;
     mtime = 0;
+    user_manifest_parts_hash = NULL;
   }
 
   virtual void init(RGWRados *store, struct req_state *s, RGWHandler *h) {
@@ -340,9 +341,11 @@ public:
     policy.set_ctx(s->cct);
   }
 
-  RGWPutObjProcessor *select_processor();
+  RGWPutObjProcessor *select_processor(bool *is_multipart);
   void dispose_processor(RGWPutObjProcessor *processor);
 
+  int user_manifest_iterate_cb(rgw_bucket& bucket, RGWObjEnt& ent, RGWAccessControlPolicy *bucket_policy, off_t start_ofs, off_t end_ofs);
+
   int verify_permission();
   void pre_exec();
   void execute();
@@ -754,21 +757,22 @@ class RGWMPObj {
   string upload_id;
 public:
   RGWMPObj() {}
-  RGWMPObj(string& _oid, string& _upload_id) {
-    init(_oid, _upload_id);
+  RGWMPObj(const string& _oid, const string& _upload_id) {
+    init(_oid, _upload_id, _upload_id);
+  }
+  void init(const string& _oid, const string& _upload_id) {
+    init(_oid, _upload_id, _upload_id);
   }
-  void init(string& _oid, string& _upload_id) {
+  void init(const string& _oid, const string& _upload_id, const string& part_unique_str) {
     if (_oid.empty()) {
       clear();
       return;
     }
     oid = _oid;
     upload_id = _upload_id;
-    prefix = oid;
-    prefix.append(".");
-    prefix.append(upload_id);
-    meta = prefix;
-    meta.append(MP_META_SUFFIX);
+    prefix = oid + ".";
+    meta = prefix + upload_id + MP_META_SUFFIX;
+    prefix.append(part_unique_str);
   }
   string& get_meta() { return meta; }
   string get_part(int num) {
@@ -799,7 +803,7 @@ public:
       return false;
     oid = meta.substr(0, mid_pos);
     upload_id = meta.substr(mid_pos + 1, end_pos - mid_pos - 1);
-    init(oid, upload_id);
+    init(oid, upload_id, upload_id);
     return true;
   }
   void clear() {
diff --git a/src/rgw/rgw_rados.cc b/src/rgw/rgw_rados.cc
index 20602d5..30335c4 100644
--- a/src/rgw/rgw_rados.cc
+++ b/src/rgw/rgw_rados.cc
@@ -54,6 +54,7 @@ static string *notify_oids = NULL;
 static string shadow_ns = "shadow";
 static string dir_oid_prefix = ".dir.";
 static string default_storage_pool = ".rgw.buckets";
+static string default_storage_extra_pool = ".rgw.buckets.extra";
 static string avail_pools = ".pools.avail";
 
 static string zone_info_oid_prefix = "zone_info.";
@@ -308,6 +309,7 @@ void RGWZoneParams::init_default(RGWRados *store)
     RGWZonePlacementInfo default_placement;
     default_placement.index_pool = ".rgw.buckets.index";
     default_placement.data_pool = ".rgw.buckets";
+    default_placement.data_extra_pool = ".rgw.buckets.extra";
     placement_pools["default-placement"] = default_placement;
   }
 }
@@ -561,7 +563,10 @@ void RGWObjManifest::obj_iterator::seek(uint64_t o)
     rule_iter = manifest->rules.begin();
     stripe_ofs = 0;
     stripe_size = manifest->get_head_size();
-    cur_part_id = rule_iter->second.start_part_num;
+    if (rule_iter != manifest->rules.end()) {
+      cur_part_id = rule_iter->second.start_part_num;
+      cur_override_prefix = rule_iter->second.override_prefix;
+    }
     update_location();
     return;
   }
@@ -572,6 +577,11 @@ void RGWObjManifest::obj_iterator::seek(uint64_t o)
     --rule_iter;
   }
 
+  if (rule_iter == manifest->rules.end()) {
+    update_location();
+    return;
+  }
+
   RGWObjManifestRule& rule = rule_iter->second;
 
   if (rule.part_size > 0) {
@@ -601,6 +611,8 @@ void RGWObjManifest::obj_iterator::seek(uint64_t o)
     stripe_size = next - stripe_ofs;
   }
 
+  cur_override_prefix = rule.override_prefix;
+
   update_location();
 }
 
@@ -618,7 +630,7 @@ void RGWObjManifest::obj_iterator::update_location()
     return;
   }
 
-  manifest->get_implicit_location(cur_part_id, cur_stripe, ofs, &location);
+  manifest->get_implicit_location(cur_part_id, cur_stripe, ofs, &cur_override_prefix, &location);
 }
 
 void RGWObjManifest::obj_iterator::operator++()
@@ -699,6 +711,8 @@ void RGWObjManifest::obj_iterator::operator++()
     stripe_size = MIN(rule->part_size - (stripe_ofs - part_ofs), rule->stripe_max_size);
   }
 
+  cur_override_prefix = rule->override_prefix;
+
   ofs = stripe_ofs;
   if (ofs > obj_size) {
     ofs = obj_size;
@@ -719,10 +733,10 @@ int RGWObjManifest::generator::create_begin(CephContext *cct, RGWObjManifest *_m
   manifest->set_head(_h);
   last_ofs = 0;
 
-  char buf[33];
-  gen_rand_alphanumeric(cct, buf, sizeof(buf) - 1);
-
   if (manifest->get_prefix().empty()) {
+    char buf[33];
+    gen_rand_alphanumeric(cct, buf, sizeof(buf) - 1);
+
     string oid_prefix = ".";
     oid_prefix.append(buf);
     oid_prefix.append("_");
@@ -746,7 +760,7 @@ int RGWObjManifest::generator::create_begin(CephContext *cct, RGWObjManifest *_m
   
   cur_part_id = rule.start_part_num;
 
-  manifest->get_implicit_location(cur_part_id, cur_stripe, 0, &cur_obj);
+  manifest->get_implicit_location(cur_part_id, cur_stripe, 0, NULL, &cur_obj);
 
   manifest->update_iterators();
 
@@ -780,7 +794,7 @@ int RGWObjManifest::generator::create_next(uint64_t ofs)
   manifest->set_obj_size(ofs);
 
 
-  manifest->get_implicit_location(cur_part_id, cur_stripe, ofs, &cur_obj);
+  manifest->get_implicit_location(cur_part_id, cur_stripe, ofs, NULL, &cur_obj);
 
   manifest->update_iterators();
 
@@ -797,9 +811,14 @@ const RGWObjManifest::obj_iterator& RGWObjManifest::obj_end()
   return end_iter;
 }
 
-void RGWObjManifest::get_implicit_location(uint64_t cur_part_id, uint64_t cur_stripe, uint64_t ofs, rgw_obj *location)
+void RGWObjManifest::get_implicit_location(uint64_t cur_part_id, uint64_t cur_stripe, uint64_t ofs, string *override_prefix, rgw_obj *location)
 {
-  string oid = prefix;
+  string oid;
+  if (!override_prefix || override_prefix->empty()) {
+    oid = prefix;
+  } else {
+    oid = *override_prefix;
+  }
   string ns;
 
   if (!cur_part_id) {
@@ -857,10 +876,14 @@ int RGWObjManifest::append(RGWObjManifest& m)
     return 0;
   }
 
+  string override_prefix;
+
   if (prefix.empty()) {
     prefix = m.prefix;
-  } else if (prefix != m.prefix) {
-    return append_explicit(m);
+  }
+
+  if (prefix != m.prefix) {
+    override_prefix = m.prefix;
   }
 
   map<uint64_t, RGWObjManifestRule>::iterator miter = m.rules.begin();
@@ -882,9 +905,15 @@ int RGWObjManifest::append(RGWObjManifest& m)
       next_rule.part_size = m.obj_size - next_rule.start_ofs;
     }
 
+    if (override_prefix != rule.override_prefix) {
+      append_rules(m, miter, &override_prefix);
+      break;
+    }
+
     if (rule.part_size != next_rule.part_size ||
-        rule.stripe_max_size != next_rule.stripe_max_size) {
-      append_rules(m, miter);
+        rule.stripe_max_size != next_rule.stripe_max_size ||
+        rule.override_prefix != next_rule.override_prefix) {
+      append_rules(m, miter, NULL);
       break;
     }
 
@@ -894,7 +923,7 @@ int RGWObjManifest::append(RGWObjManifest& m)
     }
 
     if (expected_part_num != next_rule.start_part_num) {
-      append_rules(m, miter);
+      append_rules(m, miter, NULL);
       break;
     }
   }
@@ -904,11 +933,14 @@ int RGWObjManifest::append(RGWObjManifest& m)
   return 0;
 }
 
-void RGWObjManifest::append_rules(RGWObjManifest& m, map<uint64_t, RGWObjManifestRule>::iterator& miter)
+void RGWObjManifest::append_rules(RGWObjManifest& m, map<uint64_t, RGWObjManifestRule>::iterator& miter,
+                                  string *override_prefix)
 {
   for (; miter != m.rules.end(); ++miter) {
     RGWObjManifestRule rule = miter->second;
     rule.start_ofs += obj_size;
+    if (override_prefix)
+      rule.override_prefix = *override_prefix;
     rules[rule.start_ofs] = rule;
   }
 }
@@ -1006,9 +1038,9 @@ RGWPutObjProcessor::~RGWPutObjProcessor()
   }
 }
 
-int RGWPutObjProcessor_Plain::prepare(RGWRados *store, void *obj_ctx)
+int RGWPutObjProcessor_Plain::prepare(RGWRados *store, void *obj_ctx, string *oid_rand)
 {
-  RGWPutObjProcessor::prepare(store, obj_ctx);
+  RGWPutObjProcessor::prepare(store, obj_ctx, oid_rand);
 
   obj.init(bucket, obj_str);
 
@@ -1041,7 +1073,7 @@ int RGWPutObjProcessor_Plain::do_complete(string& etag, time_t *mtime, time_t se
 }
 
 
-int RGWPutObjProcessor_Aio::handle_obj_data(rgw_obj& obj, bufferlist& bl, off_t ofs, off_t abs_ofs, void **phandle)
+int RGWPutObjProcessor_Aio::handle_obj_data(rgw_obj& obj, bufferlist& bl, off_t ofs, off_t abs_ofs, void **phandle, bool exclusive)
 {
   if ((uint64_t)abs_ofs + bl.length() > obj_len)
     obj_len = abs_ofs + bl.length();
@@ -1051,7 +1083,7 @@ int RGWPutObjProcessor_Aio::handle_obj_data(rgw_obj& obj, bufferlist& bl, off_t
   int r = store->aio_put_obj_data(NULL, obj,
                                      bl,
                                      ((ofs != 0) ? ofs : -1),
-                                     false, phandle);
+                                     exclusive, phandle);
 
   return r;
 }
@@ -1091,7 +1123,7 @@ int RGWPutObjProcessor_Aio::drain_pending()
   return ret;
 }
 
-int RGWPutObjProcessor_Aio::throttle_data(void *handle)
+int RGWPutObjProcessor_Aio::throttle_data(void *handle, bool need_to_wait)
 {
   if (handle) {
     struct put_obj_aio_info info;
@@ -1099,10 +1131,13 @@ int RGWPutObjProcessor_Aio::throttle_data(void *handle)
     pending.push_back(info);
   }
   size_t orig_size = pending.size();
-  while (pending_has_completed()) {
+  while (pending_has_completed()
+         || need_to_wait) {
     int r = wait_pending_front();
     if (r < 0)
       return r;
+
+    need_to_wait = false;
   }
 
   /* resize window in case messages are draining too fast */
@@ -1118,7 +1153,7 @@ int RGWPutObjProcessor_Aio::throttle_data(void *handle)
   return 0;
 }
 
-int RGWPutObjProcessor_Atomic::write_data(bufferlist& bl, off_t ofs, void **phandle)
+int RGWPutObjProcessor_Atomic::write_data(bufferlist& bl, off_t ofs, void **phandle, bool exclusive)
 {
   if (ofs >= next_part_ofs) {
     int r = prepare_next_part(ofs);
@@ -1127,7 +1162,7 @@ int RGWPutObjProcessor_Atomic::write_data(bufferlist& bl, off_t ofs, void **phan
     }
   }
 
-  return RGWPutObjProcessor_Aio::handle_obj_data(cur_obj, bl, ofs - cur_part_ofs, ofs, phandle);
+  return RGWPutObjProcessor_Aio::handle_obj_data(cur_obj, bl, ofs - cur_part_ofs, ofs, phandle, exclusive);
 }
 
 int RGWPutObjProcessor_Atomic::handle_data(bufferlist& bl, off_t ofs, void **phandle)
@@ -1168,12 +1203,15 @@ int RGWPutObjProcessor_Atomic::handle_data(bufferlist& bl, off_t ofs, void **pha
   }
   off_t write_ofs = data_ofs;
   data_ofs = write_ofs + bl.length();
-  return write_data(bl, write_ofs, phandle);
+  bool exclusive = (!write_ofs && immutable_head()); /* immutable head object, need to verify nothing exists there
+                                                        we could be racing with another upload, to the same
+                                                        object and cleanup can be messy */
+  return write_data(bl, write_ofs, phandle, exclusive);
 }
 
-int RGWPutObjProcessor_Atomic::prepare(RGWRados *store, void *obj_ctx)
+int RGWPutObjProcessor_Atomic::prepare(RGWRados *store, void *obj_ctx, string *oid_rand)
 {
-  RGWPutObjProcessor::prepare(store, obj_ctx);
+  RGWPutObjProcessor::prepare(store, obj_ctx, oid_rand);
 
   head_obj.init(bucket, obj_str);
 
@@ -1220,12 +1258,12 @@ int RGWPutObjProcessor_Atomic::complete_writing_data()
   }
   if (pending_data_bl.length()) {
     void *handle;
-    int r = write_data(pending_data_bl, data_ofs, &handle);
+    int r = write_data(pending_data_bl, data_ofs, &handle, false);
     if (r < 0) {
       ldout(store->ctx(), 0) << "ERROR: write_data() returned " << r << dendl;
       return r;
     }
-    r = throttle_data(handle);
+    r = throttle_data(handle, false);
     if (r < 0) {
       ldout(store->ctx(), 0) << "ERROR: throttle_data() returned " << r << dendl;
       return r;
@@ -2345,7 +2383,7 @@ int RGWRados::create_bucket(RGWUserInfo& owner, rgw_bucket& bucket,
     if (!pmaster_bucket) {
       uint64_t iid = instance_id();
       uint64_t bid = next_bucket_id();
-      char buf[32];
+      char buf[zone.name.size() + 48];
       snprintf(buf, sizeof(buf), "%s.%llu.%llu", zone.name.c_str(), (long long)iid, (long long)bid);
       bucket.marker = buf;
       bucket.bucket_id = bucket.marker;
@@ -3014,7 +3052,7 @@ public:
       }
     }
 
-    ret = processor->throttle_data(handle);
+    ret = processor->throttle_data(handle, false);
     if (ret < 0)
       return ret;
 
@@ -3139,7 +3177,7 @@ int RGWRados::copy_obj(void *ctx,
 
     RGWPutObjProcessor_Atomic processor(dest_bucket_info.owner, dest_obj.bucket, dest_obj.object,
                                         cct->_conf->rgw_obj_stripe_size, tag);
-    ret = processor.prepare(this, ctx);
+    ret = processor.prepare(this, ctx, NULL);
     if (ret < 0)
       return ret;
 
diff --git a/src/rgw/rgw_rados.h b/src/rgw/rgw_rados.h
index b0c233b..5cc9c73 100644
--- a/src/rgw/rgw_rados.h
+++ b/src/rgw/rgw_rados.h
@@ -139,26 +139,30 @@ struct RGWObjManifestRule {
   uint64_t start_ofs;
   uint64_t part_size; /* each part size, 0 if there's no part size, meaning it's unlimited */
   uint64_t stripe_max_size; /* underlying obj max size */
+  string override_prefix;
 
   RGWObjManifestRule() : start_part_num(0), start_ofs(0), part_size(0), stripe_max_size(0) {}
   RGWObjManifestRule(uint32_t _start_part_num, uint64_t _start_ofs, uint64_t _part_size, uint64_t _stripe_max_size) :
                        start_part_num(_start_part_num), start_ofs(_start_ofs), part_size(_part_size), stripe_max_size(_stripe_max_size) {}
 
   void encode(bufferlist& bl) const {
-    ENCODE_START(1, 1, bl);
+    ENCODE_START(2, 1, bl);
     ::encode(start_part_num, bl);
     ::encode(start_ofs, bl);
     ::encode(part_size, bl);
     ::encode(stripe_max_size, bl);
+    ::encode(override_prefix, bl);
     ENCODE_FINISH(bl);
   }
 
   void decode(bufferlist::iterator& bl) {
-    DECODE_START(1,  bl);
+    DECODE_START(2, bl);
     ::decode(start_part_num, bl);
     ::decode(start_ofs, bl);
     ::decode(part_size, bl);
     ::decode(stripe_max_size, bl);
+    if (struct_v >= 2)
+      ::decode(override_prefix, bl);
     DECODE_FINISH(bl);
   }
   void dump(Formatter *f) const;
@@ -183,7 +187,7 @@ protected:
 
   void convert_to_explicit();
   int append_explicit(RGWObjManifest& m);
-  void append_rules(RGWObjManifest& m, map<uint64_t, RGWObjManifestRule>::iterator& iter);
+  void append_rules(RGWObjManifest& m, map<uint64_t, RGWObjManifestRule>::iterator& iter, string *override_prefix);
 
   void update_iterators() {
     begin_iter.seek(0);
@@ -223,7 +227,7 @@ public:
     objs.swap(_objs);
   }
 
-  void get_implicit_location(uint64_t cur_part_id, uint64_t cur_stripe, uint64_t ofs, rgw_obj *location);
+  void get_implicit_location(uint64_t cur_part_id, uint64_t cur_stripe, uint64_t ofs, string *override_prefix, rgw_obj *location);
 
   void set_trivial_rule(uint64_t tail_ofs, uint64_t stripe_max_size) {
     RGWObjManifestRule rule(0, tail_ofs, 0, stripe_max_size);
@@ -358,6 +362,7 @@ public:
 
     int cur_part_id;
     int cur_stripe;
+    string cur_override_prefix;
 
     rgw_obj location;
 
@@ -369,6 +374,7 @@ public:
     void init() {
       part_ofs = 0;
       stripe_ofs = 0;
+      ofs = 0;
       stripe_size = 0;
       cur_part_id = 0;
       cur_stripe = 0;
@@ -537,13 +543,13 @@ protected:
 public:
   RGWPutObjProcessor(const string& _bo) : store(NULL), obj_ctx(NULL), is_complete(false), bucket_owner(_bo) {}
   virtual ~RGWPutObjProcessor();
-  virtual int prepare(RGWRados *_store, void *_o) {
+  virtual int prepare(RGWRados *_store, void *_o, string *oid_rand) {
     store = _store;
     obj_ctx = _o;
     return 0;
   };
   virtual int handle_data(bufferlist& bl, off_t ofs, void **phandle) = 0;
-  virtual int throttle_data(void *handle) = 0;
+  virtual int throttle_data(void *handle, bool need_to_wait) = 0;
   virtual int complete(string& etag, time_t *mtime, time_t set_mtime, map<string, bufferlist>& attrs);
 };
 
@@ -557,12 +563,12 @@ class RGWPutObjProcessor_Plain : public RGWPutObjProcessor
   off_t ofs;
 
 protected:
-  int prepare(RGWRados *store, void *obj_ctx);
+  int prepare(RGWRados *store, void *obj_ctx, string *oid_rand);
   int handle_data(bufferlist& bl, off_t ofs, void **phandle);
   int do_complete(string& etag, time_t *mtime, time_t set_mtime, map<string, bufferlist>& attrs);
 
 public:
-  int throttle_data(void *handle) { return 0; }
+  int throttle_data(void *handle, bool need_to_wait) { return 0; }
   RGWPutObjProcessor_Plain(const string& bucket_owner, rgw_bucket& b, const string& o) : RGWPutObjProcessor(bucket_owner),
                                                                                          bucket(b), obj_str(o), ofs(0) {}
 };
@@ -584,10 +590,10 @@ protected:
   uint64_t obj_len;
 
   int drain_pending();
-  int handle_obj_data(rgw_obj& obj, bufferlist& bl, off_t ofs, off_t abs_ofs, void **phandle);
+  int handle_obj_data(rgw_obj& obj, bufferlist& bl, off_t ofs, off_t abs_ofs, void **phandle, bool exclusive);
 
 public:
-  int throttle_data(void *handle);
+  int throttle_data(void *handle, bool need_to_wait);
 
   RGWPutObjProcessor_Aio(const string& bucket_owner) : RGWPutObjProcessor(bucket_owner), max_chunks(RGW_MAX_PENDING_CHUNKS), obj_len(0) {}
   virtual ~RGWPutObjProcessor_Aio() {
@@ -618,9 +624,7 @@ protected:
   RGWObjManifest manifest;
   RGWObjManifest::generator manifest_gen;
 
-  virtual bool immutable_head() { return false; }
-
-  int write_data(bufferlist& bl, off_t ofs, void **phandle);
+  int write_data(bufferlist& bl, off_t ofs, void **phandle, bool exclusive);
   virtual int do_complete(string& etag, time_t *mtime, time_t set_mtime, map<string, bufferlist>& attrs);
 
   int prepare_next_part(off_t ofs);
@@ -640,11 +644,12 @@ public:
                                 bucket(_b),
                                 obj_str(_o),
                                 unique_tag(_t) {}
-  int prepare(RGWRados *store, void *obj_ctx);
+  int prepare(RGWRados *store, void *obj_ctx, string *oid_rand);
+  virtual bool immutable_head() { return false; }
   void set_extra_data_len(uint64_t len) {
     extra_data_len = len;
   }
-  int handle_data(bufferlist& bl, off_t ofs, void **phandle);
+  virtual int handle_data(bufferlist& bl, off_t ofs, void **phandle);
   bufferlist& get_extra_data() { return extra_data_bl; }
 };
 
diff --git a/src/test/cli/osdmaptool/clobber.t b/src/test/cli/osdmaptool/clobber.t
index 37399fd..5f37f03 100644
--- a/src/test/cli/osdmaptool/clobber.t
+++ b/src/test/cli/osdmaptool/clobber.t
@@ -20,9 +20,9 @@
   modified \d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d+ (re)
   flags 
   
-  pool 0 'data' replicated size 3 min_size 2 crush_ruleset 0 object_hash rjenkins pg_num 192 pgp_num 192 last_change 0 owner 0 flags hashpspool crash_replay_interval 45 stripe_width 0
-  pool 1 'metadata' replicated size 3 min_size 2 crush_ruleset 0 object_hash rjenkins pg_num 192 pgp_num 192 last_change 0 owner 0 flags hashpspool stripe_width 0
-  pool 2 'rbd' replicated size 3 min_size 2 crush_ruleset 0 object_hash rjenkins pg_num 192 pgp_num 192 last_change 0 owner 0 flags hashpspool stripe_width 0
+  pool 0 'data' replicated size 3 min_size 2 crush_ruleset 0 object_hash rjenkins pg_num 192 pgp_num 192 last_change 0 flags hashpspool crash_replay_interval 45 stripe_width 0
+  pool 1 'metadata' replicated size 3 min_size 2 crush_ruleset 0 object_hash rjenkins pg_num 192 pgp_num 192 last_change 0 flags hashpspool stripe_width 0
+  pool 2 'rbd' replicated size 3 min_size 2 crush_ruleset 0 object_hash rjenkins pg_num 192 pgp_num 192 last_change 0 flags hashpspool stripe_width 0
   
   max_osd 3
   
@@ -43,9 +43,9 @@
   modified \d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d+ (re)
   flags 
   
-  pool 0 'data' replicated size 3 min_size 2 crush_ruleset 0 object_hash rjenkins pg_num 64 pgp_num 64 last_change 0 owner 0 flags hashpspool crash_replay_interval 45 stripe_width 0
-  pool 1 'metadata' replicated size 3 min_size 2 crush_ruleset 0 object_hash rjenkins pg_num 64 pgp_num 64 last_change 0 owner 0 flags hashpspool stripe_width 0
-  pool 2 'rbd' replicated size 3 min_size 2 crush_ruleset 0 object_hash rjenkins pg_num 64 pgp_num 64 last_change 0 owner 0 flags hashpspool stripe_width 0
+  pool 0 'data' replicated size 3 min_size 2 crush_ruleset 0 object_hash rjenkins pg_num 64 pgp_num 64 last_change 0 flags hashpspool crash_replay_interval 45 stripe_width 0
+  pool 1 'metadata' replicated size 3 min_size 2 crush_ruleset 0 object_hash rjenkins pg_num 64 pgp_num 64 last_change 0 flags hashpspool stripe_width 0
+  pool 2 'rbd' replicated size 3 min_size 2 crush_ruleset 0 object_hash rjenkins pg_num 64 pgp_num 64 last_change 0 flags hashpspool stripe_width 0
   
   max_osd 1
   
diff --git a/src/test/cli/osdmaptool/create-print.t b/src/test/cli/osdmaptool/create-print.t
index 84c37ff..9ebd274 100644
--- a/src/test/cli/osdmaptool/create-print.t
+++ b/src/test/cli/osdmaptool/create-print.t
@@ -75,9 +75,9 @@
   modified \d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d+ (re)
   flags 
   
-  pool 0 'data' replicated size 3 min_size 2 crush_ruleset 0 object_hash rjenkins pg_num 192 pgp_num 192 last_change 0 owner 0 flags hashpspool crash_replay_interval 45 stripe_width 0
-  pool 1 'metadata' replicated size 3 min_size 2 crush_ruleset 0 object_hash rjenkins pg_num 192 pgp_num 192 last_change 0 owner 0 flags hashpspool stripe_width 0
-  pool 2 'rbd' replicated size 3 min_size 2 crush_ruleset 0 object_hash rjenkins pg_num 192 pgp_num 192 last_change 0 owner 0 flags hashpspool stripe_width 0
+  pool 0 'data' replicated size 3 min_size 2 crush_ruleset 0 object_hash rjenkins pg_num 192 pgp_num 192 last_change 0 flags hashpspool crash_replay_interval 45 stripe_width 0
+  pool 1 'metadata' replicated size 3 min_size 2 crush_ruleset 0 object_hash rjenkins pg_num 192 pgp_num 192 last_change 0 flags hashpspool stripe_width 0
+  pool 2 'rbd' replicated size 3 min_size 2 crush_ruleset 0 object_hash rjenkins pg_num 192 pgp_num 192 last_change 0 flags hashpspool stripe_width 0
   
   max_osd 3
   
@@ -86,27 +86,19 @@
   osdmaptool: writing epoch 1 to myosdmap
   $ osdmaptool --print myosdmap | grep 'pool 0'
   osdmaptool: osdmap file 'myosdmap'
-  pool 0 'data' replicated size 3 min_size 2 crush_ruleset 66 object_hash rjenkins pg_num 192 pgp_num 192 last_change 0 owner 0 flags hashpspool crash_replay_interval 45 stripe_width 0
+  pool 0 'data' replicated size 3 min_size 2 crush_ruleset 66 object_hash rjenkins pg_num 192 pgp_num 192 last_change 0 flags hashpspool crash_replay_interval 45 stripe_width 0
   $ osdmaptool --clobber --createsimple 3 --osd_pool_default_crush_rule 55 myosdmap 2>&1 >/dev/null | sed -e 's/^.* 0 osd_pool_//'
   osdmaptool: osdmap file 'myosdmap'
   default_crush_rule is deprecated use osd_pool_default_crush_replicated_ruleset instead
   default_crush_rule = 55 overrides osd_pool_default_crush_replicated_ruleset = 0
-  default_crush_rule is deprecated use osd_pool_default_crush_replicated_ruleset instead
-  default_crush_rule = 55 overrides osd_pool_default_crush_replicated_ruleset = 0
-  default_crush_rule is deprecated use osd_pool_default_crush_replicated_ruleset instead
-  default_crush_rule = 55 overrides osd_pool_default_crush_replicated_ruleset = 0
   $ osdmaptool --print myosdmap | grep 'pool 0'
   osdmaptool: osdmap file 'myosdmap'
-  pool 0 'data' replicated size 3 min_size 2 crush_ruleset 55 object_hash rjenkins pg_num 192 pgp_num 192 last_change 0 owner 0 flags hashpspool crash_replay_interval 45 stripe_width 0
+  pool 0 'data' replicated size 3 min_size 2 crush_ruleset 55 object_hash rjenkins pg_num 192 pgp_num 192 last_change 0 flags hashpspool crash_replay_interval 45 stripe_width 0
   $ osdmaptool --clobber --createsimple 3 --osd_pool_default_crush_replicated_ruleset 66 --osd_pool_default_crush_rule 55 myosdmap 2>&1 >/dev/null | sed -e 's/^.* 0 osd_pool_//'
   osdmaptool: osdmap file 'myosdmap'
   default_crush_rule is deprecated use osd_pool_default_crush_replicated_ruleset instead
   default_crush_rule = 55 overrides osd_pool_default_crush_replicated_ruleset = 66
-  default_crush_rule is deprecated use osd_pool_default_crush_replicated_ruleset instead
-  default_crush_rule = 55 overrides osd_pool_default_crush_replicated_ruleset = 66
-  default_crush_rule is deprecated use osd_pool_default_crush_replicated_ruleset instead
-  default_crush_rule = 55 overrides osd_pool_default_crush_replicated_ruleset = 66
   $ osdmaptool --print myosdmap | grep 'pool 0'
   osdmaptool: osdmap file 'myosdmap'
-  pool 0 'data' replicated size 3 min_size 2 crush_ruleset 55 object_hash rjenkins pg_num 192 pgp_num 192 last_change 0 owner 0 flags hashpspool crash_replay_interval 45 stripe_width 0
+  pool 0 'data' replicated size 3 min_size 2 crush_ruleset 55 object_hash rjenkins pg_num 192 pgp_num 192 last_change 0 flags hashpspool crash_replay_interval 45 stripe_width 0
   $ rm -f myosdmap
diff --git a/src/test/cli/osdmaptool/create-racks.t b/src/test/cli/osdmaptool/create-racks.t
index f502806..33fa9ee 100644
--- a/src/test/cli/osdmaptool/create-racks.t
+++ b/src/test/cli/osdmaptool/create-racks.t
@@ -788,9 +788,9 @@
   modified \d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d+ (re)
   flags 
   
-  pool 0 'data' replicated size 3 min_size 2 crush_ruleset 0 object_hash rjenkins pg_num 15296 pgp_num 15296 last_change 0 owner 0 flags hashpspool crash_replay_interval 45 stripe_width 0
-  pool 1 'metadata' replicated size 3 min_size 2 crush_ruleset 0 object_hash rjenkins pg_num 15296 pgp_num 15296 last_change 0 owner 0 flags hashpspool stripe_width 0
-  pool 2 'rbd' replicated size 3 min_size 2 crush_ruleset 0 object_hash rjenkins pg_num 15296 pgp_num 15296 last_change 0 owner 0 flags hashpspool stripe_width 0
+  pool 0 'data' replicated size 3 min_size 2 crush_ruleset 0 object_hash rjenkins pg_num 15296 pgp_num 15296 last_change 0 flags hashpspool crash_replay_interval 45 stripe_width 0
+  pool 1 'metadata' replicated size 3 min_size 2 crush_ruleset 0 object_hash rjenkins pg_num 15296 pgp_num 15296 last_change 0 flags hashpspool stripe_width 0
+  pool 2 'rbd' replicated size 3 min_size 2 crush_ruleset 0 object_hash rjenkins pg_num 15296 pgp_num 15296 last_change 0 flags hashpspool stripe_width 0
   
   max_osd 239
   
@@ -800,27 +800,19 @@
   osdmaptool: writing epoch 1 to om
   $ osdmaptool --print om | grep 'pool 0'
   osdmaptool: osdmap file 'om'
-  pool 0 'data' replicated size 3 min_size 2 crush_ruleset 55 object_hash rjenkins pg_num 15296 pgp_num 15296 last_change 0 owner 0 flags hashpspool crash_replay_interval 45 stripe_width 0
+  pool 0 'data' replicated size 3 min_size 2 crush_ruleset 55 object_hash rjenkins pg_num 15296 pgp_num 15296 last_change 0 flags hashpspool crash_replay_interval 45 stripe_width 0
   $ osdmaptool --clobber --create-from-conf --osd_pool_default_crush_rule 55 om -c $TESTDIR/ceph.conf.withracks 2>&1 >/dev/null | sed -e 's/^.* 0 osd_pool_//'
   osdmaptool: osdmap file 'om'
   default_crush_rule is deprecated use osd_pool_default_crush_replicated_ruleset instead
   default_crush_rule = 55 overrides osd_pool_default_crush_replicated_ruleset = 0
-  default_crush_rule is deprecated use osd_pool_default_crush_replicated_ruleset instead
-  default_crush_rule = 55 overrides osd_pool_default_crush_replicated_ruleset = 0
-  default_crush_rule is deprecated use osd_pool_default_crush_replicated_ruleset instead
-  default_crush_rule = 55 overrides osd_pool_default_crush_replicated_ruleset = 0
   $ osdmaptool --print om | grep 'pool 0'
   osdmaptool: osdmap file 'om'
-  pool 0 'data' replicated size 3 min_size 2 crush_ruleset 55 object_hash rjenkins pg_num 15296 pgp_num 15296 last_change 0 owner 0 flags hashpspool crash_replay_interval 45 stripe_width 0
+  pool 0 'data' replicated size 3 min_size 2 crush_ruleset 55 object_hash rjenkins pg_num 15296 pgp_num 15296 last_change 0 flags hashpspool crash_replay_interval 45 stripe_width 0
   $ osdmaptool --clobber --create-from-conf --osd_pool_default_crush_replicated_ruleset 66 --osd_pool_default_crush_rule 55 om -c $TESTDIR/ceph.conf.withracks 2>&1 >/dev/null | sed -e 's/^.* 0 osd_pool_//'
   osdmaptool: osdmap file 'om'
   default_crush_rule is deprecated use osd_pool_default_crush_replicated_ruleset instead
   default_crush_rule = 55 overrides osd_pool_default_crush_replicated_ruleset = 66
-  default_crush_rule is deprecated use osd_pool_default_crush_replicated_ruleset instead
-  default_crush_rule = 55 overrides osd_pool_default_crush_replicated_ruleset = 66
-  default_crush_rule is deprecated use osd_pool_default_crush_replicated_ruleset instead
-  default_crush_rule = 55 overrides osd_pool_default_crush_replicated_ruleset = 66
   $ osdmaptool --print om | grep 'pool 0'
   osdmaptool: osdmap file 'om'
-  pool 0 'data' replicated size 3 min_size 2 crush_ruleset 55 object_hash rjenkins pg_num 15296 pgp_num 15296 last_change 0 owner 0 flags hashpspool crash_replay_interval 45 stripe_width 0
+  pool 0 'data' replicated size 3 min_size 2 crush_ruleset 55 object_hash rjenkins pg_num 15296 pgp_num 15296 last_change 0 flags hashpspool crash_replay_interval 45 stripe_width 0
   $ rm -f om
diff --git a/src/test/librados/pool.cc b/src/test/librados/pool.cc
index 65d5c22..04286fc 100644
--- a/src/test/librados/pool.cc
+++ b/src/test/librados/pool.cc
@@ -8,8 +8,8 @@
 #define POOL_LIST_BUF_SZ 32768
 
 TEST(LibRadosPools, PoolList) {
-  std::vector<char> pool_list_buf(POOL_LIST_BUF_SZ, '\0');
-  char *buf = &pool_list_buf[0];
+  char pool_list_buf[POOL_LIST_BUF_SZ];
+  char *buf = pool_list_buf;
   rados_t cluster;
   std::string pool_name = get_temp_pool_name();
   ASSERT_EQ("", create_one_pool(pool_name, &cluster));
@@ -23,6 +23,14 @@ TEST(LibRadosPools, PoolList) {
     buf += strlen(buf) + 1;
   }
   ASSERT_EQ(found_pool, true);
+
+  // make sure we honor the buffer size limit
+  buf = pool_list_buf;
+  memset(buf, 0, POOL_LIST_BUF_SZ);
+  ASSERT_LT(rados_pool_list(cluster, buf, 20), POOL_LIST_BUF_SZ);
+  ASSERT_NE(0, buf[0]);  // include at least one pool name
+  ASSERT_EQ(0, buf[20]);  // but don't touch the stopping point
+
   ASSERT_EQ(0, destroy_one_pool(pool_name, &cluster));
 }
 
diff --git a/src/test/librados/tier.cc b/src/test/librados/tier.cc
index ad956be..611e17e 100644
--- a/src/test/librados/tier.cc
+++ b/src/test/librados/tier.cc
@@ -3993,6 +3993,8 @@ TEST_F(LibRadosTierECPP, HitSetRead) {
   }
 }
 
+// disable this test until hitset-get reliably works on EC pools
+#if 0
 TEST_F(LibRadosTierECPP, HitSetWrite) {
   int num_pg = _get_pg_num(cluster, pool_name);
   assert(num_pg > 0);
@@ -4065,6 +4067,7 @@ TEST_F(LibRadosTierECPP, HitSetWrite) {
     ASSERT_TRUE(found);
   }
 }
+#endif
 
 TEST_F(LibRadosTierECPP, HitSetTrim) {
   unsigned count = 3;
diff --git a/src/tools/rados/rados.cc b/src/tools/rados/rados.cc
index 5b9297a..bd71433 100644
--- a/src/tools/rados/rados.cc
+++ b/src/tools/rados/rados.cc
@@ -1359,6 +1359,15 @@ static int rados_tool_common(const std::map < std::string, std::string > &opts,
 	   << cpp_strerror(ret) << std::endl;
       goto out;
     }
+
+    // align op_size
+    if (io_ctx.pool_requires_alignment()) {
+      const uint64_t align = io_ctx.pool_required_alignment();
+      const bool wrn = (op_size != (1<<22));
+      op_size = uint64_t((op_size + align - 1) / align) * align;
+      if (wrn)
+	cerr << "INFO: op_size has been rounded to " << op_size << std::endl;
+    }
   }
 
   // snapname?

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-ceph/ceph.git



More information about the Pkg-ceph-commits mailing list