[Pkg-ceph-commits] [ceph] 02/06: Imported Upstream version 10.2.3

James Downing Page jamespage at moszumanska.debian.org
Thu Sep 29 07:58:52 UTC 2016


This is an automated email from the git hooks/post-receive script.

jamespage pushed a commit to branch ubuntu-yakkety
in repository ceph.

commit 7f897562d3e5b221bd28d6920a26fbd6fbcc9442
Author: James Page <james.page at ubuntu.com>
Date:   Thu Sep 29 08:43:05 2016 +0100

    Imported Upstream version 10.2.3
---
 AUTHORS                                            |   12 +
 ChangeLog                                          |  370 ++++-
 Makefile.am                                        |    1 +
 Makefile.in                                        |    6 +-
 ceph.spec                                          |   49 +-
 ceph.spec.in                                       |   47 +-
 configure                                          |   23 +-
 configure.ac                                       |    3 +-
 doc/man/8/radosgw-admin.rst                        |    4 -
 doc/man/8/rbd.rst                                  |    2 +-
 install-deps.sh                                    |   32 +-
 man/ceph-authtool.8                                |    2 +-
 man/ceph-clsinfo.8                                 |    2 +-
 man/ceph-conf.8                                    |    2 +-
 man/ceph-create-keys.8                             |    2 +-
 man/ceph-debugpack.8                               |    2 +-
 man/ceph-dencoder.8                                |    2 +-
 man/ceph-deploy.8                                  |    2 +-
 man/ceph-detect-init.8                             |    2 +-
 man/ceph-disk.8                                    |    2 +-
 man/ceph-fuse.8                                    |    2 +-
 man/ceph-mds.8                                     |    2 +-
 man/ceph-mon.8                                     |    2 +-
 man/ceph-osd.8                                     |    2 +-
 man/ceph-post-file.8                               |    2 +-
 man/ceph-rbdnamer.8                                |    2 +-
 man/ceph-rest-api.8                                |    2 +-
 man/ceph-run.8                                     |    2 +-
 man/ceph-syn.8                                     |    2 +-
 man/ceph.8                                         |    2 +-
 man/cephfs.8                                       |    2 +-
 man/crushtool.8                                    |    2 +-
 man/librados-config.8                              |    2 +-
 man/monmaptool.8                                   |    2 +-
 man/mount.ceph.8                                   |    2 +-
 man/osdmaptool.8                                   |    2 +-
 man/rados.8                                        |    2 +-
 man/radosgw-admin.8                                |    6 +-
 man/radosgw.8                                      |    2 +-
 man/rbd-fuse.8                                     |    2 +-
 man/rbd-mirror.8                                   |    2 +-
 man/rbd-nbd.8                                      |    2 +-
 man/rbd-replay-many.8                              |    2 +-
 man/rbd-replay-prep.8                              |    2 +-
 man/rbd-replay.8                                   |    2 +-
 man/rbd.8                                          |    4 +-
 man/rbdmap.8                                       |    2 +-
 selinux/ceph.te                                    |    4 +-
 src/.git_version                                   |    4 +-
 src/Makefile-client.am                             |    7 +
 src/Makefile-env.am                                |    2 +
 src/Makefile-server.am                             |    7 -
 src/Makefile.in                                    | 1492 ++++++++++----------
 src/ceph-disk/ceph_disk/main.py                    |   19 +-
 src/ceph-osd-prestart.sh                           |    4 +-
 src/client/Client.cc                               |  210 +--
 src/client/Client.h                                |    8 +-
 src/client/Inode.h                                 |    9 +-
 src/client/MetaRequest.h                           |    2 +-
 src/client/MetaSession.h                           |    8 +-
 src/client/fuse_ll.cc                              |   10 +-
 src/cls/rgw/cls_rgw.cc                             |   21 +-
 src/cls/rgw/cls_rgw_types.h                        |    4 +-
 src/common/admin_socket.cc                         |    2 +-
 src/common/ceph_context.cc                         |    5 +
 src/common/ceph_context.h                          |    1 +
 src/common/common_init.cc                          |    9 +-
 src/common/config_opts.h                           |   22 +-
 src/common/event_socket.h                          |    1 +
 src/common/obj_bencher.cc                          |    7 +
 src/common/scrub_types.cc                          |   20 +-
 src/common/scrub_types.h                           |    8 +-
 src/common/strtol.cc                               |    2 +
 src/global/global_init.cc                          |   72 +-
 src/include/buffer.h                               |    4 +-
 src/include/rados/buffer.h                         |    4 +-
 src/include/rados/librados.h                       |   16 +
 src/include/rados/rados_types.hpp                  |   30 +-
 src/include/rbd/librbd.h                           |   23 +
 src/include/rbd/librbd.hpp                         |   12 +
 src/journal/Entry.cc                               |    2 +-
 src/journal/FutureImpl.cc                          |   43 +-
 src/journal/FutureImpl.h                           |    4 +
 src/journal/JournalMetadata.cc                     |   32 +-
 src/journal/JournalMetadata.h                      |   20 +-
 src/journal/JournalMetadataListener.h              |   30 +
 src/journal/JournalPlayer.cc                       |  272 ++--
 src/journal/JournalPlayer.h                        |   14 +-
 src/journal/JournalRecorder.cc                     |    2 +-
 src/journal/JournalRecorder.h                      |    2 +-
 src/journal/JournalTrimmer.cc                      |    2 +-
 src/journal/JournalTrimmer.h                       |    2 +-
 src/journal/Journaler.cc                           |   31 +-
 src/journal/Journaler.h                            |   11 +-
 src/journal/Makefile.am                            |    2 +
 src/journal/ObjectPlayer.cc                        |  131 +-
 src/journal/ObjectPlayer.h                         |   30 +-
 src/journal/ObjectRecorder.cc                      |    2 +-
 src/journal/Settings.h                             |   19 +
 src/librbd/AioCompletion.cc                        |  372 +++--
 src/librbd/AioCompletion.h                         |  447 +++---
 src/librbd/AioImageRequest.cc                      |  461 +++---
 src/librbd/AioImageRequest.h                       |   87 +-
 src/librbd/AioImageRequestWQ.cc                    |   18 +-
 src/librbd/AioImageRequestWQ.h                     |    1 -
 src/librbd/AioObjectRequest.cc                     | 1002 +++++++------
 src/librbd/AioObjectRequest.h                      |  695 ++++-----
 src/librbd/AsyncObjectThrottle.cc                  |    2 -
 src/librbd/AsyncObjectThrottle.h                   |    2 -
 src/librbd/AsyncRequest.cc                         |    2 -
 src/librbd/AsyncRequest.h                          |   10 +-
 src/librbd/CopyupRequest.cc                        |  390 ++---
 src/librbd/CopyupRequest.h                         |  153 +-
 src/librbd/DiffIterate.cc                          |    2 -
 src/librbd/ExclusiveLock.cc                        |   46 +-
 src/librbd/ExclusiveLock.h                         |   21 +-
 src/librbd/ImageCtx.cc                             |   23 +-
 src/librbd/ImageCtx.h                              |   13 +-
 src/librbd/ImageState.cc                           |  255 +++-
 src/librbd/ImageState.h                            |    9 +
 src/librbd/ImageWatcher.cc                         |  590 +++++---
 src/librbd/ImageWatcher.h                          |   14 +-
 src/librbd/Journal.cc                              |  313 +++-
 src/librbd/Journal.h                               |   55 +-
 src/librbd/LibrbdWriteback.cc                      |    4 +-
 src/librbd/LibrbdWriteback.h                       |    5 +-
 src/librbd/MirroringWatcher.cc                     |    1 +
 src/librbd/MirroringWatcher.h                      |    5 +-
 src/librbd/ObjectMap.cc                            |   18 +-
 src/librbd/ObjectMap.h                             |    5 +-
 src/librbd/Operations.cc                           |  107 +-
 src/librbd/Operations.h                            |   17 +-
 src/librbd/SnapInfo.h                              |    3 -
 src/librbd/TaskFinisher.h                          |   18 +-
 src/librbd/Utils.h                                 |    7 +-
 src/librbd/exclusive_lock/AcquireRequest.h         |    2 -
 src/librbd/exclusive_lock/ReleaseRequest.cc        |   15 +-
 src/librbd/exclusive_lock/ReleaseRequest.h         |    1 -
 src/librbd/image/CloseRequest.cc                   |   26 +-
 src/librbd/image/CloseRequest.h                    |    7 +-
 src/librbd/image/OpenRequest.cc                    |    1 -
 src/librbd/image/OpenRequest.h                     |    1 -
 src/librbd/image/RefreshRequest.cc                 |    2 -
 src/librbd/image/RefreshRequest.h                  |    1 -
 src/librbd/image/SetSnapRequest.cc                 |   10 +-
 src/librbd/image/SetSnapRequest.h                  |    2 -
 src/librbd/image_watcher/NotifyLockOwner.h         |    1 -
 src/librbd/internal.cc                             |   95 +-
 src/librbd/journal/Policy.h                        |    2 +-
 src/librbd/journal/Replay.cc                       |  177 ++-
 src/librbd/journal/Replay.h                        |   10 +-
 src/librbd/journal/StandardPolicy.cc               |    5 -
 src/librbd/journal/StandardPolicy.h                |    4 +-
 src/librbd/journal/Types.cc                        |    9 +-
 src/librbd/journal/Types.h                         |   17 +-
 src/librbd/librbd.cc                               |   56 +-
 src/librbd/object_map/InvalidateRequest.cc         |    2 -
 src/librbd/object_map/LockRequest.h                |    4 +-
 src/librbd/object_map/RefreshRequest.cc            |    3 -
 src/librbd/object_map/RefreshRequest.h             |    3 +-
 src/librbd/object_map/Request.cc                   |    3 -
 src/librbd/object_map/SnapshotCreateRequest.cc     |    1 -
 src/librbd/object_map/SnapshotRemoveRequest.cc     |    5 +
 src/librbd/object_map/SnapshotRemoveRequest.h      |    3 +-
 src/librbd/object_map/SnapshotRollbackRequest.cc   |    1 -
 src/librbd/object_map/UnlockRequest.h              |    5 +-
 src/librbd/object_map/UpdateRequest.cc             |    1 -
 src/librbd/operation/FlattenRequest.cc             |    4 +-
 src/librbd/operation/RebuildObjectMapRequest.cc    |    1 -
 src/librbd/operation/RenameRequest.h               |    1 -
 src/librbd/operation/Request.cc                    |   74 +-
 src/librbd/operation/Request.h                     |   37 +-
 src/librbd/operation/ResizeRequest.cc              |   27 +-
 src/librbd/operation/SnapshotCreateRequest.cc      |   31 +-
 src/librbd/operation/SnapshotCreateRequest.h       |    2 -
 src/librbd/operation/SnapshotProtectRequest.h      |    1 -
 src/librbd/operation/SnapshotRemoveRequest.cc      |    1 -
 src/librbd/operation/SnapshotRenameRequest.cc      |   16 +-
 src/librbd/operation/SnapshotRenameRequest.h       |    5 +-
 src/librbd/operation/SnapshotRollbackRequest.cc    |   12 +-
 src/librbd/operation/SnapshotRollbackRequest.h     |    2 +-
 src/librbd/operation/SnapshotUnprotectRequest.h    |    1 -
 src/librbd/operation/TrimRequest.cc                |   10 +-
 src/librbd/parent_types.h                          |    2 +-
 src/mds/CDir.cc                                    |   18 +-
 src/mds/CInode.cc                                  |   36 +-
 src/mds/CInode.h                                   |    9 +-
 src/mds/FSMap.cc                                   |    3 +-
 src/mds/Locker.cc                                  |    7 +-
 src/mds/MDCache.cc                                 |  268 ++--
 src/mds/MDCache.h                                  |   50 +-
 src/mds/MDSDaemon.cc                               |   59 +-
 src/mds/MDSDaemon.h                                |   10 +-
 src/mds/MDSMap.cc                                  |    2 +-
 src/mds/MDSRank.cc                                 |   73 +-
 src/mds/MDSRank.h                                  |   13 +-
 src/mds/Server.cc                                  |   92 +-
 src/mds/Server.h                                   |    1 +
 src/mds/SessionMap.cc                              |    2 +-
 src/mds/SimpleLock.h                               |    2 +-
 src/mds/SnapRealm.cc                               |    4 +-
 src/mds/StrayManager.cc                            |    7 +-
 src/mds/events/EMetaBlob.h                         |    6 +-
 src/mds/events/EOpen.h                             |    6 +-
 src/mds/journal.cc                                 |   38 +-
 src/mds/mdstypes.cc                                |    7 +-
 src/mds/mdstypes.h                                 |   40 +-
 src/messages/MClientReconnect.h                    |    4 +-
 src/messages/MMDSCacheRejoin.h                     |    3 +-
 src/messages/MNop.h                                |   54 +
 src/messages/Makefile.am                           |    3 +-
 src/mon/MDSMonitor.cc                              |   15 +-
 src/mon/MonClient.cc                               |    4 +-
 src/mon/Monitor.cc                                 |   38 +-
 src/mon/Monitor.h                                  |    6 +-
 src/mon/OSDMonitor.cc                              |   46 +-
 src/mon/OSDMonitor.h                               |    1 +
 src/msg/async/AsyncConnection.cc                   |  221 ++-
 src/msg/async/AsyncConnection.h                    |   59 +-
 src/msg/async/AsyncMessenger.cc                    |   69 +-
 src/msg/async/AsyncMessenger.h                     |   16 +-
 src/msg/async/Event.cc                             |   96 +-
 src/msg/async/Event.h                              |   10 +-
 src/msg/async/EventKqueue.cc                       |    6 +-
 src/msg/async/net_handler.cc                       |    2 +-
 src/ocf/Makefile.am                                |   14 +-
 src/ocf/Makefile.in                                |   49 +-
 src/ocf/ceph.in                                    |  177 ---
 src/os/filestore/FileStore.cc                      |    6 +-
 src/os/filestore/HashIndex.cc                      |   12 +-
 src/os/filestore/HashIndex.h                       |    5 +
 src/osd/OSD.cc                                     |   13 +-
 src/osd/PG.cc                                      |   26 +-
 src/osd/PG.h                                       |   14 +-
 src/osd/ReplicatedBackend.cc                       |    4 +
 src/osd/ReplicatedPG.cc                            |   91 +-
 src/osd/ReplicatedPG.h                             |    2 +-
 src/osd/osd_types.h                                |   19 +
 src/osdc/Journaler.cc                              |    2 +-
 src/osdc/ObjectCacher.cc                           |    3 +-
 src/osdc/Objecter.cc                               |    6 +-
 src/osdc/Objecter.h                                |    2 +-
 src/pybind/ceph_argparse.py                        |   67 +-
 src/pybind/ceph_volume_client.py                   |  601 +++++++-
 src/pybind/cephfs/cephfs.pyx                       |   67 +-
 src/rbd_replay/ActionTypes.cc                      |   18 +
 src/rbd_replay/ActionTypes.h                       |   28 +
 src/rbd_replay/actions.cc                          |   34 +-
 src/rbd_replay/actions.hpp                         |   30 +
 src/rbd_replay/ios.cc                              |   24 +
 src/rbd_replay/ios.hpp                             |   50 +
 src/rbd_replay/rbd-replay-prep.cc                  |   30 +
 src/rbdmap                                         |    2 +-
 src/rgw/Makefile.am                                |    2 +-
 src/rgw/librgw.cc                                  |    6 +-
 src/rgw/rgw_admin.cc                               |   67 +-
 src/rgw/rgw_auth_s3.cc                             |   11 +-
 src/rgw/rgw_bucket.cc                              |  125 +-
 src/rgw/rgw_bucket.h                               |    6 +-
 src/rgw/rgw_common.cc                              |   35 +-
 src/rgw/rgw_common.h                               |   42 +-
 src/rgw/rgw_coroutine.cc                           |   20 +-
 src/rgw/rgw_coroutine.h                            |   12 +-
 src/rgw/rgw_cors.cc                                |    7 +
 src/rgw/rgw_cors.h                                 |    1 +
 src/rgw/rgw_cr_rados.cc                            |   50 +-
 src/rgw/rgw_cr_rados.h                             |   70 +-
 src/rgw/rgw_cr_rest.h                              |    2 +-
 src/rgw/rgw_data_sync.cc                           |  623 +++++---
 src/rgw/rgw_data_sync.h                            |   25 +-
 src/rgw/rgw_fcgi_process.cc                        |    6 +-
 src/rgw/rgw_http_client.cc                         |   22 +-
 src/rgw/rgw_json_enc.cc                            |    2 +
 src/rgw/rgw_ldap.cc                                |   35 +
 src/rgw/rgw_ldap.h                                 |   54 +-
 src/rgw/rgw_object_expirer_core.cc                 |   56 +-
 src/rgw/rgw_object_expirer_core.h                  |   30 +-
 src/rgw/rgw_op.cc                                  |  213 ++-
 src/rgw/rgw_op.h                                   |    6 +-
 src/rgw/rgw_period_puller.cc                       |    7 +
 src/rgw/rgw_process.h                              |    1 -
 src/rgw/rgw_rados.cc                               |  401 ++++--
 src/rgw/rgw_rados.h                                |   83 +-
 src/rgw/rgw_realm_watcher.cc                       |    5 +-
 src/rgw/rgw_rest.cc                                |  227 ++-
 src/rgw/rgw_rest.h                                 |   11 +-
 src/rgw/rgw_rest_client.cc                         |    2 +-
 src/rgw/rgw_rest_realm.cc                          |    9 +-
 src/rgw/rgw_rest_s3.cc                             |  554 ++++++--
 src/rgw/rgw_rest_s3.h                              |   21 +-
 src/rgw/rgw_rest_s3website.h                       |    5 +-
 src/rgw/rgw_rest_swift.cc                          |   88 +-
 src/rgw/rgw_rest_swift.h                           |    2 +-
 src/rgw/rgw_rest_user.cc                           |   10 +-
 src/rgw/rgw_sync.cc                                |   49 +-
 src/rgw/rgw_user.cc                                |    7 +-
 src/rgw/rgw_user.h                                 |   31 +-
 src/rocksdb/Makefile                               |    3 +-
 src/script/subman                                  |    8 +-
 src/test/Makefile-client.am                        |   18 +-
 src/test/Makefile.am                               |    7 +-
 src/test/centos-6/ceph.spec.in                     |   47 +-
 src/test/centos-6/install-deps.sh                  |   32 +-
 src/test/centos-7/ceph.spec.in                     |   47 +-
 src/test/centos-7/install-deps.sh                  |   32 +-
 src/test/cli/radosgw-admin/help.t                  |    5 +-
 src/test/debian-jessie/install-deps.sh             |   32 +-
 src/test/fedora-21/ceph.spec.in                    |   47 +-
 src/test/fedora-21/install-deps.sh                 |   32 +-
 src/test/journal/RadosTestFixture.cc               |   10 +-
 src/test/journal/RadosTestFixture.h                |    5 +-
 src/test/journal/mock/MockJournaler.h              |   20 +-
 src/test/journal/test_FutureImpl.cc                |    7 +-
 src/test/journal/test_JournalPlayer.cc             |  748 +++++-----
 src/test/journal/test_Journaler.cc                 |    9 +-
 src/test/journal/test_ObjectPlayer.cc              |  211 +--
 src/test/librados/cmd.cc                           |   35 +
 src/test/librados/misc.cc                          |   24 +-
 src/test/librados/test.h                           |    2 +-
 .../exclusive_lock/test_mock_ReleaseRequest.cc     |   20 +-
 src/test/librbd/fsx.cc                             |   42 +-
 src/test/librbd/journal/test_Entries.cc            |    6 +-
 src/test/librbd/journal/test_Replay.cc             |   42 +-
 src/test/librbd/journal/test_mock_Replay.cc        |   61 +-
 src/test/librbd/mock/MockImageCtx.h                |   47 +-
 src/test/librbd/mock/MockJournal.h                 |   34 +-
 src/test/librbd/mock/MockJournalPolicy.h           |    2 +-
 src/test/librbd/mock/MockOperations.h              |   22 +-
 .../object_map/test_mock_SnapshotRemoveRequest.cc  |   25 +
 .../librbd/operation/test_mock_ResizeRequest.cc    |   37 +-
 .../operation/test_mock_SnapshotRollbackRequest.cc |   12 +-
 src/test/librbd/test_internal.cc                   |    6 +-
 src/test/librbd/test_librbd.cc                     |  195 ++-
 src/test/librbd/test_mirroring.cc                  |    3 +-
 src/test/librbd/test_mock_ExclusiveLock.cc         |   71 +
 src/test/librbd/test_mock_Journal.cc               |  194 ++-
 src/test/librbd/test_mock_fixture.cc               |   18 +-
 src/test/librbd/test_mock_fixture.h                |    5 +-
 src/test/librbd/test_support.h                     |   13 +-
 src/test/msgr/test_msgr.cc                         |   33 +-
 src/test/opensuse-13.2/ceph.spec.in                |   47 +-
 src/test/opensuse-13.2/install-deps.sh             |   32 +-
 src/test/osd/osd-scrub-repair.sh                   |   49 +
 src/test/osd/osd-scrub-snaps.sh                    |   46 +-
 src/test/osdc/MemWriteback.cc                      |  163 +++
 src/test/osdc/MemWriteback.h                       |   49 +
 src/test/osdc/object_cacher_stress.cc              |  192 ++-
 src/test/pybind/test_ceph_argparse.py              |   26 +-
 src/test/rbd_mirror/image_replay.cc                |  225 ---
 .../image_replayer/test_mock_BootstrapRequest.cc   |   15 +
 .../image_replayer/test_mock_EventPreprocessor.cc  |  265 ++++
 .../image_sync/test_mock_ImageCopyRequest.cc       |  232 ++-
 .../image_sync/test_mock_ObjectCopyRequest.cc      |   60 +-
 .../image_sync/test_mock_SnapshotCopyRequest.cc    |  109 +-
 .../image_sync/test_mock_SnapshotCreateRequest.cc  |   56 +-
 .../image_sync/test_mock_SyncPointCreateRequest.cc |   31 +-
 .../image_sync/test_mock_SyncPointPruneRequest.cc  |   97 +-
 src/test/rbd_mirror/test_ImageDeleter.cc           |   37 +-
 src/test/rbd_mirror/test_ImageReplayer.cc          |  357 ++++-
 src/test/rbd_mirror/test_ImageSync.cc              |    3 +-
 src/test/rbd_mirror/test_mock_ImageReplayer.cc     |   60 +-
 src/test/rbd_mirror/test_mock_ImageSync.cc         |   84 +-
 .../rbd_mirror/test_mock_ImageSyncThrottler.cc     |  409 ++++++
 src/test/strtol.cc                                 |   15 +
 src/test/test_subman.sh                            |   28 +
 src/test/ubuntu-12.04/install-deps.sh              |   32 +-
 src/test/ubuntu-14.04/install-deps.sh              |   32 +-
 src/tools/Makefile-client.am                       |    4 +
 src/tools/rados/rados.cc                           |  132 +-
 src/tools/rbd/action/Journal.cc                    |    3 +-
 src/tools/rbd_mirror/ImageDeleter.cc               |  182 ++-
 src/tools/rbd_mirror/ImageDeleter.h                |   50 +-
 src/tools/rbd_mirror/ImageReplayer.cc              |  355 ++++-
 src/tools/rbd_mirror/ImageReplayer.h               |   59 +-
 src/tools/rbd_mirror/ImageSync.cc                  |    5 +-
 src/tools/rbd_mirror/ImageSyncThrottler.cc         |  253 ++++
 src/tools/rbd_mirror/ImageSyncThrottler.h          |  107 ++
 src/tools/rbd_mirror/Mirror.cc                     |   34 +-
 src/tools/rbd_mirror/Mirror.h                      |    1 +
 src/tools/rbd_mirror/PoolWatcher.cc                |    8 +
 src/tools/rbd_mirror/PoolWatcher.h                 |    3 +
 src/tools/rbd_mirror/Replayer.cc                   |  196 ++-
 src/tools/rbd_mirror/Replayer.h                    |   19 +-
 .../rbd_mirror/image_replayer/BootstrapRequest.cc  |   65 +-
 .../rbd_mirror/image_replayer/BootstrapRequest.h   |   38 +-
 .../rbd_mirror/image_replayer/EventPreprocessor.cc |  202 +++
 .../rbd_mirror/image_replayer/EventPreprocessor.h  |  118 ++
 .../image_replayer/OpenLocalImageRequest.cc        |   15 +-
 .../rbd_mirror/image_sync/ImageCopyRequest.cc      |  105 +-
 src/tools/rbd_mirror/image_sync/ImageCopyRequest.h |    7 +
 .../rbd_mirror/image_sync/ObjectCopyRequest.cc     |    7 +
 .../rbd_mirror/image_sync/SyncPointPruneRequest.cc |   24 +-
 .../rbd_mirror/image_sync/SyncPointPruneRequest.h  |    2 +
 src/tools/rbd_mirror/types.h                       |    4 +
 src/tools/rbd_nbd/rbd-nbd.cc                       |   39 +-
 src/tracing/librbd.tp                              |   38 +
 src/vstart.sh                                      |   12 +-
 udev/60-ceph-by-parttypeuuid.rules                 |   31 +
 398 files changed, 15241 insertions(+), 6942 deletions(-)

diff --git a/AUTHORS b/AUTHORS
index ea5d120..4352f3e 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -15,6 +15,7 @@ Ahoussi Armand <ahoussi.say at telecom-bretagne.eu>
 Ailing Zhang <zhangal1992 at gmail.com>
 Alan Grosskurth <code at alan.grosskurth.ca>
 Alan Somers <asomers at gmail.com>
+Albert Tu <albert.t at inwinstack.com>
 Alexander Chuzhoy <achuzhoy at redhat.com>
 Alexandre Derumier <aderumier at odiso.com>
 Alexandre Marangone <alexandre.marangone at inktank.com>
@@ -92,6 +93,7 @@ Chen Baozi <baozich at gmail.com>
 Chen Dihao <tobeg3oogle at gmail.com>
 Chendi Xue <chendi.xue at intel.com>
 Cheng Cheng <ccheng.leo at gmail.com>
+Chengwei Yang <yangchengwei at qiyi.com>
 Chengyuan Li <chengyli at ebay.com>
 Chris Dunlop <chris at onthe.net.au>
 Chris Glass <tribaal at gmail.com>
@@ -111,6 +113,7 @@ Colin Mattson <colinmattson at gmail.com>
 Colin P. McCabe <colinm at hq.newdream.net>
 cy.l at inwinstack.com <cy.l at inwinstack.com>
 Dan Chai <tengweicai at gmail.com>
+Dan Horák <dan at danny.cz>
 Daniel Gollub <d.gollub at telekom.de>
 Daniel Gryniewicz <dang at redhat.com>
 Daniel J. Hofmann <daniel at trvx.org>
@@ -160,6 +163,7 @@ Fabio Alessandro Locati <fabiolocati at gmail.com>
 fangdong <yp.fangdong at gmail.com>
 Federico Gimenez <fgimenez at coit.es>
 Federico Simoncelli <fsimonce at redhat.com>
+Feng Guo <diluga at gmail.com>
 Feng He <fenglife at hotmail.com>
 Feng Wang <cyclonew at cs.ucsc.edu>
 Filippos Giannakos <philipgian at grnet.gr>
@@ -231,8 +235,10 @@ Jean-Rémi Deveaux <jeanremi.deveaux at gmail.com>
 Jeff Epstein <jepst79 at gmail.com>
 Jeffrey Lu <lzhng2000 at aliyun.com>
 Jeff Weber <jweber at cofront.net>
+Jenkins Build Slave User <ceph-release-team at redhat.com>
 Jenkins Build Slave User <jenkins-build at trusty-huge--11a52675-9585-4db4-a514-798db40d6da2.localdomain>
 Jenkins Build Slave User <jenkins-build at trusty-huge--abdffd45-81df-4aa8-a769-e169993c7a0f.localdomain>
+Jenkins Build Slave User <jenkins-build at trusty-huge--dfeed223-b008-47e6-b689-a0d96992b62c.localdomain>
 Jenkins Build Slave User <jenkins-build at trusty-small-unique--68a2c286-dc75-4669-822d-28cd109dc3c5.localdomain>
 Jenkins <jenkins at ceph.com>
 Jens-Christian Fischer <jens-christian.fischer at switch.ch>
@@ -247,6 +253,7 @@ Jiaying Ren <mikulely at gmail.com>
 Ji Chen <insomnia at 139.com>
 Jie Wang <jie.wang at kylin-cloud.com>
 Jim Schutt <jaschut at sandia.gov>
+Jim Wright <jim at quadraturecapital.com>
 Jingkai Yuan <jingkai.yuan at intel.com>
 João Eduardo Luís <joao.luis at inktank.com>
 João Eduardo Luís <joao at redhat.com>
@@ -346,6 +353,7 @@ Michael Nelson <mikenel at tnld.net>
 Michael Riederer <michael at riederer.org>
 Michael Rodriguez <michael at newdream.net>
 Michal Jarzabek <stiopa at gmail.com>
+Michel Normand <normand at linux.vnet.ibm.com>
 Mike Kelly <pioto at pioto.org>
 Mike Lundy <mike at fluffypenguin.org>
 Mike Ryan <mike.ryan at inktank.com>
@@ -372,6 +380,7 @@ Ning Yao <yaoning at unitedstack.com>
 Nishtha Rai <nishtha3rai at gmail.com>
 Noah Watkins <nwatkins at redhat.com>
 (no author) <(no author)@29311d96-e01e-0410-9327-a35deaab8ce9>
+Oleh Prypin <oleh at pryp.in>
 Orit Wasserman <owasserm at redhat.com>
 Owen Synge <osynge at suse.com>
 Padraig O'Sullivan <posulliv at umd.edu>
@@ -385,6 +394,7 @@ Paul Chiang <paul_chiang at tcloudcomputing.com>
 Paul Meserve <paul at pogodan.com>
 Pavan Rallabhandi <pavan.rallabhandi at sandisk.com>
 Peter Reiher <reiher at inktank.com>
+Peter Sabaini <peter at sabaini.at>
 Peter Vinson <peter at hq.newdream.net>
 Peter Wienemann <wienemann at physik.uni-bonn.de>
 Pete V <peter at squid.newdream.net>
@@ -393,6 +403,7 @@ Petr Machata <pmachata at redhat.com>
 Pierre Chaumont <pierre.chaumont31 at gmail.com>
 Pierre Rognant <prognant at oodrive.com>
 Piotr Dałek <piotr.dalek at ts.fujitsu.com>
+Pritha Srivastava <prsrivas at redhat.com>
 Qiankun Zheng <zheng.qiankun at h3c.com>
 Qinghua Jin <qhjin_dev at 163.com>
 Rachana Patel <rachana83.patel at gmail.com>
@@ -451,6 +462,7 @@ Shang Ding <dingshang2013 at 163.com>
 Shanggao Qiu <qiushanggao at qq.com>
 Sharif Olorin <sio at tesser.org>
 Shawn Edwards <lesser.evil at gmail.com>
+Shilpa Jagannath <smanjara at redhat.com>
 Shishir Gowda <shishir.gowda at sandisk.com>
 Shotaro Kawaguchi <kawaguchi.s at jp.fujitsu.com>
 Shun Song <song.shun3 at zte.com.cn>
diff --git a/ChangeLog b/ChangeLog
index 66d6661..3ace453 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,4 +1,307 @@
-45107e2 (HEAD, tag: v10.2.2, origin/jewel) 10.2.2
+ecc2377 (HEAD, tag: v10.2.3, origin/jewel) 10.2.3
+577336e mds: fix double-unlock on shutdown
+21da103 rgw: collect skips a specific coroutine stack
+98779c3 rgw: fix compilation
+f5d9537 rgw: remove datalog keys from error repo on ENOENT
+32505b2 test/rgw: add optional --tenant flag to test_multi.py
+f68337a rgw: add tenant id to GetBucketInstanceInfo
+a0ffffa rgw: carry tenant id with data sync
+bff626f rgw: add tenant to url for RGWRESTStreamWriteRequest
+dc50687 rgw: add tenant to rgw_bucket json format
+06223ad rgw: add rgw_bucket_parse_bucket_key()
+b4f687e rgw: add tenant name to datalog entries
+dda0ee0 rgw: convert bucket instance listings back to metadata key format
+6225a6d rgw: use tenant/ for bucket instance metadata keys
+cac6612 rgw: add get_key() methods to format rgw_buckets
+ac557e0 rgw: data sync debug logging
+6bb8c15 rgw: modifying multi-site log messages.
+ecea6dc librbd: delay acquiring exclusive lock if watch has failed
+49a39eb librbd: convert ImageWatcher class to template
+f4fb598 build/ops: bump rocksdb submodule
+1cbc839 osd: set objecter.osdmap when starts
+4d37cfb mon/MonClient: should update sub_sent with sub_new
+5498377 librbd: cache was not switching to writeback after first flush
+471871e test: unit test cases for disabling librbd journal by policy
+062162f8 librbd: utilize factory methods to create AioObjectRequest objects
+d8eddc6 librbd: convert AioObjectRequest/AioObjectRead classes to templates
+1a7cb60 librbd: move read callback helpers to narrowest scope
+026f6d2 librbd: convert AioImageRequest derived classes to templates
+61f0acb librbd: removed namespace indentation from legacy classes
+e83866b librbd: do not record journal events if append is disabled by policy
+a9a84ba librbd: remove unused journal replay canceled callback
+9429500 librbd: optionally support disabling journal appends via policy
+1f63307 librbd: optimize header file dependency & modify related file.
+c49398d librbd: optimize operation header file dependency
+5366973 librbd: optimize journal header file dependency
+c95b83e librbd: optimize image_watcher header file dependency
+e955496 librbd: optimize image header file dependency
+f403aba librbd: optimize exclusive_lock  header file dependency
+0ca8071 librbd: optimize object-map header file dependency
+0331414 test: fix librbd tests for rbd_skip_partial_discard
+d0c0c2f librbd: discard hangs when 'rbd_skip_partial_discard' is enabled
+5bf4398 librbd: object_may_exist always return true when you write an empty object
+6786106 librbd: ensure that AIO ops are started prior to request
+47279f8 librbd: helper method for creating and starting AioCompletions
+d336735 rbd-mirror: potential assertion failure during error-induced shutdown
+d1e0512 rbd-mirror: potential race condition during failure shutdown
+48ee3aa rbd-mirror: replaying state should include flush action
+e098fb1 doc: added rbdmap man page to RBD restructured index
+b7793d7 rbdmap: specify bash shell interpreter
+92d7882 rbd-mirror: remove ceph_test_rbd_mirror_image_replay test case
+0d9d573 qa/workunits/rbd: override rbd-mirror integration test poll frequency
+96cdb11 rbd-mirror: do not cancel maintenance ops with missing finish events
+d47a232 rbd-mirror: potential memory leak when attempting to cancel image sync
+1e4d98c rbd-mirror: fix issues detected when attempting clean shut down
+e4c4319 rbd-mirror: shut down image replayers in parallel
+1e38212 rbd-mirror: configuration options to control replay throttling
+549aada librbd: new configuration option to restrict journal payload size
+99195e8 librbd: wait for journal commit op event to be safely recorded
+3ae52eb journal: optimize speed of live replay journal pruning
+3850ded journal: possible deadlock during flush of journal entries
+b4b9843 journal: improve debug log messages
+196de27 journal: support streaming entry playback
+b08335d journal: replay should only read from a single object set
+c6f5303 journal: optionally restrict maximum entry payload size
+6dc609f journal: optionally fetch entries in small chunks during replay
+91c70f3 journal: helper class for organizing optional settings
+d68fe79 rbd-mirror: preprocess journal events prior to applying
+01f5f3f rbd-mirror: event preprocessor to handle snap rename operations
+f71dc87 librbd: improve journaling debug log messages
+14db535 librbd: separate journal event decoding and processing
+0c4a73f librbd: record original snap name in snap rename journal event
+1a25490 librbd: simple duplicate op checks for all maintenance operations
+45a0b74 qa/workunits/rbd: exercise snapshot renames within rbd-mirror test
+c7ab24e librbd: re-register watch on old format image rename
+46fdba4 rbd-mirror: gracefully restart pool replayer when blacklisted
+a806cdb rbd-mirror: do not clear image replayer error code after shut down
+b88a851 rbd-mirror: image deleter should use provided librados connection
+4b05677 rbd-mirror: each pool replayer should use its own librados connection
+569fbee rbd-mirror: clean up ImageReplayer before stopping state machine
+55460e2 rgw: raise log levels for common radosgw-admin errors
+73cc608 rgw: fixes for period puller
+f298643 rgw: ONLY improve code formatting in rgw_object_expirer_core.{cc,h}.
+e7be5a5 rgw: fix skipping some objects to delete by RadosGW's object expirer.
+af7e1e0 rgw: fix trimming object expirer's hints.
+dc154cd osd: fix the mem leak of RepGather
+96ad2d1 rgw: can set negative max_buckets on RGWUserInfo
+dbf8cf0 rgw: improve support for Swift's object versioning.
+5ffdc34 doc: format 2 now is the default image format
+bd70d6d qa: remove tmap_migrate tests from upgrade testing
+49db733 qa: add rados test script for upgrades
+3ff6e8f rgw: Fallback to Host header for bucket name.
+90d29fc rbd-mirror: include local pool id in resync throttle unique key
+03c7614 test: ensure unique template specializations per test case
+ea9e031 test: missing template specialization for unittest_rbd_mirror
+c2e6d08 qa/workunits/rbd: increase writes in large image count test
+88a7c5c rbd-mirror: prune sync points referencing missing snapshots
+9a6bad3 rbd-mirror: ignore empty snapshot sequence mapping
+c8089de qa/workunits/rbd: remove temporary image exports
+bdb2189 rbd-mirror: fix potential image replayer state transition race
+e7d7990 rbd-mirror: cancel image deletion callback on shut down
+bba7811 rbd-mirror: fixed potential leaking image deletion context callback
+b71efb0 rbd-mirror: ImageReplayer doesn't need image deleted callback
+b657d18 rbd-mirror: use async callback when deletion not in-progress
+c9c1216 rbd-mirror: tests: ImageSyncThrottler unit tests
+c56d6ec rbd-mirror: Usage of image-sync throttler in BootstrapRequest
+5323ebd rbd-mirror: Implementation of image-sync throttler
+783bd68 rbd-mirror: tests: Support for inflight image sync point update
+b878f5f rbd-mirror: image-sync: Periodically update sync point object number
+2ba61a7 rbd-mirror: image-replayer: Fix bug in resync listener remotion
+cbd1ab4 rbd-mirror: resync: Added unit tests
+5fe93a4 rbd-mirror: image-replayer: Implementation of resync operation
+c2b786c rbd: journal: Support for listening updates on client metadata
+21d2ba5 journal: Support for registering metadata listeners in the Journaler
+fcc00f7 test: fsx: fix rbd-nbd daemon logging
+63fbed0 qa/workunits/rbd: image resize test for rbd-nbd
+3db1a3c rbd-nbd: use librbd API method to watch image size update
+8f94f5e librbd: API: methods to watch image stat update
+22c9df1 rgw: Fix for using port 443 with pre-signed urls.
+ce5e250 rgw: added zone rename to radosgw_admin
+ce986aa rgw: RGWMetaSyncCR holds refs to stacks for wakeup
+2485efc rgw: clear realm watch on failed watch_restart
+fa2e42d rgw: use endpoints from master zone instead of zonegroup
+a865f26 radosgw-admin: zone[group] modify can change realm id
+f5db5a4 mon: tolerate missing osd metadata
+36e5c86 mon: fix metadata dumps for empty lists
+de99bd5 mon: 'std::move` Metadata when updating it
+828df39 mon: fix 'mon metadata' for lone monitors
+308f514 ceph.spec.in: fix rpm package building error as follows:
+727a704 udev: always populate /dev/disk/by-parttypeuuid
+9fc5b41 librbd: optimize away unnecessary object map updates
+81a2534 rbd-replay: decode and replay discard IO operations
+5968664 librbd: failed assertion after shrinking a clone image twice
+8316b6a rbd-mirror: gracefully fail if object map is unavailable
+5ae0e43 subman: use replace instead of format
+0ca2772 cmake: script that sets env vars for unit tests
+0ff0960 ExclusiveArch for suse_version
+8b9954d rgw: Fix civetweb IPv6
+7e1ba28 os: Fix HashIndex::recursive_remove() to remove everything but original path
+cb1f17f filestore: Clear objects before calling collection_list() again
+bdcfcaf filestore: Improve logging
+e1eb8af ceph_volume_client: version on-disk metadata
+e8dd1ee ceph_volume_client: add versioning
+a0ffc85 ceph_volume_client: disallow tenants to share auth IDs
+d3e22db ceph_volume_client: cleanup auth meta files
+2e9c37a ceph_volume_client: fix log messages
+7e93d3d ceph_volume_client: create/delete VMeta for create/delete volume
+7cea0ee ceph_volume_client: modify locking of meta files
+e77684f cephfs.pyx: implement python bindings for fstat
+d2792ef ceph_volume_client: restrict volume group names
+4f874dd ceph_volume_client: use fsync instead of syncfs
+bde2c8f pybind: expose fsync in cephfs binding.
+0c13bf2 ceph_volume_client: recover from dirty auth and auth meta updates
+8f7defb ceph_volume_client: modify data layout in meta files
+748a5a0 pybind: ceph_volume_client authentication metadata
+e14dc25 pybind: enable integer flags to libcephfs open
+c6546bb buffer: fix iterator_impl visibility through typedef
+e825dd2 rgw multisite: preserve zone's extra pool
+059ed62 rgw: remove bucket index objects when deleting the bucket
+93d7875 rgw: add missing master_zone when running with old default region config
+dc96383 rgw: fix error_repo segfault in data sync
+67eb961 rgw: add line space between inl. member function defns (rgw_user.h)
+3f8298a rgw-admin: return error on email address conflict (add user)
+962e7dc rgw-admin: convert user email addresses to lower case
+461782e selinux: allow chown for self and setattr for /var/run/ceph
+fe57ace rgw ldap: fix ldap bindpw parsing
+429e9c0 rgw: aws4: fix buffer sharing issue with chunked uploads
+7c2eab1 mds: move Finisher to unlocked shutdown
+28697fc mds: Kill C_SaferCond in evict_sessions()
+1d3a816 mds: fix shutting down mds timed-out due to deadlock
+49a1ce9 msg/async: remove the unnecessary checking to wakup event_wait
+2beb56a mds: fix SnapRealm::have_past_parents_open()
+3d6d36a ceph-disk: partprobe should block udev induced BLKRRPART
+8cd7f44 rgw: aws4: add STREAMING-AWS4-HMAC-SHA256-PAYLOAD support
+276ec72 rgw: use std::unique_ptr for rgw_aws4_auth management.
+2c422e3 rgw: add handling of memory allocation failure in AWS4 auth.
+2cd3ed8 ceph_volume_client: allow read-only authorization for volumes
+46246e3 osd: increment stas on recovery pull also
+3da251f pybind/ceph_argparse: handle non ascii unicode args
+b01af21 Fix tabs->whitespace in ceph_argparse
+bb2dc95 Make usage of builtins in ceph_argparse compatible with Python 3
+f80d10c rpm: move libatomic_ops-devel to non-distro-specific section
+a28810c rpm: move gperftools-devel to non-distro-specific section
+e6b7a4b rpm: use new name of libatomic_ops-devel
+9bbf2e8 fix tcmalloc handling in spec file
+b26acc0 ceph-osd-prestart.sh: drop Upstart-specific code
+1e622a5 rpm: Fix creation of mount.ceph symbolic link for SUSE distros
+89cb116 build/ops: build mount.ceph and mount.fuse.ceph as client binaries
+84b45b7 rpm: move mount.ceph from ceph-base to ceph-common
+e028cd1 rpm: create mount.ceph symlink in /sbin (SUSE only)
+ad67d1c makefile: install mount.fuse.ceph,mount.ceph into /usr/sbin
+2538b77 client: fix MetaRequest::set_other_inode()
+ffcfe69 client: close directory's snapdir when deleting directory
+b900702 client: invalidate snap inodes after removing snapshot
+d9f957a mds: fix incorrect "unconnected snaprealm xxx" warning
+3e745ef qa/workunits/fs: fix expect_failure function in test scripts
+37157d1 client: make sure snapflush is sent before normal cap message
+326d46b client: unify cap flush and snapcap flush
+5c2ff04 mds: handle partly purged directory
+57b39f0 mds: do files recovery after processing cap flushes
+bace1c8 mds: combine MDCache::{reconnected_caps,cap_imports_dirty}
+365bda1 mds: remove CEPH_LOCK_IFLOCKL from cinode_lock_info
+55367c5 mds: rebuild the internal states that tracking pending snapflush
+0897fc4 mds: using cap_reconnect_t to track cap recconect
+ac508dc mds: add 'follows' of first pending snapflush to cap reconnect message
+c258f52 mds: journal snap inodes that need flush when expiring log segment
+42dd72d mds: properly update client_snap_caps when splitting snap inode
+b15cf42 install-deps: Get the latest virtualenv in a temporary folder to work around a bug in old virtualenv
+00f3fd4 ceph-fuse: link to libtcmalloc or jemalloc
+7178f23 doc: fix standby replay config
+8981f3b rgw: add pg_ver to tombstone_cache
+6c32fe8 rgw: add obj tombstone cache
+f651a8f rgw: rgw_obj comparison also accounts for bucket id
+354e81c cls/rgw: reduce tag_timeout significantly
+24d4831 cls_rgw: dir_suggest entries bucket index logging
+bdef8b2 cls/rgw: fix timespan comparison
+c34dbd4 rgw: data sync squash, prioritize object removal
+09eee3b rgw: squash bi complete bi log entries
+571a132 rgw: stop bucket lease only after draining object sync operations
+7af0306 rgw: don't leak RGWRados::cr_registry on error
+9591e50 rgw: dont need atomic RGWRados::next_rados_handle
+047379c rgw: remove unneeded RGWRados::num_rados_handles
+7848482 rgw: use vector for librados handles
+2c39d36 rgw: Add documentation for the Multi-tenancy feature
+9e5a3ae RGW:add socket backlog setting for via ceph.conf http://tracker.ceph.com/issues/16406
+aa39361 mds: disallow 'open truncate' non-regular inode
+3af7b42 mds: only open non-regular inode with mode FILE_MODE_PIN
+8655803 rgw: fix multi-delete query param parsing.
+546141c rgw: Set Access-Control-Allow-Origin to a Asterisk if allowed in a rule
+2afc176 rgw: fix double counting in RGWRados::update_containers_stats()
+489f8ce librbd: fix missing return statement if failed to get mirror image state
+a7987f0 MDSMonitor.cc: fix mdsmap.<namespace> subscriptions
+d244b7a mds: add maximum fragment size constraint
+64d99b1 mds: fix Session::check_access()
+bce5646 client: skip executing async invalidates while umounting
+fd7ff96 ceph-fuse: add option to disable kernel pagecache
+3f76e4a rgw: finish error_repo cr in stop_spawned_services()
+a5f5513 test: fix CMake build of ceph_test_objectcacher_stress
+17f1bff ObjectCacher: fix bh_read_finish offset logic
+73bc6d1 osd: provide some contents on ObjectExtent usage in testing
+3446fa4 test: build a correctness test for the ObjectCacher
+b668491 test: split objectcacher test into 'stress' and 'correctness'
+74f5920 test: add a data-storing MemWriteback for testing ObjectCacher
+757babb librbd: memory leak possible if journal op event failed
+e7ec20e librbd: ignore snap unprotect -EBUSY errors during journal replay
+cbc9636 librbd: delete ExclusiveLock instance when switching to snapshot
+d9c3f28 librbd: mark exclusive lock as released after journal is closed
+b1d9698 librbd: do not propagate mirror status notification failures
+51defea librbd: fix crash while using advisory locks with R/O image
+db28ddc rbd-mirror: block proxied ops with -EROFS return code
+ebce8ce librbd: optionally block proxied requests with an error code
+70bf746 librbd: potential race when replaying journal ops
+f3f4a4a librbd: journal callback to interrupt replay
+b203168 rbd-mirror: keep local pointer to image journal
+0399958 rbd-mirror: keep events from different epochs independent
+6a28d63 librbd: fix lockdep issue when duplicate event detected
+1e85da9 rbd-mirror: ensure replay status formatter has completed before stopping
+bf58eab journal: do not log watch errors against deleted journal
+deb6ca8 librbd: force-remove journal when disabling feature and removing image
+415ea77 librbd: ignore ENOENT error when removing image from mirror directory
+cf65ed9 rbd: Skip rbd cache flush if journaling is enabled under aio_flush
+caad884 mon: Monitor: validate prefix on handle_command()
+3250c4d rgw_swift: newer versions of boost/utility no longer include in_place
+dd635e4 librbd: ignore missing object map during snap remove
+db7ce96 librbd: removal of partially deleted image needs id lookup
+c1a47c7 packaging: move parted requirement to -osd subpkg
+74dd035 osd/PG: set last_* PG fields on a creating PG
+2c03e02 osd: set pg history last_clean_scrub_stamp on create
+a2e8ae6 osd: sparse_read offset may not be zero for ecpool
+b8f7aa2 msg/async/AsyncConnection: make verify auth callback without connection lock
+3dbb08a AsyncConnection: create writable event for in progress connection
+6554d46 OSDMonitor::prepare_pgtemp: only update up_thru if newer
+a826bb8 OSDMonitor: drop pg temp from sources other than the current primary
+18fdc1c osd: reject PARALLELEXEC ops with EINVAL
+f91da93 ceph_test_rados_api_misc: test rados op with bad flas
+edd3f79 msg/async: close STATE_WAIT connection in short period
+af8e86c test_msgr: add assert if wait for shutdown hang
+42ef435 test/test_msgr: add shutdown hang debug info
+d4c531d test_msgr: add verbose info for pending connections
+90ce35c msg/async/AsyncConnection: lower racing case debug log level
+1f19dbd librbd: potential use after free on refresh error
+ad3b788 Drop ceph Resource Agent
+b751d48 librbd: flag image as updated after proxying maintenance op
+9b75275 install-deps.sh: use mk-build-deps instead of processing control
+a34b227 xio: add MNop.h to dist tarball
+393bf7e rgw: check for -ERR_NOT_MODIFIED in rgw_rest_s3.cc
+6b41d76 TaskFinisher: cancel all tasks wait until finisher done
+762db30 rgw: support size suffixes for --max-size in radosgw-admin command
+e3a99c0 common: add int64_t template for strict_si_cast()
+f8e4911 rados: Add cleanup message with time to rados bench output
+8b82bb8 qa/workunits/rbd: respect RBD_CREATE_ARGS environment variable
+9415d38 rados: Improve list-inconsistent json format
+2fd6e7c test: Fix test to not use jq -S which isn't avail in all distributions
+c789aa3 test: Add testing of new scrub commands in rados
+a6f3f76 rados: Don't bother showing list-inconsistent-* errors that aren't set
+4c72195 osd, rados: Fixes for list-inconsistent-snapset
+4e4e562 include, rados: Fixes for list-inconsistent-obj and librados
+80f0dd3 rados: Balance format sections in same do_get_inconsistent_cmd()
+d248aba rados: Include epoch in the list-inconsistent-* command output
+43141e3 rados: Improve error messages for list-inconsistent commands
+502540f 10.2.1
+b4a80cb qa/workunits/rbd: specify source path
+70c97bd qa/workunits/rbd: additional rbd-mirror stress tests
+c7cfb48 vstart: add --nolockdep option
+45107e2 (tag: v10.2.2) 10.2.2
 dd1ea65 librbd: remove should ignore mirror errors from older OSDs
 2656881 librbd: track in-flight flush events for journaling
 a85fbb4 librbd: do not shut down exclusive lock while acquiring
@@ -59,12 +362,38 @@ bb279f1 librbd: refresh image if needed in mirror functions
 4a967eb Revert "osd/ReplicatedPG: for copy_get get omap, firstly check ob whether has omap."
 fd8f8af Revert "osd/ReplicatedPG: For omap read ops, it should check object wether has omap"
 d59ca31 Revert "osd/ReplicatedPG: When do omapclear, it should check object whether is omap."
+d48a1ed rgw/s3website: Fix x-amz-website-redirect-location support.
 f4306de osdc/Objecter: upper bound watch_check result
 64f15b3 OSD: fix deadlock in OSD::_committed_osd_maps
 adfbe95 osd: handle boot racing with NOUP set
 e424482 TaskFinisher: use singleton SafeTimer and Finisher
 99ff1fc BackoffThrottle: wait() if past max
 5ce43eb BackoffThrottle: use wait_for instead of wait_until
+3f8642d osd: fix sched_time not actually randomized
+6619bd9 ReplicatedPG: adjust num_pinned in _delete_oid
+b7f0e73 PG: update PGPool to detect map gaps and reset cached_removed_snaps
+1c7abcb librados: Added declaration for rados_aio_get_version
+ec884a3 rgw/s3website: whitespace style fixes
+bf26b6e rgw/s3website: Fix ErrocDoc memory leak.
+36672c6 rgw/s3website: Fix x-amz-website-redirect-location support.
+3c0ac8e rgw/s3website: Implement ErrorDoc & fix Double-Fault handler
+cb9e9e1 msg/async: Implement smarter worker thread selection
+578ac8a Event: fix delete_time_event while in processing list
+8c7a13f test_msgr: add delay inject test
+846992f AsyncConnection: make delay message happen within original thread
+9b199d0 msg/async: add missing DelayedDelivery and delay injection
+0e6324a Event: replace ceph_clock_now with coarse_real_clock
+6597fab msg/async: fix some return values and misspellings.
+18f18c7 msg/async: delete the confused comments.
+79354f4 msg/async: add numevents statistics for external_events
+4005a51 AsyncConnection: remove unnecessary "send" flag
+441847d async: skip unnecessary steps when parsing simple messages
+c025010 client: fstat should take CEPH_STAT_CAP_INODE_ALL
+c3f6d82 mon/MDSMonitor: fix wrongly set expiration time of blacklist
+d4017ae mon/MDSMonitor: fix wrong positive of jewel flag check
+eea0e91 mds: finish lock waiters in the same order that they were added.
+2c7fc95 mds: fix race between StrayManager::{eval_stray,reintegrate_stray}
+19c1366 osdc: send error to recovery waiters on shutdown
 cec6870 test/rbd: fsx needs to shut down the journal
 09200d4 remove invalid objectmap flag when objectmap is disabled Fixes: http://tracker.ceph.com/issues/16076 Signed-off-by: xinxin shu <shuxinxin at chinac.com>
 6c0e202 rbd: check value of config override when setting image-meta
@@ -74,6 +403,17 @@ f9e32ac librbd: clone: default to parent features
 73464af test: initial python APIs to support mirroring
 5c31266 rbd: initial python APIs to support mirroring
 3084cf3 rbd: close journal before removing
+7b0318a mds: fix mdsmap print_summary with standby replays
+f858f94 rgw: reduce string copy
+67e3be3 rgw: rework aws4 header parsing
+5bdd13e rgw: don't add port to aws4 canonical string if using default port
+474739e rgw: use correct method to get current epoch
+fc34fbd rgw: check for aws4 headers size where needed
+44decb4 rgw: properly handle initial slashes in SLO's segment path.
+e8b7dd4 rgw: remove unnecessary data copying in RGWPutMetadataBucket.
+63e0993 rgw: Fix updating CORS/ACLs during POST on Swift's container.
+4eded9a rgw: fix update of already existing account/bucket's custom attributes.
+30ee180 rgw: fix updating account/container metadata of Swift API.
 a32820d src/: remove all direct comparisons to get_max()
 f869594 PG::replica_scrub: don't adjust pool on max object
 1737ff3 hobject: compensate for non-canonical hobject_t::get_max() encodings
@@ -82,6 +422,11 @@ f869594 PG::replica_scrub: don't adjust pool on max object
 726292e client: report root's quota in statfs
 46c2bd0 pybind: fix unicode handling in CephFSVolumeClient::purge
 b989084 ceph-disk: workaround gperftool hang
+75d3261 rgw: back off if error repo is empty
+8dcd2a1 rgw: data sync retries sync on prevously failed bucket shards
+3e5210d rgw: store failed data sync entries in separate omap
+d08ca52 rgw: configurable window size to RGWOmapAppend
+368e884 rgw: add a cr for omap keys removal
 69a9500 rpm: unconditionally set ceph user's primary group to ceph (SUSE)
 208659f qa/workunits/rbd: basic cloned image test
 af6be1b rbd-mirror: copy snapshot parent image settings
@@ -120,7 +465,11 @@ fef7456 rbd-mirror: Replayer: bootstrap existing mirrored images
 c3b1bf1 rbd-mirror: calculate behind_master only if mirror tag is not newer than master
 cb950fc test: workaround failure in journal.sh
 f92c2a5 cls::journal: treat empty commit position as minimal
+933fdef rgw: add_zone only clears master_zone if --master=false
+e9f9916 rgw: add missing metadata_heap pool to old zones
+2266287 mds: wrongly treat symlink inode as normal file/dir when symlink inode is stale on kcephfs
 be9e85d tests: rm -fr /tmp/*virtualenv*
+0bdc8fd rgw : cleanup radosgw-admin temp command as it was deprecated and also implementation code for this command was removed in commit 8d7c8828b02c46e119adc4b9e8f655551512fc2d
 8fbb555 cls_journal: Select min commit position for new clients
 576ff0c cls_journal: remove duplicated key generation
 fae360f rgw: fix manager selection when APIs customized
@@ -130,6 +479,7 @@ e97cc2d osd: reset session->osdmap if session is not waiting for a map anymore
 305ebbc librbd: metadata retrieval added to open image state machine
 5c9ecea cls_rbd: async version of metadata_list helper method
 0c99028 rbd-mirror: disable librbd caching for replicated images
+108638f rgw: handle initial slashes properly in BulkDelete of Swift API.
 188318b rpm: Restore /var/run/ceph context manually
 ab1f65d rpm: Mute semodule in postun script
 587fb3d install-deps.sh: systemd-rpm-macros must be installed on SUSE
@@ -161,6 +511,11 @@ e5ebb51 journal: replay position might change after pruning stale tags
 9ecc3dc librbd: delay commit of overwritten journal event
 070dc7c ObjectCacher: pass new journal tid when overwriting extent
 0dfc787 qa/workunits/rbd: record rbd CLI debug messages during mirror stress
+a40cfe4 rgw: remove -EEXIST error msg for ZoneCreate
+ff9c29a rgw: camelcase names of custom attributes in Swift's responses.
+4a3c9f3 rgw: fix realm pull and period pull for apache frontend
+a08caa6 rgw: handle errors properly during GET on Swift's DLO.
+8163c4d rgw: don't unregister request if request is not connected to manager
 ffd545b rgw: keep track of written_objs correctly
 8356021 Pipe: take a ref to existing while we are waiting
 e0dfc55 qa/workunits/rbd: fixed rbd_mirror teuthology runtime errors
@@ -190,6 +545,7 @@ e885f1e radosgw-admin: fix 'period push' handling of --url
 7be281d debian/control: dh_systemd_start is in the dh-systemd package
 e463aa8 debian: install systemd target files
 3a66dd4 (tag: v10.2.1) 10.2.1
+70018bf os/FileStore::sync_entry check for stop in after wait
 72c9b6f osd: remove all stale osdmaps in handle_osd_map()
 a496b70 cmake: fix rbd compile errors
 9a46e13 cmake: add library cls_journal for target unittest_librbd
@@ -306,6 +662,7 @@ af3a4e4 mds: fix auth caps with hyphen in path
 d985135 mon/OSDMonitor: improve reweight_by_utilization() logic
 b0543fd mon/OSDMonitor: avoid potential expensive grace calculation
 53686df mon/OSDMonitor: max_osds must be > 0 for reweight-by-*
+b2d1df1 tests: be more generous with test timeout
 0de6345 OSDMonitor: avoid underflow in reweight-by-utilization if max_change=1
 63738d4 rbd: disk usage now includes all snapshots by default
 895c975 librbd: block RPC requests while updating features
@@ -340,6 +697,14 @@ c72f0bc rpm: implement scriptlets for the post-split daemon packages
 f4d63af python_cephfs: rule out empty/None volume_id
 b609017 python-cephfs: use rados namespace for data isolation.
 96b3726 Signed-off-by: Tamil Muthamizhan <tmuthami at redhat.com>
+6c1163c admin-socket: use chown instead of fchown
+3963de7 global-init: fixup inconsistent use of ceph ctx and conf
+2bc4194 global-init: chown pid files
+5681b78 global-init: chown run dir
+c207544 common-init: chown admin socket after service thread started
+f7e6b3c global-init: check init flags and set accordingly
+d4afe94 global-init: add a path chown wrapper function
+770ae9e ceph-context: add function to set init flags
 77fdbf1 Signed-off-by: Tamil Muthamizhan <tmuthami at redhat.com>
 1fa533e fix ceph init script
 7acbefa test: Fix ceph-objectstore-tool test to run manually from src non-cmake
@@ -560,6 +925,7 @@ ac750ce chain_[f]getxattr: always use size, no reaon to consider CHAIN_XATTR_MAX
 a330078 rgw-admin: fix period delete error message
 3320f8f rgw-admin: remove unused iterator
 4b0e39e osd/ReplicatedPG: make handle_watch_timeout no-op if !active
+21f0216 ceph-disk: Accept bcache devices as data disks
 64a8a6a rbd-mirror: fixed bug that caused infinite loop when disabling image mirroring
 a651598 mailmap: Luo Kexue name normalization
 c36c5d4 mailmap: Ning Yao affiliation
@@ -6373,7 +6739,7 @@ c503e97 rgw: include RequestId as part of the Error response
 6907778 ceph-objectstore-tool: add mark-complete operation
 567dd1e common: OpTracker age histogram calculation is not correct
 06147dd rgw: preserve all attrs if intra-zone copy
-293d12a test/Makefile.am: run mon/mon-scrub.sh as part of checks
+293d12a2 test/Makefile.am: run mon/mon-scrub.sh as part of checks
 6ceb37d test: mon/mon-scrub.sh: port clashed with other tests
 8fd40e1 librbd: remove duplicate read_only test in librbd::async_flatten
 897f074 test_async_compressor.cc: prefer ++operator for non-primitive iterators
diff --git a/Makefile.am b/Makefile.am
index 6fa1a00..a7f0e3e 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -15,6 +15,7 @@ EXTRA_DIST += \
 	etc/sysconfig/SuSEfirewall2.d/services/ceph-mon \
 	etc/sysconfig/SuSEfirewall2.d/services/ceph-osd-mds \
 	udev/50-rbd.rules \
+	udev/60-ceph-by-parttypeuuid.rules \
 	udev/95-ceph-osd.rules \
 	share/known_hosts_drop.ceph.com \
 	share/id_dsa_drop.ceph.com \
diff --git a/Makefile.in b/Makefile.in
index f1cd62e..94fa726 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -621,9 +621,9 @@ EXTRA_DIST = autogen.sh ceph.spec.in ceph.spec install-deps.sh \
 	etc/sysconfig/ceph \
 	etc/sysconfig/SuSEfirewall2.d/services/ceph-mon \
 	etc/sysconfig/SuSEfirewall2.d/services/ceph-osd-mds \
-	udev/50-rbd.rules udev/95-ceph-osd.rules \
-	share/known_hosts_drop.ceph.com share/id_dsa_drop.ceph.com \
-	share/id_dsa_drop.ceph.com.pub
+	udev/50-rbd.rules udev/60-ceph-by-parttypeuuid.rules \
+	udev/95-ceph-osd.rules share/known_hosts_drop.ceph.com \
+	share/id_dsa_drop.ceph.com share/id_dsa_drop.ceph.com.pub
 # the "." here makes sure check-local builds gtest and gmock before they are used
 SUBDIRS = . src man doc systemd selinux
 NPROC = nproc
diff --git a/ceph.spec b/ceph.spec
index 992e0b7..fa3a932 100644
--- a/ceph.spec
+++ b/ceph.spec
@@ -18,7 +18,12 @@
 %bcond_without cephfs_java
 %bcond_with tests
 %bcond_with xio
+%ifnarch s390 s390x
 %bcond_without tcmalloc
+%else
+# no gperftools/tcmalloc on s390(x)
+%bcond_with tcmalloc
+%endif
 %bcond_without libs_compat
 %bcond_with lowmem_builder
 %if 0%{?fedora} || 0%{?rhel}
@@ -49,7 +54,7 @@
 # common
 #################################################################################
 Name:		ceph
-Version:	10.2.2
+Version:	10.2.3
 Release:	0%{?dist}
 Epoch:		1
 Summary:	User space components of the Ceph file system
@@ -59,6 +64,13 @@ Group:         System/Filesystems
 %endif
 URL:		http://ceph.com/
 Source0:	http://ceph.com/download/%{name}-%{version}.tar.bz2
+%if 0%{?suse_version}
+%if 0%{?is_opensuse}
+ExclusiveArch:  x86_64 aarch64 ppc64 ppc64le
+%else
+ExclusiveArch:  x86_64 aarch64
+%endif
+%endif
 #################################################################################
 # dependencies that apply across all distro families
 #################################################################################
@@ -81,9 +93,13 @@ BuildRequires:	cryptsetup
 BuildRequires:	fuse-devel
 BuildRequires:	gcc-c++
 BuildRequires:	gdbm
+%if 0%{with tcmalloc}
+BuildRequires:	gperftools-devel
+%endif
 BuildRequires:	hdparm
 BuildRequires:	leveldb-devel > 1.2
 BuildRequires:	libaio-devel
+BuildRequires:	libatomic_ops-devel
 BuildRequires:	libblkid-devel >= 2.17
 BuildRequires:	libcurl-devel
 BuildRequires:	libudev-devel
@@ -118,13 +134,9 @@ BuildRequires:	systemd
 PreReq:		%fillup_prereq
 BuildRequires:	net-tools
 BuildRequires:	libbz2-devel
-%if 0%{with tcmalloc}
-BuildRequires:	gperftools-devel
-%endif
 BuildRequires:  btrfsprogs
 BuildRequires:	mozilla-nss-devel
 BuildRequires:	keyutils-devel
-BuildRequires:	libatomic-ops-devel
 BuildRequires:  libopenssl-devel
 BuildRequires:  lsb-release
 BuildRequires:  openldap2-devel
@@ -136,8 +148,6 @@ BuildRequires:  boost-random
 BuildRequires:	btrfs-progs
 BuildRequires:	nss-devel
 BuildRequires:	keyutils-libs-devel
-BuildRequires:	libatomic_ops-devel
-BuildRequires:	gperftools-devel
 BuildRequires:  openldap-devel
 BuildRequires:  openssl-devel
 BuildRequires:  redhat-lsb-core
@@ -197,7 +207,6 @@ Requires:      python-setuptools
 Requires:      grep
 Requires:      xfsprogs
 Requires:      logrotate
-Requires:      parted
 Requires:      util-linux
 Requires:      hdparm
 Requires:      cryptsetup
@@ -342,6 +351,7 @@ Requires:	gdisk
 %if 0%{?suse_version}
 Requires:	gptfdisk
 %endif
+Requires:       parted
 %description osd
 ceph-osd is the object storage daemon for the Ceph distributed file
 system.  It is responsible for storing objects on a local file system
@@ -660,7 +670,9 @@ export RPM_OPT_FLAGS=`echo $RPM_OPT_FLAGS | sed -e 's/i386/i486/'`
 %endif
 		$CEPH_EXTRA_CONFIGURE_ARGS \
 		%{?_with_ocf} \
-		%{?_with_tcmalloc} \
+%if %{without tcmalloc}
+		--without-tcmalloc \
+%endif
 		CFLAGS="$RPM_OPT_FLAGS" CXXFLAGS="$RPM_OPT_FLAGS"
 
 %if %{with lowmem_builder}
@@ -700,17 +712,18 @@ install -m 0644 -D src/logrotate.conf %{buildroot}%{_sysconfdir}/logrotate.d/cep
 chmod 0644 %{buildroot}%{_docdir}/ceph/sample.ceph.conf
 chmod 0644 %{buildroot}%{_docdir}/ceph/sample.fetch_config
 
-# firewall templates
+# firewall templates and /sbin/mount.ceph symlink
 %if 0%{?suse_version}
 install -m 0644 -D etc/sysconfig/SuSEfirewall2.d/services/ceph-mon %{buildroot}%{_sysconfdir}/sysconfig/SuSEfirewall2.d/services/ceph-mon
 install -m 0644 -D etc/sysconfig/SuSEfirewall2.d/services/ceph-osd-mds %{buildroot}%{_sysconfdir}/sysconfig/SuSEfirewall2.d/services/ceph-osd-mds
+mkdir -p %{buildroot}/sbin
+ln -sf %{_sbindir}/mount.ceph %{buildroot}/sbin/mount.ceph
 %endif
 
 # udev rules
 install -m 0644 -D udev/50-rbd.rules %{buildroot}%{_udevrulesdir}/50-rbd.rules
+install -m 0644 -D udev/60-ceph-by-parttypeuuid.rules %{buildroot}%{_udevrulesdir}/60-ceph-by-parttypeuuid.rules
 install -m 0644 -D udev/95-ceph-osd.rules %{buildroot}%{_udevrulesdir}/95-ceph-osd.rules
-mv %{buildroot}/sbin/mount.ceph %{buildroot}/usr/sbin/mount.ceph
-mv %{buildroot}/sbin/mount.fuse.ceph %{buildroot}/usr/sbin/mount.fuse.ceph
 
 #set up placeholder directories
 mkdir -p %{buildroot}%{_sysconfdir}/ceph
@@ -750,7 +763,6 @@ rm -rf %{buildroot}
 %{_libexecdir}/systemd/system-preset/50-ceph.preset
 %{_sbindir}/ceph-create-keys
 %{_sbindir}/rcceph
-%{_sbindir}/mount.ceph
 %dir %{_libexecdir}/ceph
 %{_libexecdir}/ceph/ceph_common.sh
 %dir %{_libdir}/rados-classes
@@ -785,7 +797,6 @@ rm -rf %{buildroot}
 %{_mandir}/man8/osdmaptool.8*
 %{_mandir}/man8/monmaptool.8*
 %{_mandir}/man8/cephfs.8*
-%{_mandir}/man8/mount.ceph.8*
 #set up placeholder directories
 %attr(750,ceph,ceph) %dir %{_localstatedir}/lib/ceph/tmp
 %attr(750,ceph,ceph) %dir %{_localstatedir}/lib/ceph/bootstrap-osd
@@ -841,6 +852,10 @@ DISABLE_RESTART_ON_UPDATE="yes"
 %{_bindir}/rbd-replay
 %{_bindir}/rbd-replay-many
 %{_bindir}/rbdmap
+%{_sbindir}/mount.ceph
+%if 0%{?suse_version}
+/sbin/mount.ceph
+%endif
 %if %{with lttng}
 %{_bindir}/rbd-replay-prep
 %endif
@@ -854,6 +869,7 @@ DISABLE_RESTART_ON_UPDATE="yes"
 %{_mandir}/man8/ceph-syn.8*
 %{_mandir}/man8/ceph-post-file.8*
 %{_mandir}/man8/ceph.8*
+%{_mandir}/man8/mount.ceph.8*
 %{_mandir}/man8/rados.8*
 %{_mandir}/man8/rbd.8*
 %{_mandir}/man8/rbdmap.8*
@@ -1140,6 +1156,7 @@ fi
 %{_sbindir}/ceph-disk
 %{_sbindir}/ceph-disk-udev
 %{_libexecdir}/ceph/ceph-osd-prestart.sh
+%{_udevrulesdir}/60-ceph-by-parttypeuuid.rules
 %{_udevrulesdir}/95-ceph-osd.rules
 %{_mandir}/man8/ceph-clsinfo.8*
 %{_mandir}/man8/ceph-disk.8*
@@ -1201,10 +1218,6 @@ fi
 %dir %{_prefix}/lib/ocf
 %dir %{_prefix}/lib/ocf/resource.d
 %dir %{_prefix}/lib/ocf/resource.d/ceph
-%exclude %{_prefix}/lib/ocf/resource.d/ceph/ceph
-%exclude %{_prefix}/lib/ocf/resource.d/ceph/mds
-%exclude %{_prefix}/lib/ocf/resource.d/ceph/mon
-%exclude %{_prefix}/lib/ocf/resource.d/ceph/osd
 %{_prefix}/lib/ocf/resource.d/ceph/rbd
 
 %endif
diff --git a/ceph.spec.in b/ceph.spec.in
index 3cf6307..b2e4b12 100644
--- a/ceph.spec.in
+++ b/ceph.spec.in
@@ -18,7 +18,12 @@
 %bcond_without cephfs_java
 %bcond_with tests
 %bcond_with xio
+%ifnarch s390 s390x
 %bcond_without tcmalloc
+%else
+# no gperftools/tcmalloc on s390(x)
+%bcond_with tcmalloc
+%endif
 %bcond_without libs_compat
 %bcond_with lowmem_builder
 %if 0%{?fedora} || 0%{?rhel}
@@ -59,6 +64,13 @@ Group:         System/Filesystems
 %endif
 URL:		http://ceph.com/
 Source0:	http://ceph.com/download/%{name}-%{version}.tar.bz2
+%if 0%{?suse_version}
+%if 0%{?is_opensuse}
+ExclusiveArch:  x86_64 aarch64 ppc64 ppc64le
+%else
+ExclusiveArch:  x86_64 aarch64
+%endif
+%endif
 #################################################################################
 # dependencies that apply across all distro families
 #################################################################################
@@ -81,9 +93,13 @@ BuildRequires:	cryptsetup
 BuildRequires:	fuse-devel
 BuildRequires:	gcc-c++
 BuildRequires:	gdbm
+%if 0%{with tcmalloc}
+BuildRequires:	gperftools-devel
+%endif
 BuildRequires:	hdparm
 BuildRequires:	leveldb-devel > 1.2
 BuildRequires:	libaio-devel
+BuildRequires:	libatomic_ops-devel
 BuildRequires:	libblkid-devel >= 2.17
 BuildRequires:	libcurl-devel
 BuildRequires:	libudev-devel
@@ -118,13 +134,9 @@ BuildRequires:	systemd
 PreReq:		%fillup_prereq
 BuildRequires:	net-tools
 BuildRequires:	libbz2-devel
-%if 0%{with tcmalloc}
-BuildRequires:	gperftools-devel
-%endif
 BuildRequires:  btrfsprogs
 BuildRequires:	mozilla-nss-devel
 BuildRequires:	keyutils-devel
-BuildRequires:	libatomic-ops-devel
 BuildRequires:  libopenssl-devel
 BuildRequires:  lsb-release
 BuildRequires:  openldap2-devel
@@ -136,8 +148,6 @@ BuildRequires:  boost-random
 BuildRequires:	btrfs-progs
 BuildRequires:	nss-devel
 BuildRequires:	keyutils-libs-devel
-BuildRequires:	libatomic_ops-devel
-BuildRequires:	gperftools-devel
 BuildRequires:  openldap-devel
 BuildRequires:  openssl-devel
 BuildRequires:  redhat-lsb-core
@@ -197,7 +207,6 @@ Requires:      python-setuptools
 Requires:      grep
 Requires:      xfsprogs
 Requires:      logrotate
-Requires:      parted
 Requires:      util-linux
 Requires:      hdparm
 Requires:      cryptsetup
@@ -342,6 +351,7 @@ Requires:	gdisk
 %if 0%{?suse_version}
 Requires:	gptfdisk
 %endif
+Requires:       parted
 %description osd
 ceph-osd is the object storage daemon for the Ceph distributed file
 system.  It is responsible for storing objects on a local file system
@@ -660,7 +670,9 @@ export RPM_OPT_FLAGS=`echo $RPM_OPT_FLAGS | sed -e 's/i386/i486/'`
 %endif
 		$CEPH_EXTRA_CONFIGURE_ARGS \
 		%{?_with_ocf} \
-		%{?_with_tcmalloc} \
+%if %{without tcmalloc}
+		--without-tcmalloc \
+%endif
 		CFLAGS="$RPM_OPT_FLAGS" CXXFLAGS="$RPM_OPT_FLAGS"
 
 %if %{with lowmem_builder}
@@ -700,17 +712,18 @@ install -m 0644 -D src/logrotate.conf %{buildroot}%{_sysconfdir}/logrotate.d/cep
 chmod 0644 %{buildroot}%{_docdir}/ceph/sample.ceph.conf
 chmod 0644 %{buildroot}%{_docdir}/ceph/sample.fetch_config
 
-# firewall templates
+# firewall templates and /sbin/mount.ceph symlink
 %if 0%{?suse_version}
 install -m 0644 -D etc/sysconfig/SuSEfirewall2.d/services/ceph-mon %{buildroot}%{_sysconfdir}/sysconfig/SuSEfirewall2.d/services/ceph-mon
 install -m 0644 -D etc/sysconfig/SuSEfirewall2.d/services/ceph-osd-mds %{buildroot}%{_sysconfdir}/sysconfig/SuSEfirewall2.d/services/ceph-osd-mds
+mkdir -p %{buildroot}/sbin
+ln -sf %{_sbindir}/mount.ceph %{buildroot}/sbin/mount.ceph
 %endif
 
 # udev rules
 install -m 0644 -D udev/50-rbd.rules %{buildroot}%{_udevrulesdir}/50-rbd.rules
+install -m 0644 -D udev/60-ceph-by-parttypeuuid.rules %{buildroot}%{_udevrulesdir}/60-ceph-by-parttypeuuid.rules
 install -m 0644 -D udev/95-ceph-osd.rules %{buildroot}%{_udevrulesdir}/95-ceph-osd.rules
-mv %{buildroot}/sbin/mount.ceph %{buildroot}/usr/sbin/mount.ceph
-mv %{buildroot}/sbin/mount.fuse.ceph %{buildroot}/usr/sbin/mount.fuse.ceph
 
 #set up placeholder directories
 mkdir -p %{buildroot}%{_sysconfdir}/ceph
@@ -750,7 +763,6 @@ rm -rf %{buildroot}
 %{_libexecdir}/systemd/system-preset/50-ceph.preset
 %{_sbindir}/ceph-create-keys
 %{_sbindir}/rcceph
-%{_sbindir}/mount.ceph
 %dir %{_libexecdir}/ceph
 %{_libexecdir}/ceph/ceph_common.sh
 %dir %{_libdir}/rados-classes
@@ -785,7 +797,6 @@ rm -rf %{buildroot}
 %{_mandir}/man8/osdmaptool.8*
 %{_mandir}/man8/monmaptool.8*
 %{_mandir}/man8/cephfs.8*
-%{_mandir}/man8/mount.ceph.8*
 #set up placeholder directories
 %attr(750,ceph,ceph) %dir %{_localstatedir}/lib/ceph/tmp
 %attr(750,ceph,ceph) %dir %{_localstatedir}/lib/ceph/bootstrap-osd
@@ -841,6 +852,10 @@ DISABLE_RESTART_ON_UPDATE="yes"
 %{_bindir}/rbd-replay
 %{_bindir}/rbd-replay-many
 %{_bindir}/rbdmap
+%{_sbindir}/mount.ceph
+%if 0%{?suse_version}
+/sbin/mount.ceph
+%endif
 %if %{with lttng}
 %{_bindir}/rbd-replay-prep
 %endif
@@ -854,6 +869,7 @@ DISABLE_RESTART_ON_UPDATE="yes"
 %{_mandir}/man8/ceph-syn.8*
 %{_mandir}/man8/ceph-post-file.8*
 %{_mandir}/man8/ceph.8*
+%{_mandir}/man8/mount.ceph.8*
 %{_mandir}/man8/rados.8*
 %{_mandir}/man8/rbd.8*
 %{_mandir}/man8/rbdmap.8*
@@ -1140,6 +1156,7 @@ fi
 %{_sbindir}/ceph-disk
 %{_sbindir}/ceph-disk-udev
 %{_libexecdir}/ceph/ceph-osd-prestart.sh
+%{_udevrulesdir}/60-ceph-by-parttypeuuid.rules
 %{_udevrulesdir}/95-ceph-osd.rules
 %{_mandir}/man8/ceph-clsinfo.8*
 %{_mandir}/man8/ceph-disk.8*
@@ -1201,10 +1218,6 @@ fi
 %dir %{_prefix}/lib/ocf
 %dir %{_prefix}/lib/ocf/resource.d
 %dir %{_prefix}/lib/ocf/resource.d/ceph
-%exclude %{_prefix}/lib/ocf/resource.d/ceph/ceph
-%exclude %{_prefix}/lib/ocf/resource.d/ceph/mds
-%exclude %{_prefix}/lib/ocf/resource.d/ceph/mon
-%exclude %{_prefix}/lib/ocf/resource.d/ceph/osd
 %{_prefix}/lib/ocf/resource.d/ceph/rbd
 
 %endif
diff --git a/configure b/configure
index 8083cc5..1cc95de 100755
--- a/configure
+++ b/configure
@@ -1,6 +1,6 @@
 #! /bin/sh
 # Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.69 for ceph 10.2.2.
+# Generated by GNU Autoconf 2.69 for ceph 10.2.3.
 #
 # Report bugs to <ceph-devel at vger.kernel.org>.
 #
@@ -590,8 +590,8 @@ MAKEFLAGS=
 # Identity of this package.
 PACKAGE_NAME='ceph'
 PACKAGE_TARNAME='ceph'
-PACKAGE_VERSION='10.2.2'
-PACKAGE_STRING='ceph 10.2.2'
+PACKAGE_VERSION='10.2.3'
+PACKAGE_STRING='ceph 10.2.3'
 PACKAGE_BUGREPORT='ceph-devel at vger.kernel.org'
 PACKAGE_URL=''
 
@@ -1582,7 +1582,7 @@ if test "$ac_init_help" = "long"; then
   # Omit some internal or obsolete options to make the list less imposing.
   # This message is too long to be a string in the A/UX 3.1 sh.
   cat <<_ACEOF
-\`configure' configures ceph 10.2.2 to adapt to many kinds of systems.
+\`configure' configures ceph 10.2.3 to adapt to many kinds of systems.
 
 Usage: $0 [OPTION]... [VAR=VALUE]...
 
@@ -1653,7 +1653,7 @@ fi
 
 if test -n "$ac_init_help"; then
   case $ac_init_help in
-     short | recursive ) echo "Configuration of ceph 10.2.2:";;
+     short | recursive ) echo "Configuration of ceph 10.2.3:";;
    esac
   cat <<\_ACEOF
 
@@ -1837,7 +1837,7 @@ fi
 test -n "$ac_init_help" && exit $ac_status
 if $ac_init_version; then
   cat <<\_ACEOF
-ceph configure 10.2.2
+ceph configure 10.2.3
 generated by GNU Autoconf 2.69
 
 Copyright (C) 2012 Free Software Foundation, Inc.
@@ -2913,7 +2913,7 @@ cat >config.log <<_ACEOF
 This file contains any messages produced by compilers while
 running configure, to aid debugging if configure makes a mistake.
 
-It was created by ceph $as_me 10.2.2, which was
+It was created by ceph $as_me 10.2.3, which was
 generated by GNU Autoconf 2.69.  Invocation command line was
 
   $ $0 $@
@@ -16408,7 +16408,7 @@ fi
 
 # Define the identity of the package.
  PACKAGE='ceph'
- VERSION='10.2.2'
+ VERSION='10.2.3'
 
 
 cat >>confdefs.h <<_ACEOF
@@ -25332,7 +25332,7 @@ $as_echo "$am_cv_python_pyexecdir" >&6; }
 
 ac_config_headers="$ac_config_headers src/acconfig.h"
 
-ac_config_files="$ac_config_files Makefile src/Makefile src/ocf/Makefile src/ocf/ceph src/ocf/rbd src/java/Makefile systemd/Makefile man/Makefile doc/Makefile selinux/Makefile ceph.spec"
+ac_config_files="$ac_config_files Makefile src/Makefile src/ocf/Makefile src/ocf/rbd src/java/Makefile systemd/Makefile man/Makefile doc/Makefile selinux/Makefile ceph.spec"
 
 cat >confcache <<\_ACEOF
 # This file is a shell script that caches the results of configure
@@ -26100,7 +26100,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
 # report actual input values of CONFIG_FILES etc. instead of their
 # values after options handling.
 ac_log="
-This file was extended by ceph $as_me 10.2.2, which was
+This file was extended by ceph $as_me 10.2.3, which was
 generated by GNU Autoconf 2.69.  Invocation command line was
 
   CONFIG_FILES    = $CONFIG_FILES
@@ -26166,7 +26166,7 @@ _ACEOF
 cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
 ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
 ac_cs_version="\\
-ceph config.status 10.2.2
+ceph config.status 10.2.3
 configured by $0, generated by GNU Autoconf 2.69,
   with options \\"\$ac_cs_config\\"
 
@@ -26676,7 +26676,6 @@ do
     "Makefile") CONFIG_FILES="$CONFIG_FILES Makefile" ;;
     "src/Makefile") CONFIG_FILES="$CONFIG_FILES src/Makefile" ;;
     "src/ocf/Makefile") CONFIG_FILES="$CONFIG_FILES src/ocf/Makefile" ;;
-    "src/ocf/ceph") CONFIG_FILES="$CONFIG_FILES src/ocf/ceph" ;;
     "src/ocf/rbd") CONFIG_FILES="$CONFIG_FILES src/ocf/rbd" ;;
     "src/java/Makefile") CONFIG_FILES="$CONFIG_FILES src/java/Makefile" ;;
     "systemd/Makefile") CONFIG_FILES="$CONFIG_FILES systemd/Makefile" ;;
diff --git a/configure.ac b/configure.ac
index e06baf1..fcc500b 100644
--- a/configure.ac
+++ b/configure.ac
@@ -8,7 +8,7 @@ AC_PREREQ(2.59)
 # VERSION define is not used by the code.  It gets a version string
 # from 'git describe'; see src/ceph_ver.[ch]
 
-AC_INIT([ceph], [10.2.2], [ceph-devel at vger.kernel.org])
+AC_INIT([ceph], [10.2.3], [ceph-devel at vger.kernel.org])
 
 AX_CXX_COMPILE_STDCXX_11(, mandatory)
 
@@ -1356,7 +1356,6 @@ AC_CONFIG_HEADERS([src/acconfig.h])
 AC_CONFIG_FILES([Makefile
 	src/Makefile
 	src/ocf/Makefile
-	src/ocf/ceph
 	src/ocf/rbd
 	src/java/Makefile
 	systemd/Makefile
diff --git a/doc/man/8/radosgw-admin.rst b/doc/man/8/radosgw-admin.rst
index b4d75ff..550e551 100644
--- a/doc/man/8/radosgw-admin.rst
+++ b/doc/man/8/radosgw-admin.rst
@@ -159,10 +159,6 @@ which are as follows:
 :command:`usage trim`
   Trim usage information (with optional user and date range).
 
-:command:`temp remove`
-  Remove temporary objects that were created up to specified date
-  (and optional time).
-
 :command:`gc list`
   Dump expired garbage collection objects (specify --include-all to list all
   entries, including unexpired).
diff --git a/doc/man/8/rbd.rst b/doc/man/8/rbd.rst
index a3594c0..b312d02 100644
--- a/doc/man/8/rbd.rst
+++ b/doc/man/8/rbd.rst
@@ -54,7 +54,7 @@ Parameters
 
 .. option:: --image-format format-id
 
-   Specifies which object layout to use. The default is 1.
+   Specifies which object layout to use. The default is 2.
 
    * format 1 - (deprecated) Use the original format for a new rbd image. This
      format is understood by all versions of librbd and the kernel rbd module,
diff --git a/install-deps.sh b/install-deps.sh
index 03ca760..129178f 100755
--- a/install-deps.sh
+++ b/install-deps.sh
@@ -24,7 +24,7 @@ if test -f /etc/redhat-release ; then
 fi
 
 if type apt-get > /dev/null 2>&1 ; then
-    $SUDO apt-get install -y lsb-release
+    $SUDO apt-get install -y lsb-release devscripts equivs
 fi
 
 if type zypper > /dev/null 2>&1 ; then
@@ -39,20 +39,23 @@ Ubuntu|Debian|Devuan)
             exit 1
         fi
         touch $DIR/status
-        packages=$(dpkg-checkbuilddeps --admindir=$DIR debian/control 2>&1 | \
-            perl -p -e 's/.*Unmet build dependencies: *//;' \
-            -e 's/build-essential:native/build-essential/;' \
-            -e 's/\s*\|\s*/\|/g;' \
-            -e 's/\(.*?\)//g;' \
-            -e 's/ +/\n/g;' | sort)
+
+	backports=""
+	control="debian/control"
         case $(lsb_release -sc) in
             squeeze|wheezy)
-                packages=$(echo $packages | perl -pe 's/[-\w]*babeltrace[-\w]*//g')
+		control="/tmp/control.$$"
+		grep -v babeltrace debian/control > $control
                 backports="-t $(lsb_release -sc)-backports"
                 ;;
         esac
-        packages=$(echo $packages) # change newlines into spaces
-        $SUDO env DEBIAN_FRONTEND=noninteractive apt-get install $backports -y $packages || exit 1
+
+	# make a metapackage that expresses the build dependencies,
+	# install it, rm the .deb; then uninstall the package as its
+	# work is done
+	$SUDO env DEBIAN_FRONTEND=noninteractive mk-build-deps --install --remove --tool="apt-get -y --no-install-recommends $backports" $control || exit 1
+	$SUDO env DEBIAN_FRONTEND=noninteractive apt-get -y remove ceph-build-deps
+	if [ -n "$backports" ] ; then rm $control; fi
         ;;
 CentOS|Fedora|RedHatEnterpriseServer)
         case $(lsb_release -si) in
@@ -106,7 +109,14 @@ function activate_virtualenv() {
     local env_dir=$top_srcdir/install-deps-$interpreter
 
     if ! test -d $env_dir ; then
-        virtualenv --python $interpreter $env_dir
+        # Make a temporary virtualenv to get a fresh version of virtualenv
+        # because CentOS 7 has a buggy old version (v1.10.1)
+        # https://github.com/pypa/virtualenv/issues/463
+        virtualenv ${env_dir}_tmp
+        ${env_dir}_tmp/bin/pip install --upgrade virtualenv
+        ${env_dir}_tmp/bin/virtualenv --python $interpreter $env_dir
+        rm -rf ${env_dir}_tmp
+
         . $env_dir/bin/activate
         if ! populate_wheelhouse install ; then
             rm -rf $env_dir
diff --git a/man/ceph-authtool.8 b/man/ceph-authtool.8
index 0502d27..953ba14 100644
--- a/man/ceph-authtool.8
+++ b/man/ceph-authtool.8
@@ -1,6 +1,6 @@
 .\" Man page generated from reStructuredText.
 .
-.TH "CEPH-AUTHTOOL" "8" "June 14, 2016" "dev" "Ceph"
+.TH "CEPH-AUTHTOOL" "8" "September 21, 2016" "dev" "Ceph"
 .SH NAME
 ceph-authtool \- ceph keyring manipulation tool
 .
diff --git a/man/ceph-clsinfo.8 b/man/ceph-clsinfo.8
index 7e5c299..bb62ace 100644
--- a/man/ceph-clsinfo.8
+++ b/man/ceph-clsinfo.8
@@ -1,6 +1,6 @@
 .\" Man page generated from reStructuredText.
 .
-.TH "CEPH-CLSINFO" "8" "June 14, 2016" "dev" "Ceph"
+.TH "CEPH-CLSINFO" "8" "September 21, 2016" "dev" "Ceph"
 .SH NAME
 ceph-clsinfo \- show class object information
 .
diff --git a/man/ceph-conf.8 b/man/ceph-conf.8
index 6ffbd16..761b750 100644
--- a/man/ceph-conf.8
+++ b/man/ceph-conf.8
@@ -1,6 +1,6 @@
 .\" Man page generated from reStructuredText.
 .
-.TH "CEPH-CONF" "8" "June 14, 2016" "dev" "Ceph"
+.TH "CEPH-CONF" "8" "September 21, 2016" "dev" "Ceph"
 .SH NAME
 ceph-conf \- ceph conf file tool
 .
diff --git a/man/ceph-create-keys.8 b/man/ceph-create-keys.8
index fa5ecd8..b97c967 100644
--- a/man/ceph-create-keys.8
+++ b/man/ceph-create-keys.8
@@ -1,6 +1,6 @@
 .\" Man page generated from reStructuredText.
 .
-.TH "CEPH-CREATE-KEYS" "8" "June 14, 2016" "dev" "Ceph"
+.TH "CEPH-CREATE-KEYS" "8" "September 21, 2016" "dev" "Ceph"
 .SH NAME
 ceph-create-keys \- ceph keyring generate tool
 .
diff --git a/man/ceph-debugpack.8 b/man/ceph-debugpack.8
index 2735a75..daa4466 100644
--- a/man/ceph-debugpack.8
+++ b/man/ceph-debugpack.8
@@ -1,6 +1,6 @@
 .\" Man page generated from reStructuredText.
 .
-.TH "CEPH-DEBUGPACK" "8" "June 14, 2016" "dev" "Ceph"
+.TH "CEPH-DEBUGPACK" "8" "September 21, 2016" "dev" "Ceph"
 .SH NAME
 ceph-debugpack \- ceph debug packer utility
 .
diff --git a/man/ceph-dencoder.8 b/man/ceph-dencoder.8
index 63f1356..7b46673 100644
--- a/man/ceph-dencoder.8
+++ b/man/ceph-dencoder.8
@@ -1,6 +1,6 @@
 .\" Man page generated from reStructuredText.
 .
-.TH "CEPH-DENCODER" "8" "June 14, 2016" "dev" "Ceph"
+.TH "CEPH-DENCODER" "8" "September 21, 2016" "dev" "Ceph"
 .SH NAME
 ceph-dencoder \- ceph encoder/decoder utility
 .
diff --git a/man/ceph-deploy.8 b/man/ceph-deploy.8
index 6ccf587..5dadae7 100644
--- a/man/ceph-deploy.8
+++ b/man/ceph-deploy.8
@@ -1,6 +1,6 @@
 .\" Man page generated from reStructuredText.
 .
-.TH "CEPH-DEPLOY" "8" "June 14, 2016" "dev" "Ceph"
+.TH "CEPH-DEPLOY" "8" "September 21, 2016" "dev" "Ceph"
 .SH NAME
 ceph-deploy \- Ceph deployment tool
 .
diff --git a/man/ceph-detect-init.8 b/man/ceph-detect-init.8
index 94242e9..cebd66b 100644
--- a/man/ceph-detect-init.8
+++ b/man/ceph-detect-init.8
@@ -1,6 +1,6 @@
 .\" Man page generated from reStructuredText.
 .
-.TH "CEPH-DETECT-INIT" "8" "June 14, 2016" "dev" "Ceph"
+.TH "CEPH-DETECT-INIT" "8" "September 21, 2016" "dev" "Ceph"
 .SH NAME
 ceph-detect-init \- display the init system Ceph should use
 .
diff --git a/man/ceph-disk.8 b/man/ceph-disk.8
index 618897f..5c9c9f6 100644
--- a/man/ceph-disk.8
+++ b/man/ceph-disk.8
@@ -1,6 +1,6 @@
 .\" Man page generated from reStructuredText.
 .
-.TH "CEPH-DISK" "8" "June 14, 2016" "dev" "Ceph"
+.TH "CEPH-DISK" "8" "September 21, 2016" "dev" "Ceph"
 .SH NAME
 ceph-disk \- Ceph disk utility for OSD
 .
diff --git a/man/ceph-fuse.8 b/man/ceph-fuse.8
index 219fefb..f421ba5 100644
--- a/man/ceph-fuse.8
+++ b/man/ceph-fuse.8
@@ -1,6 +1,6 @@
 .\" Man page generated from reStructuredText.
 .
-.TH "CEPH-FUSE" "8" "June 14, 2016" "dev" "Ceph"
+.TH "CEPH-FUSE" "8" "September 21, 2016" "dev" "Ceph"
 .SH NAME
 ceph-fuse \- FUSE-based client for ceph
 .
diff --git a/man/ceph-mds.8 b/man/ceph-mds.8
index 0b16acd..99fd9fa 100644
--- a/man/ceph-mds.8
+++ b/man/ceph-mds.8
@@ -1,6 +1,6 @@
 .\" Man page generated from reStructuredText.
 .
-.TH "CEPH-MDS" "8" "June 14, 2016" "dev" "Ceph"
+.TH "CEPH-MDS" "8" "September 21, 2016" "dev" "Ceph"
 .SH NAME
 ceph-mds \- ceph metadata server daemon
 .
diff --git a/man/ceph-mon.8 b/man/ceph-mon.8
index ee60c53..ecd39b7 100644
--- a/man/ceph-mon.8
+++ b/man/ceph-mon.8
@@ -1,6 +1,6 @@
 .\" Man page generated from reStructuredText.
 .
-.TH "CEPH-MON" "8" "June 14, 2016" "dev" "Ceph"
+.TH "CEPH-MON" "8" "September 21, 2016" "dev" "Ceph"
 .SH NAME
 ceph-mon \- ceph monitor daemon
 .
diff --git a/man/ceph-osd.8 b/man/ceph-osd.8
index 528ce83..bb7ecda 100644
--- a/man/ceph-osd.8
+++ b/man/ceph-osd.8
@@ -1,6 +1,6 @@
 .\" Man page generated from reStructuredText.
 .
-.TH "CEPH-OSD" "8" "June 14, 2016" "dev" "Ceph"
+.TH "CEPH-OSD" "8" "September 21, 2016" "dev" "Ceph"
 .SH NAME
 ceph-osd \- ceph object storage daemon
 .
diff --git a/man/ceph-post-file.8 b/man/ceph-post-file.8
index c835754..81c8ecf 100644
--- a/man/ceph-post-file.8
+++ b/man/ceph-post-file.8
@@ -1,6 +1,6 @@
 .\" Man page generated from reStructuredText.
 .
-.TH "CEPH-POST-FILE" "8" "June 14, 2016" "dev" "Ceph"
+.TH "CEPH-POST-FILE" "8" "September 21, 2016" "dev" "Ceph"
 .SH NAME
 ceph-post-file \- post files for ceph developers
 .
diff --git a/man/ceph-rbdnamer.8 b/man/ceph-rbdnamer.8
index cac2ad6..1aa3adb 100644
--- a/man/ceph-rbdnamer.8
+++ b/man/ceph-rbdnamer.8
@@ -1,6 +1,6 @@
 .\" Man page generated from reStructuredText.
 .
-.TH "CEPH-RBDNAMER" "8" "June 14, 2016" "dev" "Ceph"
+.TH "CEPH-RBDNAMER" "8" "September 21, 2016" "dev" "Ceph"
 .SH NAME
 ceph-rbdnamer \- udev helper to name RBD devices
 .
diff --git a/man/ceph-rest-api.8 b/man/ceph-rest-api.8
index ebe5524..159e89b 100644
--- a/man/ceph-rest-api.8
+++ b/man/ceph-rest-api.8
@@ -1,6 +1,6 @@
 .\" Man page generated from reStructuredText.
 .
-.TH "CEPH-REST-API" "8" "June 14, 2016" "dev" "Ceph"
+.TH "CEPH-REST-API" "8" "September 21, 2016" "dev" "Ceph"
 .SH NAME
 ceph-rest-api \- ceph RESTlike administration server
 .
diff --git a/man/ceph-run.8 b/man/ceph-run.8
index 0ff4647..4f01ddd 100644
--- a/man/ceph-run.8
+++ b/man/ceph-run.8
@@ -1,6 +1,6 @@
 .\" Man page generated from reStructuredText.
 .
-.TH "CEPH-RUN" "8" "June 14, 2016" "dev" "Ceph"
+.TH "CEPH-RUN" "8" "September 21, 2016" "dev" "Ceph"
 .SH NAME
 ceph-run \- restart daemon on core dump
 .
diff --git a/man/ceph-syn.8 b/man/ceph-syn.8
index e09a243..f97049f 100644
--- a/man/ceph-syn.8
+++ b/man/ceph-syn.8
@@ -1,6 +1,6 @@
 .\" Man page generated from reStructuredText.
 .
-.TH "CEPH-SYN" "8" "June 14, 2016" "dev" "Ceph"
+.TH "CEPH-SYN" "8" "September 21, 2016" "dev" "Ceph"
 .SH NAME
 ceph-syn \- ceph synthetic workload generator
 .
diff --git a/man/ceph.8 b/man/ceph.8
index 5d2be6d..d57535f 100644
--- a/man/ceph.8
+++ b/man/ceph.8
@@ -1,6 +1,6 @@
 .\" Man page generated from reStructuredText.
 .
-.TH "CEPH" "8" "June 14, 2016" "dev" "Ceph"
+.TH "CEPH" "8" "September 21, 2016" "dev" "Ceph"
 .SH NAME
 ceph \- ceph administration tool
 .
diff --git a/man/cephfs.8 b/man/cephfs.8
index 6924d76..60f8d34 100644
--- a/man/cephfs.8
+++ b/man/cephfs.8
@@ -1,6 +1,6 @@
 .\" Man page generated from reStructuredText.
 .
-.TH "CEPHFS" "8" "June 14, 2016" "dev" "Ceph"
+.TH "CEPHFS" "8" "September 21, 2016" "dev" "Ceph"
 .SH NAME
 cephfs \- ceph file system options utility
 .
diff --git a/man/crushtool.8 b/man/crushtool.8
index 04798f8..1101e25 100644
--- a/man/crushtool.8
+++ b/man/crushtool.8
@@ -1,6 +1,6 @@
 .\" Man page generated from reStructuredText.
 .
-.TH "CRUSHTOOL" "8" "June 14, 2016" "dev" "Ceph"
+.TH "CRUSHTOOL" "8" "September 21, 2016" "dev" "Ceph"
 .SH NAME
 crushtool \- CRUSH map manipulation tool
 .
diff --git a/man/librados-config.8 b/man/librados-config.8
index 2267d2a..311d954 100644
--- a/man/librados-config.8
+++ b/man/librados-config.8
@@ -1,6 +1,6 @@
 .\" Man page generated from reStructuredText.
 .
-.TH "LIBRADOS-CONFIG" "8" "June 14, 2016" "dev" "Ceph"
+.TH "LIBRADOS-CONFIG" "8" "September 21, 2016" "dev" "Ceph"
 .SH NAME
 librados-config \- display information about librados
 .
diff --git a/man/monmaptool.8 b/man/monmaptool.8
index 77d81d7..a4896d2 100644
--- a/man/monmaptool.8
+++ b/man/monmaptool.8
@@ -1,6 +1,6 @@
 .\" Man page generated from reStructuredText.
 .
-.TH "MONMAPTOOL" "8" "June 14, 2016" "dev" "Ceph"
+.TH "MONMAPTOOL" "8" "September 21, 2016" "dev" "Ceph"
 .SH NAME
 monmaptool \- ceph monitor cluster map manipulation tool
 .
diff --git a/man/mount.ceph.8 b/man/mount.ceph.8
index 253e816..45e3217 100644
--- a/man/mount.ceph.8
+++ b/man/mount.ceph.8
@@ -1,6 +1,6 @@
 .\" Man page generated from reStructuredText.
 .
-.TH "MOUNT.CEPH" "8" "June 14, 2016" "dev" "Ceph"
+.TH "MOUNT.CEPH" "8" "September 21, 2016" "dev" "Ceph"
 .SH NAME
 mount.ceph \- mount a ceph file system
 .
diff --git a/man/osdmaptool.8 b/man/osdmaptool.8
index 2542850..383ae44 100644
--- a/man/osdmaptool.8
+++ b/man/osdmaptool.8
@@ -1,6 +1,6 @@
 .\" Man page generated from reStructuredText.
 .
-.TH "OSDMAPTOOL" "8" "June 14, 2016" "dev" "Ceph"
+.TH "OSDMAPTOOL" "8" "September 21, 2016" "dev" "Ceph"
 .SH NAME
 osdmaptool \- ceph osd cluster map manipulation tool
 .
diff --git a/man/rados.8 b/man/rados.8
index bfcb603..ecc5bb4 100644
--- a/man/rados.8
+++ b/man/rados.8
@@ -1,6 +1,6 @@
 .\" Man page generated from reStructuredText.
 .
-.TH "RADOS" "8" "June 14, 2016" "dev" "Ceph"
+.TH "RADOS" "8" "September 21, 2016" "dev" "Ceph"
 .SH NAME
 rados \- rados object storage utility
 .
diff --git a/man/radosgw-admin.8 b/man/radosgw-admin.8
index d06027d..45da485 100644
--- a/man/radosgw-admin.8
+++ b/man/radosgw-admin.8
@@ -1,6 +1,6 @@
 .\" Man page generated from reStructuredText.
 .
-.TH "RADOSGW-ADMIN" "8" "June 14, 2016" "dev" "Ceph"
+.TH "RADOSGW-ADMIN" "8" "September 21, 2016" "dev" "Ceph"
 .SH NAME
 radosgw-admin \- rados REST gateway user administration utility
 .
@@ -179,10 +179,6 @@ Show the usage information (with optional user and date range).
 .B \fBusage trim\fP
 Trim usage information (with optional user and date range).
 .TP
-.B \fBtemp remove\fP
-Remove temporary objects that were created up to specified date
-(and optional time).
-.TP
 .B \fBgc list\fP
 Dump expired garbage collection objects (specify \-\-include\-all to list all
 entries, including unexpired).
diff --git a/man/radosgw.8 b/man/radosgw.8
index 2aea650..1825139 100644
--- a/man/radosgw.8
+++ b/man/radosgw.8
@@ -1,6 +1,6 @@
 .\" Man page generated from reStructuredText.
 .
-.TH "RADOSGW" "8" "June 14, 2016" "dev" "Ceph"
+.TH "RADOSGW" "8" "September 21, 2016" "dev" "Ceph"
 .SH NAME
 radosgw \- rados REST gateway
 .
diff --git a/man/rbd-fuse.8 b/man/rbd-fuse.8
index 7111fc3..57399f1 100644
--- a/man/rbd-fuse.8
+++ b/man/rbd-fuse.8
@@ -1,6 +1,6 @@
 .\" Man page generated from reStructuredText.
 .
-.TH "RBD-FUSE" "8" "June 14, 2016" "dev" "Ceph"
+.TH "RBD-FUSE" "8" "September 21, 2016" "dev" "Ceph"
 .SH NAME
 rbd-fuse \- expose rbd images as files
 .
diff --git a/man/rbd-mirror.8 b/man/rbd-mirror.8
index 2cf1656..c3c9d10 100644
--- a/man/rbd-mirror.8
+++ b/man/rbd-mirror.8
@@ -1,6 +1,6 @@
 .\" Man page generated from reStructuredText.
 .
-.TH "RBD-MIRROR" "8" "June 14, 2016" "dev" "Ceph"
+.TH "RBD-MIRROR" "8" "September 21, 2016" "dev" "Ceph"
 .SH NAME
 rbd-mirror \- Ceph daemon for mirroring RBD images
 .
diff --git a/man/rbd-nbd.8 b/man/rbd-nbd.8
index c2c37b6..aa816f5 100644
--- a/man/rbd-nbd.8
+++ b/man/rbd-nbd.8
@@ -1,6 +1,6 @@
 .\" Man page generated from reStructuredText.
 .
-.TH "RBD-NBD" "8" "June 14, 2016" "dev" "Ceph"
+.TH "RBD-NBD" "8" "September 21, 2016" "dev" "Ceph"
 .SH NAME
 rbd-nbd \- map rbd images to nbd device
 .
diff --git a/man/rbd-replay-many.8 b/man/rbd-replay-many.8
index eb67bfa..f0bed2c 100644
--- a/man/rbd-replay-many.8
+++ b/man/rbd-replay-many.8
@@ -1,6 +1,6 @@
 .\" Man page generated from reStructuredText.
 .
-.TH "RBD-REPLAY-MANY" "8" "June 14, 2016" "dev" "Ceph"
+.TH "RBD-REPLAY-MANY" "8" "September 21, 2016" "dev" "Ceph"
 .SH NAME
 rbd-replay-many \- replay a rados block device (RBD) workload on several clients
 .
diff --git a/man/rbd-replay-prep.8 b/man/rbd-replay-prep.8
index 078db7e..36982cb 100644
--- a/man/rbd-replay-prep.8
+++ b/man/rbd-replay-prep.8
@@ -1,6 +1,6 @@
 .\" Man page generated from reStructuredText.
 .
-.TH "RBD-REPLAY-PREP" "8" "June 14, 2016" "dev" "Ceph"
+.TH "RBD-REPLAY-PREP" "8" "September 21, 2016" "dev" "Ceph"
 .SH NAME
 rbd-replay-prep \- prepare captured rados block device (RBD) workloads for replay
 .
diff --git a/man/rbd-replay.8 b/man/rbd-replay.8
index 5fbdf7f..60c8786 100644
--- a/man/rbd-replay.8
+++ b/man/rbd-replay.8
@@ -1,6 +1,6 @@
 .\" Man page generated from reStructuredText.
 .
-.TH "RBD-REPLAY" "8" "June 14, 2016" "dev" "Ceph"
+.TH "RBD-REPLAY" "8" "September 21, 2016" "dev" "Ceph"
 .SH NAME
 rbd-replay \- replay rados block device (RBD) workloads
 .
diff --git a/man/rbd.8 b/man/rbd.8
index 72d34b1..e1aa861 100644
--- a/man/rbd.8
+++ b/man/rbd.8
@@ -1,6 +1,6 @@
 .\" Man page generated from reStructuredText.
 .
-.TH "RBD" "8" "June 14, 2016" "dev" "Ceph"
+.TH "RBD" "8" "September 21, 2016" "dev" "Ceph"
 .SH NAME
 rbd \- manage rados block device (RBD) images
 .
@@ -75,7 +75,7 @@ default for some commands).
 .INDENT 0.0
 .TP
 .B \-\-image\-format format\-id
-Specifies which object layout to use. The default is 1.
+Specifies which object layout to use. The default is 2.
 .INDENT 7.0
 .IP \(bu 2
 format 1 \- (deprecated) Use the original format for a new rbd image. This
diff --git a/man/rbdmap.8 b/man/rbdmap.8
index 92596c8..10a098f 100644
--- a/man/rbdmap.8
+++ b/man/rbdmap.8
@@ -1,6 +1,6 @@
 .\" Man page generated from reStructuredText.
 .
-.TH "RBDMAP" "8" "June 14, 2016" "dev" "Ceph"
+.TH "RBDMAP" "8" "September 21, 2016" "dev" "Ceph"
 .SH NAME
 rbdmap \- map RBD devices at boot time
 .
diff --git a/selinux/ceph.te b/selinux/ceph.te
index 52bb504..0e85c84 100644
--- a/selinux/ceph.te
+++ b/selinux/ceph.te
@@ -84,8 +84,8 @@ logging_send_syslog_msg(ceph_t)
 sysnet_dns_name_resolve(ceph_t)
 
 # basis for future security review
-allow ceph_t ceph_var_run_t:sock_file { create unlink write };
-allow ceph_t self:capability sys_rawio;
+allow ceph_t ceph_var_run_t:sock_file { create unlink write setattr };
+allow ceph_t self:capability { sys_rawio chown };
 
 allow ceph_t self:tcp_socket { accept listen };
 corenet_tcp_connect_cyphesis_port(ceph_t)
diff --git a/src/.git_version b/src/.git_version
index 1dd57db..8c04d7c 100644
--- a/src/.git_version
+++ b/src/.git_version
@@ -1,2 +1,2 @@
-45107e21c568dd033c2f0a3107dec8f0b0e58374
-v10.2.2
+ecc23778eb545d8dd55e2e4735b53cc93f92e65b
+v10.2.3
diff --git a/src/Makefile-client.am b/src/Makefile-client.am
index 98e8ebb..07d11b5 100644
--- a/src/Makefile-client.am
+++ b/src/Makefile-client.am
@@ -62,6 +62,7 @@ if WITH_FUSE
 ceph_fuse_SOURCES = ceph_fuse.cc
 ceph_fuse_LDADD = $(LIBCLIENT_FUSE) $(CEPH_GLOBAL)
 bin_PROGRAMS += ceph-fuse
+sbin_SCRIPTS += mount.fuse.ceph
 
 if WITH_RBD
 rbd_fuse_SOURCES = rbd_fuse/rbd-fuse.cc
@@ -77,6 +78,12 @@ cephfs_SOURCES = cephfs.cc
 cephfs_LDADD = $(LIBCOMMON)
 bin_PROGRAMS += cephfs
 
+mount_ceph_SOURCES = mount/mount.ceph.c
+mount_ceph_LDADD = $(LIBSECRET) $(LIBCOMMON)
+if LINUX
+sbin_PROGRAMS += mount.ceph
+endif # LINUX
+
 python_PYTHON += pybind/ceph_volume_client.py
 
 # libcephfs (this should go somewhere else in the future)
diff --git a/src/Makefile-env.am b/src/Makefile-env.am
index df225d6..4771bad 100644
--- a/src/Makefile-env.am
+++ b/src/Makefile-env.am
@@ -250,6 +250,7 @@ LIBMON += -ljemalloc
 LIBOSD += -ljemalloc
 LIBMDS += -ljemalloc
 LIBRGW += -ljemalloc
+LIBCLIENT_FUSE += -ljemalloc
 endif # WITH_JEMALLOC
 
 if ENABLE_COVERAGE
@@ -265,6 +266,7 @@ LIBOSD += $(LIBOSDC) $(LIBOS)
 LIBMON += $(LIBPERFGLUE)
 LIBOSD += $(LIBPERFGLUE)
 LIBMDS += $(LIBPERFGLUE)
+LIBCLIENT_FUSE += $(LIBPERFGLUE)
 
 # OSD needs types
 LIBOSD += $(LIBOSD_TYPES) $(LIBOS_TYPES)
diff --git a/src/Makefile-server.am b/src/Makefile-server.am
index 1ea73b1..fecae94 100644
--- a/src/Makefile-server.am
+++ b/src/Makefile-server.am
@@ -16,13 +16,6 @@ BUILT_SOURCES += init-ceph
 
 shell_scripts += init-ceph
 
-mount_ceph_SOURCES = mount/mount.ceph.c
-mount_ceph_LDADD = $(LIBSECRET) $(LIBCOMMON)
-if LINUX
-su_sbin_PROGRAMS += mount.ceph
-endif # LINUX
-su_sbin_SCRIPTS += mount.fuse.ceph
-
 
 if WITH_MON
 
diff --git a/src/Makefile.in b/src/Makefile.in
index 06d9f7e..892aa1a 100644
--- a/src/Makefile.in
+++ b/src/Makefile.in
@@ -140,8 +140,8 @@ bin_PROGRAMS = $(am__EXEEXT_28) $(am__EXEEXT_29) $(am__EXEEXT_30) \
 	$(am__EXEEXT_44) $(am__EXEEXT_45) $(am__EXEEXT_46) \
 	$(am__EXEEXT_47)
 noinst_PROGRAMS = $(am__EXEEXT_65) $(am__EXEEXT_66) $(am__EXEEXT_67)
-sbin_PROGRAMS =
-su_sbin_PROGRAMS = $(am__EXEEXT_68)
+sbin_PROGRAMS = $(am__EXEEXT_68)
+su_sbin_PROGRAMS =
 check_PROGRAMS = $(am__EXEEXT_63) $(am__EXEEXT_64) \
 	unittest_subprocess$(EXEEXT) \
 	unittest_async_compressor$(EXEEXT)
@@ -181,33 +181,34 @@ check_PROGRAMS = $(am__EXEEXT_63) $(am__EXEEXT_64) \
 @WITH_JEMALLOC_TRUE at am__append_23 = -ljemalloc
 @WITH_JEMALLOC_TRUE at am__append_24 = -ljemalloc
 @WITH_JEMALLOC_TRUE at am__append_25 = -ljemalloc
- at ENABLE_COVERAGE_TRUE@am__append_26 = -lgcov
+ at WITH_JEMALLOC_TRUE@am__append_26 = -ljemalloc
+ at ENABLE_COVERAGE_TRUE@am__append_27 = -lgcov
 
 # libkv/libos linking order is ornery
- at WITH_SLIBROCKSDB_TRUE@am__append_27 = rocksdb/librocksdb.a
- at HAVE_BZLIB_TRUE@@WITH_SLIBROCKSDB_TRUE at am__append_28 = -lbz2
- at HAVE_LZ4_TRUE@@WITH_SLIBROCKSDB_TRUE at am__append_29 = -llz4
- at ENABLE_CLIENT_TRUE@am__append_30 = brag/client/ceph-brag ceph \
+ at WITH_SLIBROCKSDB_TRUE@am__append_28 = rocksdb/librocksdb.a
+ at HAVE_BZLIB_TRUE@@WITH_SLIBROCKSDB_TRUE at am__append_29 = -lbz2
+ at HAVE_LZ4_TRUE@@WITH_SLIBROCKSDB_TRUE at am__append_30 = -llz4
+ at ENABLE_CLIENT_TRUE@am__append_31 = brag/client/ceph-brag ceph \
 @ENABLE_CLIENT_TRUE@	ceph-post-file
- at ENABLE_CLIENT_TRUE@am__append_31 = brag/server brag/README.md brag/client
- at ENABLE_SERVER_TRUE@am__append_32 = libkv.a
- at ENABLE_SERVER_TRUE@am__append_33 = \
+ at ENABLE_CLIENT_TRUE@am__append_32 = brag/server brag/README.md brag/client
+ at ENABLE_SERVER_TRUE@am__append_33 = libkv.a
+ at ENABLE_SERVER_TRUE@am__append_34 = \
 @ENABLE_SERVER_TRUE@	kv/KeyValueDB.h \
 @ENABLE_SERVER_TRUE@	kv/LevelDBStore.h
 
- at ENABLE_SERVER_TRUE@@WITH_SLIBROCKSDB_TRUE at am__append_34 = -I rocksdb/include -fPIC
- at ENABLE_SERVER_TRUE@@WITH_SLIBROCKSDB_TRUE at am__append_35 = kv/RocksDBStore.cc
- at ENABLE_SERVER_TRUE@@WITH_SLIBROCKSDB_TRUE at am__append_36 = rocksdb/librocksdb.a
- at ENABLE_SERVER_TRUE@@WITH_SLIBROCKSDB_TRUE at am__append_37 = kv/RocksDBStore.h
- at ENABLE_SERVER_TRUE@@WITH_DLIBROCKSDB_TRUE at am__append_38 = kv/RocksDBStore.cc
- at ENABLE_SERVER_TRUE@@WITH_DLIBROCKSDB_TRUE at am__append_39 = -lrocksdb
- at ENABLE_SERVER_TRUE@@WITH_DLIBROCKSDB_TRUE at am__append_40 = kv/RocksDBStore.h
- at ENABLE_SERVER_TRUE@@WITH_KINETIC_TRUE at am__append_41 = kv/KineticStore.cc
- at ENABLE_SERVER_TRUE@@WITH_KINETIC_TRUE at am__append_42 = -std=gnu++11
- at ENABLE_SERVER_TRUE@@WITH_KINETIC_TRUE at am__append_43 = -lkinetic_client -lprotobuf -lglog -lgflags libcrypto.a
- at ENABLE_SERVER_TRUE@@WITH_KINETIC_TRUE at am__append_44 = kv/KineticStore.h
- at ENABLE_SERVER_TRUE@@WITH_MON_TRUE at am__append_45 = libmon.a
- at ENABLE_SERVER_TRUE@@WITH_MON_TRUE at am__append_46 = \
+ at ENABLE_SERVER_TRUE@@WITH_SLIBROCKSDB_TRUE at am__append_35 = -I rocksdb/include -fPIC
+ at ENABLE_SERVER_TRUE@@WITH_SLIBROCKSDB_TRUE at am__append_36 = kv/RocksDBStore.cc
+ at ENABLE_SERVER_TRUE@@WITH_SLIBROCKSDB_TRUE at am__append_37 = rocksdb/librocksdb.a
+ at ENABLE_SERVER_TRUE@@WITH_SLIBROCKSDB_TRUE at am__append_38 = kv/RocksDBStore.h
+ at ENABLE_SERVER_TRUE@@WITH_DLIBROCKSDB_TRUE at am__append_39 = kv/RocksDBStore.cc
+ at ENABLE_SERVER_TRUE@@WITH_DLIBROCKSDB_TRUE at am__append_40 = -lrocksdb
+ at ENABLE_SERVER_TRUE@@WITH_DLIBROCKSDB_TRUE at am__append_41 = kv/RocksDBStore.h
+ at ENABLE_SERVER_TRUE@@WITH_KINETIC_TRUE at am__append_42 = kv/KineticStore.cc
+ at ENABLE_SERVER_TRUE@@WITH_KINETIC_TRUE at am__append_43 = -std=gnu++11
+ at ENABLE_SERVER_TRUE@@WITH_KINETIC_TRUE at am__append_44 = -lkinetic_client -lprotobuf -lglog -lgflags libcrypto.a
+ at ENABLE_SERVER_TRUE@@WITH_KINETIC_TRUE at am__append_45 = kv/KineticStore.h
+ at ENABLE_SERVER_TRUE@@WITH_MON_TRUE at am__append_46 = libmon.a
+ at ENABLE_SERVER_TRUE@@WITH_MON_TRUE at am__append_47 = \
 @ENABLE_SERVER_TRUE@@WITH_MON_TRUE@	mon/AuthMonitor.h \
 @ENABLE_SERVER_TRUE@@WITH_MON_TRUE@	mon/DataHealthService.h \
 @ENABLE_SERVER_TRUE@@WITH_MON_TRUE@	mon/Elector.h \
@@ -236,10 +237,10 @@ check_PROGRAMS = $(am__EXEEXT_63) $(am__EXEEXT_64) \
 
 
 # There are no libmds_types so use the full mds library for dencoder for now
- at ENABLE_CLIENT_TRUE@am__append_47 = $(LIBMDS_SOURCES)
- at ENABLE_CLIENT_TRUE@am__append_48 = $(LIBMDS_DEPS)
- at ENABLE_SERVER_TRUE@@WITH_MDS_TRUE at am__append_49 = libmds.la
- at ENABLE_SERVER_TRUE@@WITH_MDS_TRUE at am__append_50 =  \
+ at ENABLE_CLIENT_TRUE@am__append_48 = $(LIBMDS_SOURCES)
+ at ENABLE_CLIENT_TRUE@am__append_49 = $(LIBMDS_DEPS)
+ at ENABLE_SERVER_TRUE@@WITH_MDS_TRUE at am__append_50 = libmds.la
+ at ENABLE_SERVER_TRUE@@WITH_MDS_TRUE at am__append_51 =  \
 @ENABLE_SERVER_TRUE@@WITH_MDS_TRUE@	mds/inode_backtrace.h \
 @ENABLE_SERVER_TRUE@@WITH_MDS_TRUE@	mds/flock.h mds/locks.c \
 @ENABLE_SERVER_TRUE@@WITH_MDS_TRUE@	mds/locks.h mds/CDentry.h \
@@ -295,12 +296,12 @@ check_PROGRAMS = $(am__EXEEXT_63) $(am__EXEEXT_64) \
 @ENABLE_SERVER_TRUE@@WITH_MDS_TRUE@	mds/events/ETableClient.h \
 @ENABLE_SERVER_TRUE@@WITH_MDS_TRUE@	mds/events/ETableServer.h \
 @ENABLE_SERVER_TRUE@@WITH_MDS_TRUE@	mds/events/EUpdate.h
- at WITH_LIBAIO_TRUE@am__append_51 = \
+ at WITH_LIBAIO_TRUE@am__append_52 = \
 @WITH_LIBAIO_TRUE@	os/bluestore/bluestore_types.cc \
 @WITH_LIBAIO_TRUE@	os/bluestore/bluefs_types.cc
 
- at ENABLE_SERVER_TRUE@@WITH_FUSE_TRUE at am__append_52 = os/FuseStore.cc
- at ENABLE_SERVER_TRUE@@WITH_LIBAIO_TRUE at am__append_53 = \
+ at ENABLE_SERVER_TRUE@@WITH_FUSE_TRUE at am__append_53 = os/FuseStore.cc
+ at ENABLE_SERVER_TRUE@@WITH_LIBAIO_TRUE at am__append_54 = \
 @ENABLE_SERVER_TRUE@@WITH_LIBAIO_TRUE@	os/bluestore/kv.cc \
 @ENABLE_SERVER_TRUE@@WITH_LIBAIO_TRUE@	os/bluestore/Allocator.cc \
 @ENABLE_SERVER_TRUE@@WITH_LIBAIO_TRUE@	os/bluestore/BlockDevice.cc \
@@ -311,15 +312,15 @@ check_PROGRAMS = $(am__EXEEXT_63) $(am__EXEEXT_64) \
 @ENABLE_SERVER_TRUE@@WITH_LIBAIO_TRUE@	os/bluestore/KernelDevice.cc \
 @ENABLE_SERVER_TRUE@@WITH_LIBAIO_TRUE@	os/bluestore/StupidAllocator.cc
 
- at ENABLE_SERVER_TRUE@@LINUX_TRUE at am__append_54 = os/filestore/BtrfsFileStoreBackend.cc
- at ENABLE_SERVER_TRUE@@WITH_LIBXFS_TRUE at am__append_55 = \
+ at ENABLE_SERVER_TRUE@@LINUX_TRUE at am__append_55 = os/filestore/BtrfsFileStoreBackend.cc
+ at ENABLE_SERVER_TRUE@@WITH_LIBXFS_TRUE at am__append_56 = \
 @ENABLE_SERVER_TRUE@@WITH_LIBXFS_TRUE@    os/filestore/XfsFileStoreBackend.cc \
 @ENABLE_SERVER_TRUE@@WITH_LIBXFS_TRUE@    os/fs/XFS.cc
 
- at ENABLE_SERVER_TRUE@@WITH_LIBZFS_TRUE at am__append_56 = os/filestore/ZFSFileStoreBackend.cc
- at ENABLE_SERVER_TRUE@am__append_57 = libos.a
- at ENABLE_SERVER_TRUE@@WITH_LTTNG_TRUE at am__append_58 = $(LIBOS_TP)
- at ENABLE_SERVER_TRUE@am__append_59 = \
+ at ENABLE_SERVER_TRUE@@WITH_LIBZFS_TRUE at am__append_57 = os/filestore/ZFSFileStoreBackend.cc
+ at ENABLE_SERVER_TRUE@am__append_58 = libos.a
+ at ENABLE_SERVER_TRUE@@WITH_LTTNG_TRUE at am__append_59 = $(LIBOS_TP)
+ at ENABLE_SERVER_TRUE@am__append_60 = \
 @ENABLE_SERVER_TRUE@	os/filestore/chain_xattr.h \
 @ENABLE_SERVER_TRUE@	os/filestore/BtrfsFileStoreBackend.h \
 @ENABLE_SERVER_TRUE@	os/filestore/CollectionIndex.h \
@@ -350,7 +351,7 @@ check_PROGRAMS = $(am__EXEEXT_63) $(am__EXEEXT_64) \
 @ENABLE_SERVER_TRUE@	os/ObjectMap.h \
 @ENABLE_SERVER_TRUE@	os/ObjectStore.h
 
- at ENABLE_SERVER_TRUE@@WITH_LIBAIO_TRUE at am__append_60 = \
+ at ENABLE_SERVER_TRUE@@WITH_LIBAIO_TRUE at am__append_61 = \
 @ENABLE_SERVER_TRUE@@WITH_LIBAIO_TRUE@	os/bluestore/bluefs_types.h \
 @ENABLE_SERVER_TRUE@@WITH_LIBAIO_TRUE@	os/bluestore/bluestore_types.h \
 @ENABLE_SERVER_TRUE@@WITH_LIBAIO_TRUE@	os/bluestore/kv.h \
@@ -363,18 +364,18 @@ check_PROGRAMS = $(am__EXEEXT_63) $(am__EXEEXT_64) \
 @ENABLE_SERVER_TRUE@@WITH_LIBAIO_TRUE@	os/bluestore/FreelistManager.h \
 @ENABLE_SERVER_TRUE@@WITH_LIBAIO_TRUE@	os/bluestore/StupidAllocator.h
 
- at ENABLE_SERVER_TRUE@@WITH_LIBZFS_TRUE at am__append_61 = libos_zfs.a
- at ENABLE_SERVER_TRUE@@WITH_LIBZFS_TRUE at am__append_62 = os/fs/ZFS.h
- at ENABLE_SERVER_TRUE@@WITH_SPDK_TRUE at am__append_63 = \
+ at ENABLE_SERVER_TRUE@@WITH_LIBZFS_TRUE at am__append_62 = libos_zfs.a
+ at ENABLE_SERVER_TRUE@@WITH_LIBZFS_TRUE at am__append_63 = os/fs/ZFS.h
+ at ENABLE_SERVER_TRUE@@WITH_SPDK_TRUE at am__append_64 = \
 @ENABLE_SERVER_TRUE@@WITH_SPDK_TRUE@	${LIBSPDK_CFLAGS} \
 @ENABLE_SERVER_TRUE@@WITH_SPDK_TRUE@	${LIBDPDK_CFLAGS} \
 @ENABLE_SERVER_TRUE@@WITH_SPDK_TRUE@	${LIBPCIACCESS_CFLAGS}
 
- at ENABLE_SERVER_TRUE@@WITH_SPDK_TRUE at am__append_64 = os/bluestore/NVMEDevice.cc
- at ENABLE_SERVER_TRUE@@WITH_SPDK_TRUE at am__append_65 = os/bluestore/NVMEDevice.h
- at ENABLE_SERVER_TRUE@@WITH_LIBAIO_TRUE at am__append_66 = ceph-bluefs-tool
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at am__append_67 = libosd.a
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at am__append_68 = \
+ at ENABLE_SERVER_TRUE@@WITH_SPDK_TRUE at am__append_65 = os/bluestore/NVMEDevice.cc
+ at ENABLE_SERVER_TRUE@@WITH_SPDK_TRUE at am__append_66 = os/bluestore/NVMEDevice.h
+ at ENABLE_SERVER_TRUE@@WITH_LIBAIO_TRUE at am__append_67 = ceph-bluefs-tool
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at am__append_68 = libosd.a
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at am__append_69 = \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	osd/ClassHandler.h \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	osd/HitSet.h \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	osd/OSD.h \
@@ -397,26 +398,26 @@ check_PROGRAMS = $(am__EXEEXT_63) $(am__EXEEXT_64) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	osd/ScrubStore.h \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	osd/osd_types.h
 
- at LINUX_TRUE@am__append_69 = -export-symbols-regex '.*__erasure_code_.*'
 @LINUX_TRUE at am__append_70 = -export-symbols-regex '.*__erasure_code_.*'
- at HAVE_NEON_TRUE@am__append_71 = libec_jerasure_neon.la
- at LINUX_TRUE@am__append_72 = -export-symbols-regex '.*__erasure_code_.*'
- at HAVE_SSSE3_TRUE@am__append_73 = libec_jerasure_sse3.la
- at LINUX_TRUE@am__append_74 = -export-symbols-regex '.*__erasure_code_.*'
- at HAVE_SSE4_PCLMUL_TRUE@am__append_75 = libec_jerasure_sse4.la
- at LINUX_TRUE@am__append_76 = -export-symbols-regex '.*__erasure_code_.*'
+ at LINUX_TRUE@am__append_71 = -export-symbols-regex '.*__erasure_code_.*'
+ at HAVE_NEON_TRUE@am__append_72 = libec_jerasure_neon.la
+ at LINUX_TRUE@am__append_73 = -export-symbols-regex '.*__erasure_code_.*'
+ at HAVE_SSSE3_TRUE@am__append_74 = libec_jerasure_sse3.la
+ at LINUX_TRUE@am__append_75 = -export-symbols-regex '.*__erasure_code_.*'
+ at HAVE_SSE4_PCLMUL_TRUE@am__append_76 = libec_jerasure_sse4.la
 @LINUX_TRUE at am__append_77 = -export-symbols-regex '.*__erasure_code_.*'
 @LINUX_TRUE at am__append_78 = -export-symbols-regex '.*__erasure_code_.*'
 @LINUX_TRUE at am__append_79 = -export-symbols-regex '.*__erasure_code_.*'
- at HAVE_NEON_TRUE@am__append_80 = libec_shec_neon.la
- at LINUX_TRUE@am__append_81 = -export-symbols-regex '.*__erasure_code_.*'
- at HAVE_SSSE3_TRUE@am__append_82 = libec_shec_sse3.la
- at LINUX_TRUE@am__append_83 = -export-symbols-regex '.*__erasure_code_.*'
- at HAVE_SSE4_PCLMUL_TRUE@am__append_84 = libec_shec_sse4.la
- at LINUX_TRUE@am__append_85 = -export-symbols-regex '.*__erasure_code_.*'
+ at LINUX_TRUE@am__append_80 = -export-symbols-regex '.*__erasure_code_.*'
+ at HAVE_NEON_TRUE@am__append_81 = libec_shec_neon.la
+ at LINUX_TRUE@am__append_82 = -export-symbols-regex '.*__erasure_code_.*'
+ at HAVE_SSSE3_TRUE@am__append_83 = libec_shec_sse3.la
+ at LINUX_TRUE@am__append_84 = -export-symbols-regex '.*__erasure_code_.*'
+ at HAVE_SSE4_PCLMUL_TRUE@am__append_85 = libec_shec_sse4.la
+ at LINUX_TRUE@am__append_86 = -export-symbols-regex '.*__erasure_code_.*'
 
 # ISA
- at WITH_BETTER_YASM_ELF64_TRUE@am__append_86 = \
+ at WITH_BETTER_YASM_ELF64_TRUE@am__append_87 = \
 @WITH_BETTER_YASM_ELF64_TRUE@	erasure-code/isa/ErasureCodeIsa.h \
 @WITH_BETTER_YASM_ELF64_TRUE@	erasure-code/isa/ErasureCodeIsaTableCache.h \
 @WITH_BETTER_YASM_ELF64_TRUE@	erasure-code/isa/xor_op.h \
@@ -427,13 +428,13 @@ check_PROGRAMS = $(am__EXEEXT_63) $(am__EXEEXT_64) \
 @WITH_BETTER_YASM_ELF64_TRUE@	erasure-code/isa/isa-l/include/gf_vect_mul.h \
 @WITH_BETTER_YASM_ELF64_TRUE@	erasure-code/isa/isa-l/include/types.h
 
- at WITH_BETTER_YASM_ELF64_TRUE@am__append_87 = libisa.la
- at LINUX_TRUE@@WITH_BETTER_YASM_ELF64_TRUE at am__append_88 = -export-symbols-regex '.*__erasure_code_.*'
- at WITH_BETTER_YASM_ELF64_TRUE@am__append_89 = libec_isa.la
- at LINUX_TRUE@am__append_90 = -export-symbols-regex '.*__compressor_.*'
+ at WITH_BETTER_YASM_ELF64_TRUE@am__append_88 = libisa.la
+ at LINUX_TRUE@@WITH_BETTER_YASM_ELF64_TRUE at am__append_89 = -export-symbols-regex '.*__erasure_code_.*'
+ at WITH_BETTER_YASM_ELF64_TRUE@am__append_90 = libec_isa.la
 @LINUX_TRUE at am__append_91 = -export-symbols-regex '.*__compressor_.*'
- at ENABLE_CLIENT_TRUE@am__append_92 = libclient.la
- at ENABLE_CLIENT_TRUE@am__append_93 = \
+ at LINUX_TRUE@am__append_92 = -export-symbols-regex '.*__compressor_.*'
+ at ENABLE_CLIENT_TRUE@am__append_93 = libclient.la
+ at ENABLE_CLIENT_TRUE@am__append_94 = \
 @ENABLE_CLIENT_TRUE@	client/Client.h \
 @ENABLE_CLIENT_TRUE@	client/Dentry.h \
 @ENABLE_CLIENT_TRUE@	client/Dir.h \
@@ -450,52 +451,52 @@ check_PROGRAMS = $(am__EXEEXT_63) $(am__EXEEXT_64) \
 @ENABLE_CLIENT_TRUE@	client/posix_acl.h \
 @ENABLE_CLIENT_TRUE@	client/UserGroups.h
 
- at ENABLE_CLIENT_TRUE@@WITH_FUSE_TRUE at am__append_94 = libclient_fuse.la
- at ENABLE_CLIENT_TRUE@@WITH_FUSE_TRUE at am__append_95 = client/fuse_ll.h
- at ENABLE_CLIENT_TRUE@am__append_96 = ceph_test_ioctls
- at WITH_TCMALLOC_TRUE@am__append_97 = perfglue/heap_profiler.cc
- at WITH_TCMALLOC_TRUE@am__append_98 = -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-free
+ at ENABLE_CLIENT_TRUE@@WITH_FUSE_TRUE at am__append_95 = libclient_fuse.la
+ at ENABLE_CLIENT_TRUE@@WITH_FUSE_TRUE at am__append_96 = client/fuse_ll.h
+ at ENABLE_CLIENT_TRUE@am__append_97 = ceph_test_ioctls
+ at WITH_TCMALLOC_TRUE@am__append_98 = perfglue/heap_profiler.cc
 @WITH_TCMALLOC_TRUE at am__append_99 = -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-free
- at WITH_TCMALLOC_FALSE@@WITH_TCMALLOC_MINIMAL_TRUE at am__append_100 = perfglue/heap_profiler.cc
- at WITH_TCMALLOC_FALSE@@WITH_TCMALLOC_MINIMAL_TRUE at am__append_101 = -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-free
+ at WITH_TCMALLOC_TRUE@am__append_100 = -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-free
+ at WITH_TCMALLOC_FALSE@@WITH_TCMALLOC_MINIMAL_TRUE at am__append_101 = perfglue/heap_profiler.cc
 @WITH_TCMALLOC_FALSE@@WITH_TCMALLOC_MINIMAL_TRUE at am__append_102 = -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-free
- at WITH_TCMALLOC_FALSE@@WITH_TCMALLOC_MINIMAL_FALSE at am__append_103 = perfglue/disabled_heap_profiler.cc
- at WITH_PROFILER_TRUE@am__append_104 = perfglue/cpu_profiler.cc
- at WITH_PROFILER_FALSE@am__append_105 = perfglue/disabled_stubs.cc
- at ENABLE_SERVER_TRUE@am__append_106 = \
+ at WITH_TCMALLOC_FALSE@@WITH_TCMALLOC_MINIMAL_TRUE at am__append_103 = -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-free
+ at WITH_TCMALLOC_FALSE@@WITH_TCMALLOC_MINIMAL_FALSE at am__append_104 = perfglue/disabled_heap_profiler.cc
+ at WITH_PROFILER_TRUE@am__append_105 = perfglue/cpu_profiler.cc
+ at WITH_PROFILER_FALSE@am__append_106 = perfglue/disabled_stubs.cc
+ at ENABLE_SERVER_TRUE@am__append_107 = \
 @ENABLE_SERVER_TRUE@	common/xattr.c \
 @ENABLE_SERVER_TRUE@	common/ipaddr.cc \
 @ENABLE_SERVER_TRUE@	common/ceph_json.cc \
 @ENABLE_SERVER_TRUE@	common/util.cc \
 @ENABLE_SERVER_TRUE@	common/pick_address.cc
 
- at LINUX_TRUE@am__append_107 = \
+ at LINUX_TRUE@am__append_108 = \
 @LINUX_TRUE@	common/linux_version.c
 
- at SOLARIS_TRUE@am__append_108 = \
+ at SOLARIS_TRUE@am__append_109 = \
 @SOLARIS_TRUE@        common/solaris_errno.cc
 
- at AIX_TRUE@am__append_109 = \
+ at AIX_TRUE@am__append_110 = \
 @AIX_TRUE@        common/aix_errno.cc
 
- at ENABLE_XIO_TRUE@am__append_110 = \
+ at ENABLE_XIO_TRUE@am__append_111 = \
 @ENABLE_XIO_TRUE@	common/address_helper.cc
 
- at WITH_GOOD_YASM_ELF64_TRUE@am__append_111 = common/crc32c_intel_fast_asm.S common/crc32c_intel_fast_zero_asm.S
- at HAVE_ARMV8_CRC_TRUE@am__append_112 = libcommon_crc_aarch64.la
+ at WITH_GOOD_YASM_ELF64_TRUE@am__append_112 = common/crc32c_intel_fast_asm.S common/crc32c_intel_fast_zero_asm.S
 @HAVE_ARMV8_CRC_TRUE at am__append_113 = libcommon_crc_aarch64.la
- at LINUX_TRUE@am__append_114 = -lrt -lblkid
- at ENABLE_XIO_TRUE@am__append_115 = \
+ at HAVE_ARMV8_CRC_TRUE@am__append_114 = libcommon_crc_aarch64.la
+ at LINUX_TRUE@am__append_115 = -lrt -lblkid
+ at ENABLE_XIO_TRUE@am__append_116 = \
 @ENABLE_XIO_TRUE@	common/address_helper.h
 
- at LINUX_TRUE@am__append_116 = libsecret.la
- at LINUX_TRUE@am__append_117 = msg/async/EventEpoll.cc
- at DARWIN_TRUE@am__append_118 = msg/async/EventKqueue.cc
- at FREEBSD_TRUE@am__append_119 = msg/async/EventKqueue.cc
- at LINUX_TRUE@am__append_120 = msg/async/EventEpoll.h
- at DARWIN_TRUE@am__append_121 = msg/async/EventKqueue.h
- at FREEBSD_TRUE@am__append_122 = msg/async/EventKqueue.h
- at ENABLE_XIO_TRUE@am__append_123 = \
+ at LINUX_TRUE@am__append_117 = libsecret.la
+ at LINUX_TRUE@am__append_118 = msg/async/EventEpoll.cc
+ at DARWIN_TRUE@am__append_119 = msg/async/EventKqueue.cc
+ at FREEBSD_TRUE@am__append_120 = msg/async/EventKqueue.cc
+ at LINUX_TRUE@am__append_121 = msg/async/EventEpoll.h
+ at DARWIN_TRUE@am__append_122 = msg/async/EventKqueue.h
+ at FREEBSD_TRUE@am__append_123 = msg/async/EventKqueue.h
+ at ENABLE_XIO_TRUE@am__append_124 = \
 @ENABLE_XIO_TRUE@	msg/xio/QueueStrategy.cc \
 @ENABLE_XIO_TRUE@	msg/xio/XioConnection.cc \
 @ENABLE_XIO_TRUE@	msg/xio/XioMessenger.cc \
@@ -503,7 +504,7 @@ check_PROGRAMS = $(am__EXEEXT_63) $(am__EXEEXT_64) \
 @ENABLE_XIO_TRUE@	msg/xio/XioPortal.cc \
 @ENABLE_XIO_TRUE@	msg/xio/XioPool.cc
 
- at ENABLE_XIO_TRUE@am__append_124 = \
+ at ENABLE_XIO_TRUE@am__append_125 = \
 @ENABLE_XIO_TRUE@	msg/xio/DispatchStrategy.h \
 @ENABLE_XIO_TRUE@	msg/xio/FastStrategy.h \
 @ENABLE_XIO_TRUE@	msg/xio/QueueStrategy.h \
@@ -515,22 +516,22 @@ check_PROGRAMS = $(am__EXEEXT_63) $(am__EXEEXT_64) \
 @ENABLE_XIO_TRUE@	msg/xio/XioPortal.h \
 @ENABLE_XIO_TRUE@	msg/xio/XioSubmit.h
 
- at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE at am__append_125 = \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE at am__append_126 = \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(srcdir)/include/rados/librgw.h \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(srcdir)/include/rados/rgw_file.h
 
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at am__append_126 =  \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at am__append_127 =  \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	librados_internal.la \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	librados_api.la \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	libjournal.la
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at am__append_127 = \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at am__append_128 = \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	librados_internal.la libcls_lock_client.la \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(LIBOSDC) $(LIBCOMMON_DEPS)
 
- at ENABLE_CLIENT_TRUE@@LINUX_TRUE@@WITH_RADOS_TRUE at am__append_128 = -fvisibility=hidden -fvisibility-inlines-hidden
- at ENABLE_CLIENT_TRUE@@LINUX_TRUE@@WITH_RADOS_TRUE at am__append_129 = -Xcompiler -Xlinker -Xcompiler '--exclude-libs=ALL'
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at am__append_130 = librados.la
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at am__append_131 = \
+ at ENABLE_CLIENT_TRUE@@LINUX_TRUE@@WITH_RADOS_TRUE at am__append_129 = -fvisibility=hidden -fvisibility-inlines-hidden
+ at ENABLE_CLIENT_TRUE@@LINUX_TRUE@@WITH_RADOS_TRUE at am__append_130 = -Xcompiler -Xlinker -Xcompiler '--exclude-libs=ALL'
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at am__append_131 = librados.la
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at am__append_132 = \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	librados/snap_set_diff.h \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	librados/AioCompletionImpl.h \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	librados/IoCtxImpl.h \
@@ -539,19 +540,20 @@ check_PROGRAMS = $(am__EXEEXT_63) $(am__EXEEXT_64) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	librados/RadosXattrIter.h \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	librados/ListObjectImpl.h
 
- at ENABLE_CLIENT_TRUE@@LINUX_TRUE@@WITH_RADOSSTRIPER_TRUE@@WITH_RADOS_TRUE at am__append_132 = -export-symbols-regex '^radosstriper_.*'
- at ENABLE_CLIENT_TRUE@@WITH_RADOSSTRIPER_TRUE@@WITH_RADOS_TRUE at am__append_133 = libradosstriper.la
- at ENABLE_CLIENT_TRUE@@WITH_RADOSSTRIPER_TRUE@@WITH_RADOS_TRUE at am__append_134 = \
+ at ENABLE_CLIENT_TRUE@@LINUX_TRUE@@WITH_RADOSSTRIPER_TRUE@@WITH_RADOS_TRUE at am__append_133 = -export-symbols-regex '^radosstriper_.*'
+ at ENABLE_CLIENT_TRUE@@WITH_RADOSSTRIPER_TRUE@@WITH_RADOS_TRUE at am__append_134 = libradosstriper.la
+ at ENABLE_CLIENT_TRUE@@WITH_RADOSSTRIPER_TRUE@@WITH_RADOS_TRUE at am__append_135 = \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSSTRIPER_TRUE@@WITH_RADOS_TRUE@	libradosstriper/RadosStriperImpl.h \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSSTRIPER_TRUE@@WITH_RADOS_TRUE@	libradosstriper/MultiAioCompletionImpl.h
 
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at am__append_135 = \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at am__append_136 = \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	journal/AsyncOpTracker.h \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	journal/Entry.h \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	journal/Future.h \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	journal/FutureImpl.h \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	journal/Journaler.h \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	journal/JournalMetadata.h \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	journal/JournalMetadataListener.h \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	journal/JournalPlayer.h \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	journal/JournalRecorder.h \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	journal/JournalTrimmer.h \
@@ -559,14 +561,15 @@ check_PROGRAMS = $(am__EXEEXT_63) $(am__EXEEXT_64) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	journal/ObjectRecorder.h \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	journal/ReplayEntry.h \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	journal/ReplayHandler.h \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	journal/Settings.h \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	journal/Utils.h
 
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at am__append_136 = libjournal.la
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__append_137 = librbd_internal.la \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at am__append_137 = libjournal.la
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__append_138 = librbd_internal.la \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	librbd_api.la
- at ENABLE_CLIENT_TRUE@@LINUX_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__append_138 = -Xcompiler -Xlinker -Xcompiler '--exclude-libs=ALL'
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__append_139 = librbd.la
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__append_140 = \
+ at ENABLE_CLIENT_TRUE@@LINUX_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__append_139 = -Xcompiler -Xlinker -Xcompiler '--exclude-libs=ALL'
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__append_140 = librbd.la
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__append_141 = \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	librbd/AioCompletion.h \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	librbd/AioImageRequest.h \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	librbd/AioImageRequestWQ.h \
@@ -635,7 +638,7 @@ check_PROGRAMS = $(am__EXEEXT_63) $(am__EXEEXT_64) \
 
 
 # inject rgw stuff in the decoder testcase
- at ENABLE_CLIENT_TRUE@am__append_141 = \
+ at ENABLE_CLIENT_TRUE@am__append_142 = \
 @ENABLE_CLIENT_TRUE@	rgw/rgw_dencoder.cc \
 @ENABLE_CLIENT_TRUE@	rgw/rgw_acl.cc \
 @ENABLE_CLIENT_TRUE@	rgw/rgw_basic_types.cc \
@@ -644,7 +647,7 @@ check_PROGRAMS = $(am__EXEEXT_63) $(am__EXEEXT_64) \
 @ENABLE_CLIENT_TRUE@	rgw/rgw_json_enc.cc \
 @ENABLE_CLIENT_TRUE@	rgw/rgw_keystone.cc
 
- at ENABLE_CLIENT_TRUE@am__append_142 = -lcurl -lexpat \
+ at ENABLE_CLIENT_TRUE@am__append_143 = -lcurl -lexpat \
 @ENABLE_CLIENT_TRUE@	libcls_version_client.la \
 @ENABLE_CLIENT_TRUE@	libcls_log_client.la \
 @ENABLE_CLIENT_TRUE@	libcls_refcount_client.la \
@@ -658,9 +661,9 @@ check_PROGRAMS = $(am__EXEEXT_63) $(am__EXEEXT_64) \
 @ENABLE_CLIENT_TRUE@	libcls_user_client.la \
 @ENABLE_CLIENT_TRUE@	libcls_numops_client.la \
 @ENABLE_CLIENT_TRUE@	libcls_journal_client.la
- at ENABLE_CLIENT_TRUE@@WITH_OPENLDAP_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE at am__append_143 = rgw/rgw_ldap.cc
+ at ENABLE_CLIENT_TRUE@@WITH_OPENLDAP_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE at am__append_144 = rgw/rgw_ldap.cc
 # noinst_LTLIBRARIES += librgw.la
- at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE at am__append_144 = \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE at am__append_145 = \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(LIBRADOS) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	libcls_rgw_client.la \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	libcls_log_client.la \
@@ -677,16 +680,16 @@ check_PROGRAMS = $(am__EXEEXT_63) $(am__EXEEXT_64) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	-lfcgi \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	-ldl
 
- at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE at am__append_145 = librgw.la
- at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE at am__append_146 = -lssl -lcrypto
- at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE at am__append_147 = libcivetweb.la
- at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE at am__append_148 = radosgw \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE at am__append_146 = librgw.la
+ at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE at am__append_147 = -lssl -lcrypto
+ at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE at am__append_148 = libcivetweb.la
+ at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE at am__append_149 = radosgw \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	radosgw-admin \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	radosgw-token \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	radosgw-object-expirer
- at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE at am__append_149 = ceph_rgw_multiparser \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE at am__append_150 = ceph_rgw_multiparser \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	ceph_rgw_jsonparser
- at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE at am__append_150 = \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE at am__append_151 = \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	rgw/rgw_acl.h \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	rgw/rgw_acl_s3.h \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	rgw/rgw_acl_swift.h \
@@ -772,7 +775,7 @@ check_PROGRAMS = $(am__EXEEXT_63) $(am__EXEEXT_64) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	civetweb/include/civetweb_conf.h \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	civetweb/src/md5.h
 
- at ENABLE_CLIENT_TRUE@am__append_151 = libcls_lock_client.la \
+ at ENABLE_CLIENT_TRUE@am__append_152 = libcls_lock_client.la \
 @ENABLE_CLIENT_TRUE@	libcls_refcount_client.la \
 @ENABLE_CLIENT_TRUE@	libcls_version_client.la \
 @ENABLE_CLIENT_TRUE@	libcls_log_client.la \
@@ -784,7 +787,7 @@ check_PROGRAMS = $(am__EXEEXT_63) $(am__EXEEXT_64) \
 @ENABLE_CLIENT_TRUE@	libcls_cephfs_client.la \
 @ENABLE_CLIENT_TRUE@	libcls_numops_client.la \
 @ENABLE_CLIENT_TRUE@	libcls_journal_client.la
- at ENABLE_CLIENT_TRUE@am__append_152 = \
+ at ENABLE_CLIENT_TRUE@am__append_153 = \
 @ENABLE_CLIENT_TRUE@	cls/lock/cls_lock_types.h \
 @ENABLE_CLIENT_TRUE@	cls/lock/cls_lock_ops.h \
 @ENABLE_CLIENT_TRUE@	cls/lock/cls_lock_client.h \
@@ -820,7 +823,7 @@ check_PROGRAMS = $(am__EXEEXT_63) $(am__EXEEXT_64) \
 @ENABLE_CLIENT_TRUE@	cls/journal/cls_journal_client.h \
 @ENABLE_CLIENT_TRUE@	cls/journal/cls_journal_types.h
 
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at am__append_153 = libcls_hello.la \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at am__append_154 = libcls_hello.la \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	libcls_numops.la \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	libcls_rbd.la \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	libcls_lock.la \
@@ -834,13 +837,13 @@ check_PROGRAMS = $(am__EXEEXT_63) $(am__EXEEXT_64) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	libcls_rgw.la \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	libcls_cephfs.la \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	libcls_journal.la
- at ENABLE_SERVER_TRUE@@LINUX_TRUE@@WITH_OSD_TRUE at am__append_154 = libcls_kvs.la
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at am__append_155 = \
+ at ENABLE_SERVER_TRUE@@LINUX_TRUE@@WITH_OSD_TRUE at am__append_155 = libcls_kvs.la
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at am__append_156 = \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	key_value_store/key_value_structure.h \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	key_value_store/kv_flat_btree_async.h \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	key_value_store/kvs_arg_types.h
 
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__append_156 = rbd_replay/ActionTypes.h \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__append_157 = rbd_replay/ActionTypes.h \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	rbd_replay/actions.hpp \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	rbd_replay/BoundedBuffer.hpp \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	rbd_replay/BufferReader.h \
@@ -850,27 +853,27 @@ check_PROGRAMS = $(am__EXEEXT_63) $(am__EXEEXT_64) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	rbd_replay/rbd_loc.hpp \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	rbd_replay/rbd_replay_debug.hpp \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	rbd_replay/Replayer.hpp
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__append_157 = librbd_replay_types.la \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__append_158 = librbd_replay_types.la \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	librbd_replay.la \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	librbd_replay_ios.la
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__append_158 = librbd_replay_types.la
- at ENABLE_CLIENT_TRUE@@LINUX_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__append_159 = rbd-replay
- at ENABLE_CLIENT_TRUE@@WITH_BABELTRACE_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__append_160 = rbd-replay-prep
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at am__append_161 = \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__append_159 = librbd_replay_types.la
+ at ENABLE_CLIENT_TRUE@@LINUX_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__append_160 = rbd-replay
+ at ENABLE_CLIENT_TRUE@@WITH_BABELTRACE_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__append_161 = rbd-replay-prep
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at am__append_162 = \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	test/erasure-code/test-erasure-code.sh \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	test/erasure-code/test-erasure-eio.sh
 
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at am__append_162 = test/erasure-code/ceph_erasure_code_benchmark.h \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at am__append_163 = test/erasure-code/ceph_erasure_code_benchmark.h \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	test/erasure-code/ceph_erasure_code_benchmark.h \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	test/erasure-code/ErasureCodeExample.h
- at ENABLE_SERVER_TRUE@@LINUX_TRUE@@WITH_OSD_TRUE at am__append_163 = -ldl
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at am__append_164 = ceph_erasure_code_benchmark \
+ at ENABLE_SERVER_TRUE@@LINUX_TRUE@@WITH_OSD_TRUE at am__append_164 = -ldl
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at am__append_165 = ceph_erasure_code_benchmark \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	ceph_erasure_code
- at ENABLE_SERVER_TRUE@@LINUX_TRUE@@WITH_OSD_TRUE at am__append_165 = -ldl
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at am__append_166 = ceph_erasure_code_non_regression
- at ENABLE_SERVER_TRUE@@LINUX_TRUE@@WITH_OSD_TRUE at am__append_167 = -ldl
- at ENABLE_SERVER_TRUE@@LINUX_TRUE@@WITH_OSD_TRUE at am__append_168 = -export-symbols-regex '.*__erasure_code_.*'
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at am__append_169 = libec_example.la \
+ at ENABLE_SERVER_TRUE@@LINUX_TRUE@@WITH_OSD_TRUE at am__append_166 = -ldl
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at am__append_167 = ceph_erasure_code_non_regression
+ at ENABLE_SERVER_TRUE@@LINUX_TRUE@@WITH_OSD_TRUE at am__append_168 = -ldl
+ at ENABLE_SERVER_TRUE@@LINUX_TRUE@@WITH_OSD_TRUE at am__append_169 = -export-symbols-regex '.*__erasure_code_.*'
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at am__append_170 = libec_example.la \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	libec_missing_entry_point.la \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	libec_missing_version.la \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	libec_hangs.la \
@@ -884,7 +887,6 @@ check_PROGRAMS = $(am__EXEEXT_63) $(am__EXEEXT_64) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	libec_test_shec_sse4.la \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	libec_test_shec_sse3.la \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	libec_test_shec_generic.la
- at ENABLE_SERVER_TRUE@@LINUX_TRUE@@WITH_OSD_TRUE at am__append_170 = -export-symbols-regex '.*__erasure_code_.*'
 @ENABLE_SERVER_TRUE@@LINUX_TRUE@@WITH_OSD_TRUE at am__append_171 = -export-symbols-regex '.*__erasure_code_.*'
 @ENABLE_SERVER_TRUE@@LINUX_TRUE@@WITH_OSD_TRUE at am__append_172 = -export-symbols-regex '.*__erasure_code_.*'
 @ENABLE_SERVER_TRUE@@LINUX_TRUE@@WITH_OSD_TRUE at am__append_173 = -export-symbols-regex '.*__erasure_code_.*'
@@ -893,19 +895,20 @@ check_PROGRAMS = $(am__EXEEXT_63) $(am__EXEEXT_64) \
 @ENABLE_SERVER_TRUE@@LINUX_TRUE@@WITH_OSD_TRUE at am__append_176 = -export-symbols-regex '.*__erasure_code_.*'
 @ENABLE_SERVER_TRUE@@LINUX_TRUE@@WITH_OSD_TRUE at am__append_177 = -export-symbols-regex '.*__erasure_code_.*'
 @ENABLE_SERVER_TRUE@@LINUX_TRUE@@WITH_OSD_TRUE at am__append_178 = -export-symbols-regex '.*__erasure_code_.*'
- at ENABLE_SERVER_TRUE@@LINUX_TRUE@@WITH_OSD_TRUE at am__append_179 = -ldl
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at am__append_180 = unittest_erasure_code_plugin \
+ at ENABLE_SERVER_TRUE@@LINUX_TRUE@@WITH_OSD_TRUE at am__append_179 = -export-symbols-regex '.*__erasure_code_.*'
+ at ENABLE_SERVER_TRUE@@LINUX_TRUE@@WITH_OSD_TRUE at am__append_180 = -ldl
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at am__append_181 = unittest_erasure_code_plugin \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	unittest_erasure_code \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	unittest_erasure_code_jerasure \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	unittest_erasure_code_plugin_jerasure
- at ENABLE_SERVER_TRUE@@LINUX_TRUE@@WITH_OSD_TRUE at am__append_181 = -ldl
 @ENABLE_SERVER_TRUE@@LINUX_TRUE@@WITH_OSD_TRUE at am__append_182 = -ldl
- at ENABLE_SERVER_TRUE@@LINUX_TRUE@@WITH_BETTER_YASM_ELF64_TRUE@@WITH_OSD_TRUE at am__append_183 = -ldl
- at ENABLE_SERVER_TRUE@@WITH_BETTER_YASM_ELF64_TRUE@@WITH_OSD_TRUE at am__append_184 = unittest_erasure_code_isa \
+ at ENABLE_SERVER_TRUE@@LINUX_TRUE@@WITH_OSD_TRUE at am__append_183 = -ldl
+ at ENABLE_SERVER_TRUE@@LINUX_TRUE@@WITH_BETTER_YASM_ELF64_TRUE@@WITH_OSD_TRUE at am__append_184 = -ldl
+ at ENABLE_SERVER_TRUE@@WITH_BETTER_YASM_ELF64_TRUE@@WITH_OSD_TRUE at am__append_185 = unittest_erasure_code_isa \
 @ENABLE_SERVER_TRUE@@WITH_BETTER_YASM_ELF64_TRUE@@WITH_OSD_TRUE@	unittest_erasure_code_plugin_isa
- at ENABLE_SERVER_TRUE@@LINUX_TRUE@@WITH_BETTER_YASM_ELF64_TRUE@@WITH_OSD_TRUE at am__append_185 = -ldl
- at ENABLE_SERVER_TRUE@@LINUX_TRUE@@WITH_OSD_TRUE at am__append_186 = -ldl
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at am__append_187 =  \
+ at ENABLE_SERVER_TRUE@@LINUX_TRUE@@WITH_BETTER_YASM_ELF64_TRUE@@WITH_OSD_TRUE at am__append_186 = -ldl
+ at ENABLE_SERVER_TRUE@@LINUX_TRUE@@WITH_OSD_TRUE at am__append_187 = -ldl
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at am__append_188 =  \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	unittest_erasure_code_lrc \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	unittest_erasure_code_plugin_lrc \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	unittest_erasure_code_shec \
@@ -919,61 +922,61 @@ check_PROGRAMS = $(am__EXEEXT_63) $(am__EXEEXT_64) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	unittest_compression_plugin_snappy \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	unittest_compression_zlib \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	unittest_compression_plugin_zlib
- at ENABLE_SERVER_TRUE@@LINUX_TRUE@@WITH_OSD_TRUE at am__append_188 = -ldl
 @ENABLE_SERVER_TRUE@@LINUX_TRUE@@WITH_OSD_TRUE at am__append_189 = -ldl
 @ENABLE_SERVER_TRUE@@LINUX_TRUE@@WITH_OSD_TRUE at am__append_190 = -ldl
 @ENABLE_SERVER_TRUE@@LINUX_TRUE@@WITH_OSD_TRUE at am__append_191 = -ldl
 @ENABLE_SERVER_TRUE@@LINUX_TRUE@@WITH_OSD_TRUE at am__append_192 = -ldl
 @ENABLE_SERVER_TRUE@@LINUX_TRUE@@WITH_OSD_TRUE at am__append_193 = -ldl
- at ENABLE_SERVER_TRUE@@LINUX_TRUE@@WITH_OSD_TRUE at am__append_194 = -export-symbols-regex '.*__erasure_code_.*'
+ at ENABLE_SERVER_TRUE@@LINUX_TRUE@@WITH_OSD_TRUE at am__append_194 = -ldl
 @ENABLE_SERVER_TRUE@@LINUX_TRUE@@WITH_OSD_TRUE at am__append_195 = -export-symbols-regex '.*__erasure_code_.*'
 @ENABLE_SERVER_TRUE@@LINUX_TRUE@@WITH_OSD_TRUE at am__append_196 = -export-symbols-regex '.*__erasure_code_.*'
 @ENABLE_SERVER_TRUE@@LINUX_TRUE@@WITH_OSD_TRUE at am__append_197 = -export-symbols-regex '.*__erasure_code_.*'
- at ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE at am__append_198 = test/messenger/message_helper.h \
+ at ENABLE_SERVER_TRUE@@LINUX_TRUE@@WITH_OSD_TRUE at am__append_198 = -export-symbols-regex '.*__erasure_code_.*'
+ at ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE at am__append_199 = test/messenger/message_helper.h \
 @ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@	test/messenger/simple_dispatcher.h \
 @ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@	test/messenger/xio_dispatcher.h
- at ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@@LINUX_TRUE at am__append_199 = -ldl
 @ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@@LINUX_TRUE at am__append_200 = -ldl
- at ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE at am__append_201 = simple_server \
+ at ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@@LINUX_TRUE at am__append_201 = -ldl
+ at ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE at am__append_202 = simple_server \
 @ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@	simple_client xio_server \
 @ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@	xio_client
- at ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@@LINUX_TRUE at am__append_202 = -ldl
 @ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@@LINUX_TRUE at am__append_203 = -ldl
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at am__append_204 = test/compressor/compressor_example.h
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at am__append_205 = libceph_example.la
- at ENABLE_SERVER_TRUE@@LINUX_TRUE@@WITH_OSD_TRUE at am__append_206 = -ldl
+ at ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@@LINUX_TRUE at am__append_204 = -ldl
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at am__append_205 = test/compressor/compressor_example.h
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at am__append_206 = libceph_example.la
 @ENABLE_SERVER_TRUE@@LINUX_TRUE@@WITH_OSD_TRUE at am__append_207 = -ldl
 @ENABLE_SERVER_TRUE@@LINUX_TRUE@@WITH_OSD_TRUE at am__append_208 = -ldl
 @ENABLE_SERVER_TRUE@@LINUX_TRUE@@WITH_OSD_TRUE at am__append_209 = -ldl
 @ENABLE_SERVER_TRUE@@LINUX_TRUE@@WITH_OSD_TRUE at am__append_210 = -ldl
- at COMPILER_HAS_VTA_TRUE@@ENABLE_CLIENT_TRUE at am__append_211 = -fno-var-tracking-assignments
+ at ENABLE_SERVER_TRUE@@LINUX_TRUE@@WITH_OSD_TRUE at am__append_211 = -ldl
 @COMPILER_HAS_VTA_TRUE@@ENABLE_CLIENT_TRUE at am__append_212 = -fno-var-tracking-assignments
- at ENABLE_CLIENT_TRUE@@WITH_RBD_TRUE at am__append_213 = -DWITH_RBD
- at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE at am__append_214 = -DWITH_RADOSGW
- at ENABLE_CLIENT_TRUE@am__append_215 = ceph-dencoder
- at ENABLE_CLIENT_TRUE@am__append_216 = \
+ at COMPILER_HAS_VTA_TRUE@@ENABLE_CLIENT_TRUE at am__append_213 = -fno-var-tracking-assignments
+ at ENABLE_CLIENT_TRUE@@WITH_RBD_TRUE at am__append_214 = -DWITH_RBD
+ at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE at am__append_215 = -DWITH_RADOSGW
+ at ENABLE_CLIENT_TRUE@am__append_216 = ceph-dencoder
+ at ENABLE_CLIENT_TRUE@am__append_217 = \
 @ENABLE_CLIENT_TRUE@	test/encoding/test_ceph_time.h
 
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at am__append_217 = libradostest.la \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at am__append_218 = libradostest.la \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	librados_test_stub.la \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	libjournal_test_mock.la
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at am__append_218 = ceph_test_rados \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at am__append_219 = ceph_test_rados \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	ceph_test_mutate
- at ENABLE_CLIENT_TRUE@@WITH_BUILD_TESTS_TRUE@@WITH_RADOS_TRUE at am__append_219 = test_build_librados
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at am__append_220 =  \
+ at ENABLE_CLIENT_TRUE@@WITH_BUILD_TESTS_TRUE@@WITH_RADOS_TRUE at am__append_220 = test_build_librados
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at am__append_221 =  \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	ceph_smalliobench \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	ceph_omapbench \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	ceph_objectstore_bench
- at ENABLE_CLIENT_TRUE@@LINUX_TRUE@@WITH_RADOS_TRUE at am__append_221 = ceph_kvstorebench \
+ at ENABLE_CLIENT_TRUE@@LINUX_TRUE@@WITH_RADOS_TRUE at am__append_222 = ceph_kvstorebench \
 @ENABLE_CLIENT_TRUE@@LINUX_TRUE@@WITH_RADOS_TRUE@	ceph_test_rados_list_parallel \
 @ENABLE_CLIENT_TRUE@@LINUX_TRUE@@WITH_RADOS_TRUE@	ceph_test_rados_open_pools_parallel \
 @ENABLE_CLIENT_TRUE@@LINUX_TRUE@@WITH_RADOS_TRUE@	ceph_test_rados_delete_pools_parallel \
 @ENABLE_CLIENT_TRUE@@LINUX_TRUE@@WITH_RADOS_TRUE@	ceph_test_rados_watch_notify
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at am__append_222 =  \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at am__append_223 =  \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	unittest_librados \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	unittest_librados_config \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	unittest_journal
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at am__append_223 =  \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at am__append_224 =  \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	ceph_multi_stress_watch \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	ceph_test_cls_rbd \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	ceph_test_cls_refcount \
@@ -1002,7 +1005,7 @@ check_PROGRAMS = $(am__EXEEXT_63) $(am__EXEEXT_64) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	ceph_test_rados_api_lock \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	ceph_test_rados_api_tmap_migrate \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	ceph_test_stress_watch
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at am__append_224 = test/librados_test_stub/LibradosTestStub.h \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at am__append_225 = test/librados_test_stub/LibradosTestStub.h \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	test/librados_test_stub/MockTestMemIoCtxImpl.h \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	test/librados_test_stub/MockTestMemRadosClient.h \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	test/librados_test_stub/TestClassHandler.h \
@@ -1012,20 +1015,19 @@ check_PROGRAMS = $(am__EXEEXT_63) $(am__EXEEXT_64) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	test/librados_test_stub/TestMemIoCtxImpl.h \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	test/librados_test_stub/TestIoCtxImpl.h \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	test/journal/mock/MockJournaler.h
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__append_225 = ceph_smalliobenchrbd \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__append_226 = ceph_smalliobenchrbd \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	ceph_test_librbd \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	ceph_test_librbd_api \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	ceph_test_rbd_mirror \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	ceph_test_rbd_mirror_random_write \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	ceph_test_rbd_mirror_image_replay
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__append_226 = unittest_rbd_replay
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__append_227 = librbd_test.la \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	ceph_test_rbd_mirror_random_write
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__append_227 = unittest_rbd_replay
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__append_228 = librbd_test.la \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	librbd_test_mock.la \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	librbd_mirror_test.la
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__append_228 = unittest_librbd \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__append_229 = unittest_librbd \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	unittest_rbd_mirror
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__append_229 = test/run-rbd-unit-tests.sh
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__append_230 = test/librbd/test_fixture.h \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__append_230 = test/run-rbd-unit-tests.sh
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__append_231 = test/librbd/test_fixture.h \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	test/librbd/test_mock_fixture.h \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	test/librbd/test_support.h \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	test/librbd/mock/MockAioImageRequestWQ.h \
@@ -1042,23 +1044,23 @@ check_PROGRAMS = $(am__EXEEXT_63) $(am__EXEEXT_64) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	test/librbd/object_map/mock/MockInvalidateRequest.h \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	test/rbd_mirror/test_fixture.h \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	test/rbd_mirror/test_mock_fixture.h
- at ENABLE_CLIENT_TRUE@@LINUX_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__append_231 = ceph_test_librbd_fsx
- at ENABLE_CLIENT_TRUE@@WITH_RADOSSTRIPER_TRUE@@WITH_RADOS_TRUE at am__append_232 = libradosstripertest.la
- at ENABLE_CLIENT_TRUE@@WITH_RADOSSTRIPER_TRUE@@WITH_RADOS_TRUE at am__append_233 = ceph_test_rados_striper_api_io \
+ at ENABLE_CLIENT_TRUE@@LINUX_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__append_232 = ceph_test_librbd_fsx
+ at ENABLE_CLIENT_TRUE@@WITH_RADOSSTRIPER_TRUE@@WITH_RADOS_TRUE at am__append_233 = libradosstripertest.la
+ at ENABLE_CLIENT_TRUE@@WITH_RADOSSTRIPER_TRUE@@WITH_RADOS_TRUE at am__append_234 = ceph_test_rados_striper_api_io \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSSTRIPER_TRUE@@WITH_RADOS_TRUE@	ceph_test_rados_striper_api_aio \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSSTRIPER_TRUE@@WITH_RADOS_TRUE@	ceph_test_rados_striper_api_striping
- at ENABLE_CLIENT_TRUE@@WITH_BUILD_TESTS_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE at am__append_234 = test_build_libcephfs
- at ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE at am__append_235 = unittest_encoding \
+ at ENABLE_CLIENT_TRUE@@WITH_BUILD_TESTS_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE at am__append_235 = test_build_libcephfs
+ at ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE at am__append_236 = unittest_encoding \
 @ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE@	unittest_base64 \
 @ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE@	unittest_run_cmd \
 @ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE@	unittest_simple_spin \
 @ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE@	unittest_libcephfs_config
- at ENABLE_CLIENT_TRUE@@LINUX_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE at am__append_236 = test/libcephfs/flock.cc
- at ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE at am__append_237 = ceph_test_libcephfs \
+ at ENABLE_CLIENT_TRUE@@LINUX_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE at am__append_237 = test/libcephfs/flock.cc
+ at ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE at am__append_238 = ceph_test_libcephfs \
 @ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE@	ceph_test_c_headers
- at CLANG_FALSE@@ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE at am__append_238 = -Werror -Wold-style-declaration
- at ENABLE_CLIENT_TRUE@@WITH_BUILD_TESTS_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE at am__append_239 = test_build_librgw
- at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE at am__append_240 = ceph_test_cors \
+ at CLANG_FALSE@@ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE at am__append_239 = -Werror -Wold-style-declaration
+ at ENABLE_CLIENT_TRUE@@WITH_BUILD_TESTS_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE at am__append_240 = test_build_librgw
+ at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE at am__append_241 = ceph_test_cors \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	ceph_test_rgw_manifest \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	ceph_test_rgw_period_history \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	ceph_test_rgw_obj \
@@ -1071,20 +1073,20 @@ check_PROGRAMS = $(am__EXEEXT_63) $(am__EXEEXT_64) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	librgw_file_gp \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	librgw_file_aw \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	librgw_file_nfsns
- at ENABLE_SERVER_TRUE@am__append_241 = ceph_test_async_driver \
+ at ENABLE_SERVER_TRUE@am__append_242 = ceph_test_async_driver \
 @ENABLE_SERVER_TRUE@	ceph_test_msgr ceph_test_trans \
 @ENABLE_SERVER_TRUE@	ceph_test_mon_workloadgen \
 @ENABLE_SERVER_TRUE@	ceph_test_mon_msg ceph_perf_objectstore \
 @ENABLE_SERVER_TRUE@	ceph_perf_local ceph_perf_msgr_server \
 @ENABLE_SERVER_TRUE@	ceph_perf_msgr_client
- at ENABLE_SERVER_TRUE@am__append_242 = test/perf_helper.h
- at ENABLE_SERVER_TRUE@@LINUX_TRUE at am__append_243 =  \
+ at ENABLE_SERVER_TRUE@am__append_243 = test/perf_helper.h
+ at ENABLE_SERVER_TRUE@@LINUX_TRUE at am__append_244 =  \
 @ENABLE_SERVER_TRUE@@LINUX_TRUE@	ceph_test_objectstore \
 @ENABLE_SERVER_TRUE@@LINUX_TRUE@	ceph_test_keyvaluedb \
 @ENABLE_SERVER_TRUE@@LINUX_TRUE@	ceph_test_filestore
- at ENABLE_SERVER_TRUE@@LINUX_TRUE at am__append_244 = unittest_bluefs \
+ at ENABLE_SERVER_TRUE@@LINUX_TRUE at am__append_245 = unittest_bluefs \
 @ENABLE_SERVER_TRUE@@LINUX_TRUE@	unittest_bluestore_types
- at ENABLE_SERVER_TRUE@am__append_245 =  \
+ at ENABLE_SERVER_TRUE@am__append_246 =  \
 @ENABLE_SERVER_TRUE@	ceph_test_objectstore_workloadgen \
 @ENABLE_SERVER_TRUE@	ceph_test_filestore_idempotent \
 @ENABLE_SERVER_TRUE@	ceph_test_filestore_idempotent_sequence \
@@ -1092,47 +1094,47 @@ check_PROGRAMS = $(am__EXEEXT_63) $(am__EXEEXT_64) \
 @ENABLE_SERVER_TRUE@	ceph_test_object_map \
 @ENABLE_SERVER_TRUE@	ceph_test_keyvaluedb_atomicity \
 @ENABLE_SERVER_TRUE@	ceph_test_keyvaluedb_iterators
- at ENABLE_SERVER_TRUE@am__append_246 = unittest_transaction
- at ENABLE_CLIENT_TRUE@@ENABLE_SERVER_TRUE@@WITH_RADOS_TRUE at am__append_247 = ceph_smalliobenchfs \
+ at ENABLE_SERVER_TRUE@am__append_247 = unittest_transaction
+ at ENABLE_CLIENT_TRUE@@ENABLE_SERVER_TRUE@@WITH_RADOS_TRUE at am__append_248 = ceph_smalliobenchfs \
 @ENABLE_CLIENT_TRUE@@ENABLE_SERVER_TRUE@@WITH_RADOS_TRUE@	ceph_smalliobenchdumb \
 @ENABLE_CLIENT_TRUE@@ENABLE_SERVER_TRUE@@WITH_RADOS_TRUE@	ceph_tpbench
- at ENABLE_SERVER_TRUE@@WITH_MON_TRUE at am__append_248 = ceph_test_keys
- at ENABLE_SERVER_TRUE@@WITH_MON_TRUE at am__append_249 = get_command_descriptions
- at ENABLE_SERVER_TRUE@@WITH_MON_TRUE at am__append_250 =  \
+ at ENABLE_SERVER_TRUE@@WITH_MON_TRUE at am__append_249 = ceph_test_keys
+ at ENABLE_SERVER_TRUE@@WITH_MON_TRUE at am__append_250 = get_command_descriptions
+ at ENABLE_SERVER_TRUE@@WITH_MON_TRUE at am__append_251 =  \
 @ENABLE_SERVER_TRUE@@WITH_MON_TRUE@	unittest_mon_moncap \
 @ENABLE_SERVER_TRUE@@WITH_MON_TRUE@	unittest_mon_pgmap
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at am__append_251 =  \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at am__append_252 =  \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	unittest_ecbackend \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	unittest_osdscrub \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	unittest_pglog \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	unittest_hitset \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	unittest_osd_osdcap \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	unittest_pageset
- at ENABLE_SERVER_TRUE@@LINUX_TRUE@@WITH_OSD_TRUE at am__append_252 = -ldl
 @ENABLE_SERVER_TRUE@@LINUX_TRUE@@WITH_OSD_TRUE at am__append_253 = -ldl
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at am__append_254 = ceph_test_snap_mapper
- at ENABLE_SERVER_TRUE@@WITH_SLIBROCKSDB_TRUE at am__append_255 = unittest_rocksdb_option_static
- at ENABLE_SERVER_TRUE@@WITH_DLIBROCKSDB_TRUE at am__append_256 = unittest_rocksdb_option
- at ENABLE_SERVER_TRUE@am__append_257 = unittest_chain_xattr \
+ at ENABLE_SERVER_TRUE@@LINUX_TRUE@@WITH_OSD_TRUE at am__append_254 = -ldl
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at am__append_255 = ceph_test_snap_mapper
+ at ENABLE_SERVER_TRUE@@WITH_SLIBROCKSDB_TRUE at am__append_256 = unittest_rocksdb_option_static
+ at ENABLE_SERVER_TRUE@@WITH_DLIBROCKSDB_TRUE at am__append_257 = unittest_rocksdb_option
+ at ENABLE_SERVER_TRUE@am__append_258 = unittest_chain_xattr \
 @ENABLE_SERVER_TRUE@	unittest_lfnindex
- at ENABLE_SERVER_TRUE@@WITH_MDS_TRUE at am__append_258 = unittest_mds_authcap
- at WITH_BUILD_TESTS_TRUE@am__append_259 = test_build_libcommon
- at LINUX_TRUE@am__append_260 = libsystest.la
- at SOLARIS_TRUE@am__append_261 = \
+ at ENABLE_SERVER_TRUE@@WITH_MDS_TRUE at am__append_259 = unittest_mds_authcap
+ at WITH_BUILD_TESTS_TRUE@am__append_260 = test_build_libcommon
+ at LINUX_TRUE@am__append_261 = libsystest.la
+ at SOLARIS_TRUE@am__append_262 = \
 @SOLARIS_TRUE@	-lsocket -lnsl
 
- at LINUX_TRUE@am__append_262 = unittest_blkdev
- at LINUX_TRUE@am__append_263 = ceph_test_get_blkdev_size
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at am__append_264 =  \
+ at LINUX_TRUE@am__append_263 = unittest_blkdev
+ at LINUX_TRUE@am__append_264 = ceph_test_get_blkdev_size
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at am__append_265 =  \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	ceph_scratchtool \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	ceph_scratchtoolpp \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	ceph_radosacl
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at am__append_265 = rados
- at ENABLE_CLIENT_TRUE@@LINUX_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__append_266 = \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at am__append_266 = rados
+ at ENABLE_CLIENT_TRUE@@LINUX_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__append_267 = \
 @ENABLE_CLIENT_TRUE@@LINUX_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	tools/rbd/action/Kernel.cc \
 @ENABLE_CLIENT_TRUE@@LINUX_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	tools/rbd/action/Nbd.cc
 
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__append_267 = tools/rbd/ArgumentTypes.h \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__append_268 = tools/rbd/ArgumentTypes.h \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	tools/rbd/IndentStream.h \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	tools/rbd/OptionPrinter.h \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	tools/rbd/Shell.h \
@@ -1141,6 +1143,7 @@ check_PROGRAMS = $(am__EXEEXT_63) $(am__EXEEXT_64) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	tools/rbd_mirror/ClusterWatcher.h \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	tools/rbd_mirror/ImageReplayer.h \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	tools/rbd_mirror/ImageSync.h \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	tools/rbd_mirror/ImageSyncThrottler.h \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	tools/rbd_mirror/Mirror.h \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	tools/rbd_mirror/PoolWatcher.h \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	tools/rbd_mirror/ProgressContext.h \
@@ -1151,6 +1154,7 @@ check_PROGRAMS = $(am__EXEEXT_63) $(am__EXEEXT_64) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	tools/rbd_mirror/image_replayer/BootstrapRequest.h \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	tools/rbd_mirror/image_replayer/CloseImageRequest.h \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	tools/rbd_mirror/image_replayer/CreateImageRequest.h \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	tools/rbd_mirror/image_replayer/EventPreprocessor.h \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	tools/rbd_mirror/image_replayer/OpenImageRequest.h \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	tools/rbd_mirror/image_replayer/OpenLocalImageRequest.h \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	tools/rbd_mirror/image_replayer/ReplayStatusFormatter.h \
@@ -1161,26 +1165,26 @@ check_PROGRAMS = $(am__EXEEXT_63) $(am__EXEEXT_64) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	tools/rbd_mirror/image_sync/SnapshotCreateRequest.h \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	tools/rbd_mirror/image_sync/SyncPointCreateRequest.h \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	tools/rbd_mirror/image_sync/SyncPointPruneRequest.h
- at ENABLE_CLIENT_TRUE@@LINUX_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__append_268 = $(LIBKRBD)
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__append_269 = rbd
- at ENABLE_CLIENT_TRUE@@LINUX_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__append_270 = rbd-nbd
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__append_271 = librbd_mirror_internal.la
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__append_272 = rbd-mirror
- at ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE at am__append_273 = ceph-client-debug
- at ENABLE_SERVER_TRUE@am__append_274 = ceph-osdomap-tool \
+ at ENABLE_CLIENT_TRUE@@LINUX_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__append_269 = $(LIBKRBD)
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__append_270 = rbd
+ at ENABLE_CLIENT_TRUE@@LINUX_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__append_271 = rbd-nbd
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__append_272 = librbd_mirror_internal.la
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__append_273 = rbd-mirror
+ at ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE at am__append_274 = ceph-client-debug
+ at ENABLE_SERVER_TRUE@am__append_275 = ceph-osdomap-tool \
 @ENABLE_SERVER_TRUE@	ceph-monstore-tool ceph-kvstore-tool
- at ENABLE_SERVER_TRUE@@LINUX_TRUE@@WITH_OSD_TRUE at am__append_275 = -ldl
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at am__append_276 = ceph-objectstore-tool
- at ENABLE_CLIENT_TRUE@@ENABLE_SERVER_TRUE@@WITH_MDS_TRUE@@WITH_RADOS_TRUE at am__append_277 = cephfs-journal-tool \
+ at ENABLE_SERVER_TRUE@@LINUX_TRUE@@WITH_OSD_TRUE at am__append_276 = -ldl
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at am__append_277 = ceph-objectstore-tool
+ at ENABLE_CLIENT_TRUE@@ENABLE_SERVER_TRUE@@WITH_MDS_TRUE@@WITH_RADOS_TRUE at am__append_278 = cephfs-journal-tool \
 @ENABLE_CLIENT_TRUE@@ENABLE_SERVER_TRUE@@WITH_MDS_TRUE@@WITH_RADOS_TRUE@	cephfs-table-tool \
 @ENABLE_CLIENT_TRUE@@ENABLE_SERVER_TRUE@@WITH_MDS_TRUE@@WITH_RADOS_TRUE@	cephfs-data-scan
- at WITH_LTTNG_TRUE@am__append_278 = \
+ at WITH_LTTNG_TRUE@am__append_279 = \
 @WITH_LTTNG_TRUE@	libosd_tp.la \
 @WITH_LTTNG_TRUE@	libos_tp.la \
 @WITH_LTTNG_TRUE@	librados_tp.la \
 @WITH_LTTNG_TRUE@	librbd_tp.la
 
- at WITH_LTTNG_TRUE@am__append_279 = \
+ at WITH_LTTNG_TRUE@am__append_280 = \
 @WITH_LTTNG_TRUE@	tracing/librados.h \
 @WITH_LTTNG_TRUE@	tracing/librbd.h \
 @WITH_LTTNG_TRUE@	tracing/objectstore.h \
@@ -1188,63 +1192,63 @@ check_PROGRAMS = $(am__EXEEXT_63) $(am__EXEEXT_64) \
 @WITH_LTTNG_TRUE@	tracing/osd.h \
 @WITH_LTTNG_TRUE@	tracing/pg.h
 
- at ENABLE_CLIENT_TRUE@@WITH_CYTHON_TRUE@@WITH_RADOS_TRUE at am__append_280 = $(srcdir)/pybind/rados/setup.py $(srcdir)/pybind/rados/rados.pyx $(srcdir)/pybind/rados/rados.pxd
- at ENABLE_CLIENT_TRUE@@WITH_CYTHON_TRUE@@WITH_RADOS_TRUE at am__append_281 = rados-pybind-all
- at ENABLE_CLIENT_TRUE@@WITH_CYTHON_TRUE@@WITH_RADOS_TRUE at am__append_282 = rados-pybind-clean
- at ENABLE_CLIENT_TRUE@@WITH_CYTHON_TRUE@@WITH_RADOS_TRUE at am__append_283 = rados-pybind-install-exec
- at ENABLE_CLIENT_TRUE@@WITH_CYTHON_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__append_284 = $(srcdir)/pybind/rbd/setup.py $(srcdir)/pybind/rbd/rbd.pyx
- at ENABLE_CLIENT_TRUE@@WITH_CYTHON_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__append_285 = rbd-pybind-all
- at ENABLE_CLIENT_TRUE@@WITH_CYTHON_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__append_286 = rbd-pybind-clean
- at ENABLE_CLIENT_TRUE@@WITH_CYTHON_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__append_287 = rbd-pybind-install-exec
- at ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_CYTHON_TRUE@@WITH_RADOS_TRUE at am__append_288 = $(srcdir)/pybind/cephfs/setup.py $(srcdir)/pybind/cephfs/cephfs.pyx
- at ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_CYTHON_TRUE@@WITH_RADOS_TRUE at am__append_289 = cephfs-pybind-all
- at ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_CYTHON_TRUE@@WITH_RADOS_TRUE at am__append_290 = cephfs-pybind-clean
- at ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_CYTHON_TRUE@@WITH_RADOS_TRUE at am__append_291 = cephfs-pybind-install-exec
+ at ENABLE_CLIENT_TRUE@@WITH_CYTHON_TRUE@@WITH_RADOS_TRUE at am__append_281 = $(srcdir)/pybind/rados/setup.py $(srcdir)/pybind/rados/rados.pyx $(srcdir)/pybind/rados/rados.pxd
+ at ENABLE_CLIENT_TRUE@@WITH_CYTHON_TRUE@@WITH_RADOS_TRUE at am__append_282 = rados-pybind-all
+ at ENABLE_CLIENT_TRUE@@WITH_CYTHON_TRUE@@WITH_RADOS_TRUE at am__append_283 = rados-pybind-clean
+ at ENABLE_CLIENT_TRUE@@WITH_CYTHON_TRUE@@WITH_RADOS_TRUE at am__append_284 = rados-pybind-install-exec
+ at ENABLE_CLIENT_TRUE@@WITH_CYTHON_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__append_285 = $(srcdir)/pybind/rbd/setup.py $(srcdir)/pybind/rbd/rbd.pyx
+ at ENABLE_CLIENT_TRUE@@WITH_CYTHON_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__append_286 = rbd-pybind-all
+ at ENABLE_CLIENT_TRUE@@WITH_CYTHON_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__append_287 = rbd-pybind-clean
+ at ENABLE_CLIENT_TRUE@@WITH_CYTHON_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__append_288 = rbd-pybind-install-exec
+ at ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_CYTHON_TRUE@@WITH_RADOS_TRUE at am__append_289 = $(srcdir)/pybind/cephfs/setup.py $(srcdir)/pybind/cephfs/cephfs.pyx
+ at ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_CYTHON_TRUE@@WITH_RADOS_TRUE at am__append_290 = cephfs-pybind-all
+ at ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_CYTHON_TRUE@@WITH_RADOS_TRUE at am__append_291 = cephfs-pybind-clean
+ at ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_CYTHON_TRUE@@WITH_RADOS_TRUE at am__append_292 = cephfs-pybind-install-exec
 TESTS = $(am__EXEEXT_63) $(check_SCRIPTS)
- at ENABLE_CLIENT_TRUE@am__append_292 = \
+ at ENABLE_CLIENT_TRUE@am__append_293 = \
 @ENABLE_CLIENT_TRUE@	pybind/ceph_argparse.py \
 @ENABLE_CLIENT_TRUE@	pybind/ceph_daemon.py
 
- at ENABLE_CLIENT_TRUE@am__append_293 = ceph-syn
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at am__append_294 = \
+ at ENABLE_CLIENT_TRUE@am__append_294 = ceph-syn
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at am__append_295 = \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(srcdir)/bash_completion/rados \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(srcdir)/bash_completion/radosgw-admin
 
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at am__append_295 = librados-config
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__append_296 = \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at am__append_296 = librados-config
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__append_297 = \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(srcdir)/bash_completion/rbd
 
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__append_297 = \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__append_298 = \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	ceph-rbdnamer \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	rbd-replay-many \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@        rbdmap
 
- at ENABLE_CLIENT_TRUE@@LINUX_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__append_298 = libkrbd.la
- at ENABLE_CLIENT_TRUE@@WITH_FUSE_TRUE@@WITH_RADOS_TRUE at am__append_299 = ceph-fuse
- at ENABLE_CLIENT_TRUE@@WITH_FUSE_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__append_300 = rbd-fuse
- at ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE at am__append_301 = cephfs
- at ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE at am__append_302 = pybind/ceph_volume_client.py
- at ENABLE_CLIENT_TRUE@@LINUX_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE at am__append_303 = -Xcompiler -Xlinker -Xcompiler '--exclude-libs=libcommon.a'
- at ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE at am__append_304 = libcephfs.la
- at ENABLE_CEPHFS_JAVA_TRUE@@ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE at am__append_305 = libcephfs_jni.la
- at ENABLE_SERVER_TRUE@am__append_306 = ceph-run ceph-rest-api \
+ at ENABLE_CLIENT_TRUE@@LINUX_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__append_299 = libkrbd.la
+ at ENABLE_CLIENT_TRUE@@WITH_FUSE_TRUE@@WITH_RADOS_TRUE at am__append_300 = ceph-fuse
+ at ENABLE_CLIENT_TRUE@@WITH_FUSE_TRUE@@WITH_RADOS_TRUE at am__append_301 = mount.fuse.ceph
+ at ENABLE_CLIENT_TRUE@@WITH_FUSE_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__append_302 = rbd-fuse
+ at ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE at am__append_303 = cephfs
+ at ENABLE_CLIENT_TRUE@@LINUX_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE at am__append_304 = mount.ceph
+ at ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE at am__append_305 = pybind/ceph_volume_client.py
+ at ENABLE_CLIENT_TRUE@@LINUX_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE at am__append_306 = -Xcompiler -Xlinker -Xcompiler '--exclude-libs=libcommon.a'
+ at ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE at am__append_307 = libcephfs.la
+ at ENABLE_CEPHFS_JAVA_TRUE@@ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE at am__append_308 = libcephfs_jni.la
+ at ENABLE_SERVER_TRUE@am__append_309 = ceph-run ceph-rest-api \
 @ENABLE_SERVER_TRUE@	ceph-debugpack ceph-crush-location \
 @ENABLE_SERVER_TRUE@	ceph-coverage
- at ENABLE_SERVER_TRUE@am__append_307 = pybind/ceph_rest_api.py
- at ENABLE_SERVER_TRUE@am__append_308 = ceph-coverage init-ceph
- at ENABLE_SERVER_TRUE@am__append_309 = init-ceph
- at ENABLE_SERVER_TRUE@@LINUX_TRUE at am__append_310 = mount.ceph
- at ENABLE_SERVER_TRUE@am__append_311 = mount.fuse.ceph
- at ENABLE_SERVER_TRUE@@WITH_MON_TRUE at am__append_312 = ceph-mon
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at am__append_313 = \
+ at ENABLE_SERVER_TRUE@am__append_310 = pybind/ceph_rest_api.py
+ at ENABLE_SERVER_TRUE@am__append_311 = ceph-coverage init-ceph
+ at ENABLE_SERVER_TRUE@am__append_312 = init-ceph
+ at ENABLE_SERVER_TRUE@@WITH_MON_TRUE at am__append_313 = ceph-mon
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at am__append_314 = \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	ceph-disk-udev
 
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at am__append_314 = \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at am__append_315 = \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	ceph-clsinfo
 
- at ENABLE_SERVER_TRUE@@WITH_LTTNG_TRUE@@WITH_OSD_TRUE at am__append_315 = $(LIBOSD_TP)
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at am__append_316 = ceph-osd
- at ENABLE_SERVER_TRUE@@WITH_MDS_TRUE at am__append_317 = ceph-mds
+ at ENABLE_SERVER_TRUE@@WITH_LTTNG_TRUE@@WITH_OSD_TRUE at am__append_316 = $(LIBOSD_TP)
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at am__append_317 = ceph-osd
+ at ENABLE_SERVER_TRUE@@WITH_MDS_TRUE at am__append_318 = ceph-mds
 subdir = src
 ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
 am__aclocal_m4_deps = $(top_srcdir)/m4/ac_check_classpath.m4 \
@@ -1278,7 +1282,7 @@ libkv_a_AR = $(AR) $(ARFLAGS)
 am__DEPENDENCIES_1 =
 @ENABLE_SERVER_TRUE@@WITH_KINETIC_TRUE at am__DEPENDENCIES_2 =  \
 @ENABLE_SERVER_TRUE@@WITH_KINETIC_TRUE@	libcrypto.a
- at ENABLE_SERVER_TRUE@libkv_a_DEPENDENCIES = $(am__append_36) \
+ at ENABLE_SERVER_TRUE@libkv_a_DEPENDENCIES = $(am__append_37) \
 @ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_1) \
 @ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_2)
 am__libkv_a_SOURCES_DIST = kv/KeyValueDB.cc kv/LevelDBStore.cc \
@@ -1897,7 +1901,7 @@ libcls_version_client_la_OBJECTS =  \
 	$(am_libcls_version_client_la_OBJECTS)
 @ENABLE_CLIENT_TRUE at am_libcls_version_client_la_rpath =
 am__DEPENDENCIES_5 = libcommon_internal.la libcommon_crc.la \
-	$(am__append_112) $(LIBERASURE_CODE) $(LIBCOMPRESSOR) \
+	$(am__append_113) $(LIBERASURE_CODE) $(LIBCOMPRESSOR) \
 	$(LIBMSG) $(LIBAUTH) $(LIBCRUSH) $(LIBJSON_SPIRIT) $(LIBLOG) \
 	$(LIBARCH) $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1)
 libcommon_la_DEPENDENCIES = $(am__DEPENDENCIES_5)
@@ -2994,13 +2998,15 @@ librbd_mirror_internal_la_LIBADD =
 am__librbd_mirror_internal_la_SOURCES_DIST =  \
 	tools/rbd_mirror/ClusterWatcher.cc \
 	tools/rbd_mirror/ImageReplayer.cc \
-	tools/rbd_mirror/ImageSync.cc tools/rbd_mirror/Mirror.cc \
-	tools/rbd_mirror/PoolWatcher.cc tools/rbd_mirror/Replayer.cc \
-	tools/rbd_mirror/ImageDeleter.cc tools/rbd_mirror/Threads.cc \
-	tools/rbd_mirror/types.cc \
+	tools/rbd_mirror/ImageSync.cc \
+	tools/rbd_mirror/ImageSyncThrottler.cc \
+	tools/rbd_mirror/Mirror.cc tools/rbd_mirror/PoolWatcher.cc \
+	tools/rbd_mirror/Replayer.cc tools/rbd_mirror/ImageDeleter.cc \
+	tools/rbd_mirror/Threads.cc tools/rbd_mirror/types.cc \
 	tools/rbd_mirror/image_replayer/BootstrapRequest.cc \
 	tools/rbd_mirror/image_replayer/CloseImageRequest.cc \
 	tools/rbd_mirror/image_replayer/CreateImageRequest.cc \
+	tools/rbd_mirror/image_replayer/EventPreprocessor.cc \
 	tools/rbd_mirror/image_replayer/OpenImageRequest.cc \
 	tools/rbd_mirror/image_replayer/OpenLocalImageRequest.cc \
 	tools/rbd_mirror/image_replayer/ReplayStatusFormatter.cc \
@@ -3013,6 +3019,7 @@ am__librbd_mirror_internal_la_SOURCES_DIST =  \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am_librbd_mirror_internal_la_OBJECTS = tools/rbd_mirror/ClusterWatcher.lo \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	tools/rbd_mirror/ImageReplayer.lo \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	tools/rbd_mirror/ImageSync.lo \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	tools/rbd_mirror/ImageSyncThrottler.lo \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	tools/rbd_mirror/Mirror.lo \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	tools/rbd_mirror/PoolWatcher.lo \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	tools/rbd_mirror/Replayer.lo \
@@ -3022,6 +3029,7 @@ am__librbd_mirror_internal_la_SOURCES_DIST =  \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	tools/rbd_mirror/image_replayer/BootstrapRequest.lo \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	tools/rbd_mirror/image_replayer/CloseImageRequest.lo \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	tools/rbd_mirror/image_replayer/CreateImageRequest.lo \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	tools/rbd_mirror/image_replayer/EventPreprocessor.lo \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	tools/rbd_mirror/image_replayer/OpenImageRequest.lo \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	tools/rbd_mirror/image_replayer/OpenLocalImageRequest.lo \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	tools/rbd_mirror/image_replayer/ReplayStatusFormatter.lo \
@@ -3320,8 +3328,7 @@ libsystest_la_OBJECTS = $(am_libsystest_la_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	ceph_test_librbd$(EXEEXT) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	ceph_test_librbd_api$(EXEEXT) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	ceph_test_rbd_mirror$(EXEEXT) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	ceph_test_rbd_mirror_random_write$(EXEEXT) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	ceph_test_rbd_mirror_image_replay$(EXEEXT)
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	ceph_test_rbd_mirror_random_write$(EXEEXT)
 @ENABLE_CLIENT_TRUE@@LINUX_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am__EXEEXT_10 = ceph_test_librbd_fsx$(EXEEXT)
 @ENABLE_CLIENT_TRUE@@WITH_RADOSSTRIPER_TRUE@@WITH_RADOS_TRUE at am__EXEEXT_11 = ceph_test_rados_striper_api_io$(EXEEXT) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSSTRIPER_TRUE@@WITH_RADOS_TRUE@	ceph_test_rados_striper_api_aio$(EXEEXT) \
@@ -3510,7 +3517,7 @@ am__EXEEXT_63 = $(am__EXEEXT_48) $(am__EXEEXT_49) $(am__EXEEXT_50) \
 @ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@	xio_server$(EXEEXT) \
 @ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@	xio_client$(EXEEXT)
 @ENABLE_SERVER_TRUE@@WITH_MON_TRUE at am__EXEEXT_67 = get_command_descriptions$(EXEEXT)
- at ENABLE_SERVER_TRUE@@LINUX_TRUE at am__EXEEXT_68 = mount.ceph$(EXEEXT)
+ at ENABLE_CLIENT_TRUE@@LINUX_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE at am__EXEEXT_68 = mount.ceph$(EXEEXT)
 PROGRAMS = $(bin_PROGRAMS) $(noinst_PROGRAMS) $(sbin_PROGRAMS) \
 	$(su_sbin_PROGRAMS)
 am_ceph_authtool_OBJECTS = tools/ceph_authtool.$(OBJEXT)
@@ -3522,7 +3529,7 @@ ceph_bluefs_tool_OBJECTS = $(am_ceph_bluefs_tool_OBJECTS)
 @WITH_LIBZFS_TRUE at am__DEPENDENCIES_13 = libos_zfs.a
 @WITH_SPDK_TRUE at am__DEPENDENCIES_14 = $(LIBSPDK_LIBS) \
 @WITH_SPDK_TRUE@	$(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1)
-am__DEPENDENCIES_15 = libkv.a $(am__append_27) $(am__DEPENDENCIES_1) \
+am__DEPENDENCIES_15 = libkv.a $(am__append_28) $(am__DEPENDENCIES_1) \
 	$(am__DEPENDENCIES_1)
 am__DEPENDENCIES_16 = libos.a $(am__DEPENDENCIES_1) \
 	$(am__DEPENDENCIES_13) $(am__DEPENDENCIES_14) $(LIBOS_TYPES) \
@@ -3618,8 +3625,8 @@ am__DEPENDENCIES_17 = librgw.la $(am__DEPENDENCIES_1)
 @ENABLE_CLIENT_TRUE@	libcls_user_client.la \
 @ENABLE_CLIENT_TRUE@	libcls_numops_client.la \
 @ENABLE_CLIENT_TRUE@	libcls_journal_client.la
-am__DEPENDENCIES_19 = $(am__append_48) $(am__append_136) \
-	$(am__DEPENDENCIES_18) $(am__append_158)
+am__DEPENDENCIES_19 = $(am__append_49) $(am__append_137) \
+	$(am__DEPENDENCIES_18) $(am__append_159)
 @ENABLE_CLIENT_TRUE at ceph_dencoder_DEPENDENCIES =  \
 @ENABLE_CLIENT_TRUE@	$(am__DEPENDENCIES_17) $(LIBRADOS) \
 @ENABLE_CLIENT_TRUE@	$(LIBRBD_TYPES) $(LIBOSD_TYPES) \
@@ -3633,7 +3640,11 @@ ceph_dencoder_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 am__ceph_fuse_SOURCES_DIST = ceph_fuse.cc
 @ENABLE_CLIENT_TRUE@@WITH_FUSE_TRUE@@WITH_RADOS_TRUE at am_ceph_fuse_OBJECTS = ceph_fuse.$(OBJEXT)
 ceph_fuse_OBJECTS = $(am_ceph_fuse_OBJECTS)
- at ENABLE_CLIENT_TRUE@@WITH_FUSE_TRUE@@WITH_RADOS_TRUE at ceph_fuse_DEPENDENCIES = $(LIBCLIENT_FUSE) \
+am__DEPENDENCIES_20 = libperfglue.la $(am__DEPENDENCIES_1) \
+	$(am__DEPENDENCIES_1)
+am__DEPENDENCIES_21 = libclient_fuse.la $(am__DEPENDENCIES_1) \
+	$(am__DEPENDENCIES_20)
+ at ENABLE_CLIENT_TRUE@@WITH_FUSE_TRUE@@WITH_RADOS_TRUE at ceph_fuse_DEPENDENCIES = $(am__DEPENDENCIES_21) \
 @ENABLE_CLIENT_TRUE@@WITH_FUSE_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_10)
 am__ceph_kvstore_tool_SOURCES_DIST = tools/ceph_kvstore_tool.cc
 @ENABLE_SERVER_TRUE at am_ceph_kvstore_tool_OBJECTS = tools/ceph_kvstore_tool-ceph_kvstore_tool.$(OBJEXT)
@@ -3649,12 +3660,10 @@ am__ceph_mds_SOURCES_DIST = ceph_mds.cc
 @ENABLE_SERVER_TRUE@@WITH_MDS_TRUE at am_ceph_mds_OBJECTS =  \
 @ENABLE_SERVER_TRUE@@WITH_MDS_TRUE@	ceph_mds.$(OBJEXT)
 ceph_mds_OBJECTS = $(am_ceph_mds_OBJECTS)
-am__DEPENDENCIES_20 = libperfglue.la $(am__DEPENDENCIES_1) \
-	$(am__DEPENDENCIES_1)
-am__DEPENDENCIES_21 = libmds.la $(am__DEPENDENCIES_1) \
+am__DEPENDENCIES_22 = libmds.la $(am__DEPENDENCIES_1) \
 	$(am__DEPENDENCIES_20)
 @ENABLE_SERVER_TRUE@@WITH_MDS_TRUE at ceph_mds_DEPENDENCIES =  \
- at ENABLE_SERVER_TRUE@@WITH_MDS_TRUE@	$(am__DEPENDENCIES_21) \
+ at ENABLE_SERVER_TRUE@@WITH_MDS_TRUE@	$(am__DEPENDENCIES_22) \
 @ENABLE_SERVER_TRUE@@WITH_MDS_TRUE@	$(LIBOSDC) \
 @ENABLE_SERVER_TRUE@@WITH_MDS_TRUE@	$(am__DEPENDENCIES_10) \
 @ENABLE_SERVER_TRUE@@WITH_MDS_TRUE@	$(am__DEPENDENCIES_4)
@@ -3662,10 +3671,10 @@ am__ceph_mon_SOURCES_DIST = ceph_mon.cc
 @ENABLE_SERVER_TRUE@@WITH_MON_TRUE at am_ceph_mon_OBJECTS =  \
 @ENABLE_SERVER_TRUE@@WITH_MON_TRUE@	ceph_mon.$(OBJEXT)
 ceph_mon_OBJECTS = $(am_ceph_mon_OBJECTS)
-am__DEPENDENCIES_22 = libmon.a $(am__DEPENDENCIES_1) \
+am__DEPENDENCIES_23 = libmon.a $(am__DEPENDENCIES_1) \
 	$(am__DEPENDENCIES_20) $(LIBMON_TYPES)
 @ENABLE_SERVER_TRUE@@WITH_MON_TRUE at ceph_mon_DEPENDENCIES =  \
- at ENABLE_SERVER_TRUE@@WITH_MON_TRUE@	$(am__DEPENDENCIES_22) \
+ at ENABLE_SERVER_TRUE@@WITH_MON_TRUE@	$(am__DEPENDENCIES_23) \
 @ENABLE_SERVER_TRUE@@WITH_MON_TRUE@	$(am__DEPENDENCIES_16) \
 @ENABLE_SERVER_TRUE@@WITH_MON_TRUE@	$(am__DEPENDENCIES_10) \
 @ENABLE_SERVER_TRUE@@WITH_MON_TRUE@	$(am__DEPENDENCIES_4) \
@@ -3685,11 +3694,11 @@ am__ceph_objectstore_tool_SOURCES_DIST =  \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at am_ceph_objectstore_tool_OBJECTS = tools/ceph_objectstore_tool.$(OBJEXT) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	tools/RadosDump.$(OBJEXT)
 ceph_objectstore_tool_OBJECTS = $(am_ceph_objectstore_tool_OBJECTS)
-am__DEPENDENCIES_23 = libosd.a $(am__DEPENDENCIES_1) $(LIBOSDC) \
+am__DEPENDENCIES_24 = libosd.a $(am__DEPENDENCIES_1) $(LIBOSDC) \
 	$(am__DEPENDENCIES_16) $(am__DEPENDENCIES_20) $(LIBOSD_TYPES) \
 	$(LIBOS_TYPES)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at ceph_objectstore_tool_DEPENDENCIES =  \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_23) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_24) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_16) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_10) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_1) \
@@ -3699,7 +3708,7 @@ am__ceph_osd_SOURCES_DIST = ceph_osd.cc
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	ceph_osd.$(OBJEXT)
 ceph_osd_OBJECTS = $(am_ceph_osd_OBJECTS)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at ceph_osd_DEPENDENCIES = $(LIBOSDC) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_23) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_24) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(LIBOSD_TYPES) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(LIBOS_TYPES) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_16) \
@@ -3728,7 +3737,7 @@ am__ceph_erasure_code_SOURCES_DIST =  \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at am_ceph_erasure_code_OBJECTS = test/erasure-code/ceph_erasure_code.$(OBJEXT)
 ceph_erasure_code_OBJECTS = $(am_ceph_erasure_code_OBJECTS)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at ceph_erasure_code_DEPENDENCIES =  \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_23) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_24) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_4) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_1) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_10) \
@@ -3741,7 +3750,7 @@ am__ceph_erasure_code_benchmark_SOURCES_DIST =  \
 ceph_erasure_code_benchmark_OBJECTS =  \
 	$(am_ceph_erasure_code_benchmark_OBJECTS)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at ceph_erasure_code_benchmark_DEPENDENCIES =  \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_23) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_24) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_4) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_1) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_10) \
@@ -3752,7 +3761,7 @@ am__ceph_erasure_code_non_regression_SOURCES_DIST =  \
 ceph_erasure_code_non_regression_OBJECTS =  \
 	$(am_ceph_erasure_code_non_regression_OBJECTS)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at ceph_erasure_code_non_regression_DEPENDENCIES =  \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_23) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_24) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_4) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_1) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_10) \
@@ -3802,13 +3811,13 @@ am__ceph_perf_msgr_client_SOURCES_DIST =  \
 	test/msgr/perf_msgr_client.cc
 @ENABLE_SERVER_TRUE at am_ceph_perf_msgr_client_OBJECTS = test/msgr/ceph_perf_msgr_client-perf_msgr_client.$(OBJEXT)
 ceph_perf_msgr_client_OBJECTS = $(am_ceph_perf_msgr_client_OBJECTS)
-am__DEPENDENCIES_24 = $(top_builddir)/src/gmock/lib/libgmock_main.la \
+am__DEPENDENCIES_25 = $(top_builddir)/src/gmock/lib/libgmock_main.la \
 	$(top_builddir)/src/gmock/lib/libgmock.la \
 	$(top_builddir)/src/gmock/gtest/lib/libgtest.la \
 	$(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1)
 @ENABLE_SERVER_TRUE at ceph_perf_msgr_client_DEPENDENCIES =  \
 @ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_16) \
- at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_24) \
+ at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_25) \
 @ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_10)
 ceph_perf_msgr_client_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -3820,7 +3829,7 @@ am__ceph_perf_msgr_server_SOURCES_DIST =  \
 ceph_perf_msgr_server_OBJECTS = $(am_ceph_perf_msgr_server_OBJECTS)
 @ENABLE_SERVER_TRUE at ceph_perf_msgr_server_DEPENDENCIES =  \
 @ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_16) \
- at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_24) \
+ at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_25) \
 @ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_10)
 ceph_perf_msgr_server_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -3832,7 +3841,7 @@ am__ceph_perf_objectstore_SOURCES_DIST =  \
 ceph_perf_objectstore_OBJECTS = $(am_ceph_perf_objectstore_OBJECTS)
 @ENABLE_SERVER_TRUE at ceph_perf_objectstore_DEPENDENCIES =  \
 @ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_16) \
- at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_24) \
+ at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_25) \
 @ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_10)
 ceph_perf_objectstore_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -3932,7 +3941,7 @@ am__ceph_test_async_driver_SOURCES_DIST =  \
 ceph_test_async_driver_OBJECTS = $(am_ceph_test_async_driver_OBJECTS)
 @ENABLE_SERVER_TRUE at ceph_test_async_driver_DEPENDENCIES =  \
 @ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_16) \
- at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_24) \
+ at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_25) \
 @ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_10)
 ceph_test_async_driver_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -3959,7 +3968,7 @@ ceph_test_cls_hello_OBJECTS = $(am_ceph_test_cls_hello_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at ceph_test_cls_hello_DEPENDENCIES =  \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(LIBRADOS) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_1) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_24) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_25) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_10) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_9)
 ceph_test_cls_hello_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
@@ -3975,7 +3984,7 @@ ceph_test_cls_journal_OBJECTS = $(am_ceph_test_cls_journal_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(LIBRADOS) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_4) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_1) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_24) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_25) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_9)
 ceph_test_cls_journal_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -3988,7 +3997,7 @@ ceph_test_cls_lock_OBJECTS = $(am_ceph_test_cls_lock_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(LIBRADOS) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	libcls_lock_client.la \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_4) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_24) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_25) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_9)
 ceph_test_cls_lock_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -4000,7 +4009,7 @@ ceph_test_cls_log_OBJECTS = $(am_ceph_test_cls_log_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at ceph_test_cls_log_DEPENDENCIES =  \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(LIBRADOS) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	libcls_log_client.la \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_24) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_25) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_10) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_9)
 ceph_test_cls_log_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
@@ -4014,7 +4023,7 @@ ceph_test_cls_numops_OBJECTS = $(am_ceph_test_cls_numops_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at ceph_test_cls_numops_DEPENDENCIES =  \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(LIBRADOS) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	libcls_numops_client.la \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_24) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_25) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_10) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_9)
 ceph_test_cls_numops_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
@@ -4029,7 +4038,7 @@ ceph_test_cls_rbd_OBJECTS = $(am_ceph_test_cls_rbd_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	libcls_rbd_client.la \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	libcls_lock_client.la \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_4) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_24) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_25) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_9) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_1) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_3)
@@ -4044,7 +4053,7 @@ ceph_test_cls_refcount_OBJECTS = $(am_ceph_test_cls_refcount_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at ceph_test_cls_refcount_DEPENDENCIES =  \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(LIBRADOS) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	libcls_refcount_client.la \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_24) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_25) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_9)
 ceph_test_cls_refcount_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -4058,7 +4067,7 @@ ceph_test_cls_replica_log_OBJECTS =  \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at ceph_test_cls_replica_log_DEPENDENCIES =  \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(LIBRADOS) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	libcls_replica_log_client.la \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_24) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_25) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_10) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_9)
 ceph_test_cls_replica_log_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
@@ -4072,7 +4081,7 @@ ceph_test_cls_rgw_OBJECTS = $(am_ceph_test_cls_rgw_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_1) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	libcls_rgw_client.la \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_4) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_24) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_25) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_10) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_9)
 ceph_test_cls_rgw_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
@@ -4086,7 +4095,7 @@ ceph_test_cls_rgw_log_OBJECTS = $(am_ceph_test_cls_rgw_log_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_17) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_12) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_10) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_24) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_25) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_1) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	libcls_version_client.la \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	libcls_log_client.la \
@@ -4105,7 +4114,7 @@ ceph_test_cls_rgw_meta_OBJECTS = $(am_ceph_test_cls_rgw_meta_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE at ceph_test_cls_rgw_meta_DEPENDENCIES = $(am__DEPENDENCIES_17) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(LIBRADOS) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_10) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_24) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_25) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_1) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	libcls_timeindex_client.la \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	libcls_version_client.la \
@@ -4127,7 +4136,7 @@ ceph_test_cls_rgw_opstate_OBJECTS =  \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE at ceph_test_cls_rgw_opstate_DEPENDENCIES = $(LIBRADOS) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_17) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_10) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_24) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_25) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_1) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	libcls_version_client.la \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	libcls_log_client.la \
@@ -4149,7 +4158,7 @@ ceph_test_cls_statelog_OBJECTS = $(am_ceph_test_cls_statelog_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at ceph_test_cls_statelog_DEPENDENCIES =  \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(LIBRADOS) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	libcls_statelog_client.la \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_24) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_25) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_10) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_9)
 ceph_test_cls_statelog_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
@@ -4163,7 +4172,7 @@ ceph_test_cls_version_OBJECTS = $(am_ceph_test_cls_version_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at ceph_test_cls_version_DEPENDENCIES =  \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(LIBRADOS) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	libcls_version_client.la \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_24) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_25) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_9)
 ceph_test_cls_version_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -4175,7 +4184,7 @@ ceph_test_cors_OBJECTS = $(am_ceph_test_cors_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE at ceph_test_cors_DEPENDENCIES = $(LIBRADOS) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_17) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_10) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_24)
+ at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_25)
 ceph_test_cors_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(ceph_test_cors_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
@@ -4188,7 +4197,7 @@ am__ceph_test_filejournal_SOURCES_DIST = test/test_filejournal.cc
 ceph_test_filejournal_OBJECTS = $(am_ceph_test_filejournal_OBJECTS)
 @ENABLE_SERVER_TRUE at ceph_test_filejournal_DEPENDENCIES =  \
 @ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_16) \
- at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_24) \
+ at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_25) \
 @ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_10)
 ceph_test_filejournal_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -4200,7 +4209,7 @@ am__ceph_test_filestore_SOURCES_DIST =  \
 ceph_test_filestore_OBJECTS = $(am_ceph_test_filestore_OBJECTS)
 @ENABLE_SERVER_TRUE@@LINUX_TRUE at ceph_test_filestore_DEPENDENCIES =  \
 @ENABLE_SERVER_TRUE@@LINUX_TRUE@	$(am__DEPENDENCIES_16) \
- at ENABLE_SERVER_TRUE@@LINUX_TRUE@	$(am__DEPENDENCIES_24) \
+ at ENABLE_SERVER_TRUE@@LINUX_TRUE@	$(am__DEPENDENCIES_25) \
 @ENABLE_SERVER_TRUE@@LINUX_TRUE@	$(am__DEPENDENCIES_10)
 ceph_test_filestore_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -4250,14 +4259,14 @@ am__ceph_test_keys_SOURCES_DIST = test/testkeys.cc
 @ENABLE_SERVER_TRUE@@WITH_MON_TRUE@	test/testkeys.$(OBJEXT)
 ceph_test_keys_OBJECTS = $(am_ceph_test_keys_OBJECTS)
 @ENABLE_SERVER_TRUE@@WITH_MON_TRUE at ceph_test_keys_DEPENDENCIES =  \
- at ENABLE_SERVER_TRUE@@WITH_MON_TRUE@	$(am__DEPENDENCIES_22) \
+ at ENABLE_SERVER_TRUE@@WITH_MON_TRUE@	$(am__DEPENDENCIES_23) \
 @ENABLE_SERVER_TRUE@@WITH_MON_TRUE@	$(am__DEPENDENCIES_10)
 am__ceph_test_keyvaluedb_SOURCES_DIST = test/objectstore/test_kv.cc
 @ENABLE_SERVER_TRUE@@LINUX_TRUE at am_ceph_test_keyvaluedb_OBJECTS = test/objectstore/ceph_test_keyvaluedb-test_kv.$(OBJEXT)
 ceph_test_keyvaluedb_OBJECTS = $(am_ceph_test_keyvaluedb_OBJECTS)
 @ENABLE_SERVER_TRUE@@LINUX_TRUE at ceph_test_keyvaluedb_DEPENDENCIES =  \
 @ENABLE_SERVER_TRUE@@LINUX_TRUE@	$(am__DEPENDENCIES_16) \
- at ENABLE_SERVER_TRUE@@LINUX_TRUE@	$(am__DEPENDENCIES_24) \
+ at ENABLE_SERVER_TRUE@@LINUX_TRUE@	$(am__DEPENDENCIES_25) \
 @ENABLE_SERVER_TRUE@@LINUX_TRUE@	$(am__DEPENDENCIES_10)
 ceph_test_keyvaluedb_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -4270,7 +4279,7 @@ ceph_test_keyvaluedb_atomicity_OBJECTS =  \
 	$(am_ceph_test_keyvaluedb_atomicity_OBJECTS)
 @ENABLE_SERVER_TRUE at ceph_test_keyvaluedb_atomicity_DEPENDENCIES =  \
 @ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_16) \
- at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_24) \
+ at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_25) \
 @ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_10)
 ceph_test_keyvaluedb_atomicity_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -4285,7 +4294,7 @@ ceph_test_keyvaluedb_iterators_OBJECTS =  \
 	$(am_ceph_test_keyvaluedb_iterators_OBJECTS)
 @ENABLE_SERVER_TRUE at ceph_test_keyvaluedb_iterators_DEPENDENCIES =  \
 @ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_16) \
- at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_24) \
+ at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_25) \
 @ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_10)
 ceph_test_keyvaluedb_iterators_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -4307,7 +4316,7 @@ ceph_test_libcephfs_OBJECTS = $(am_ceph_test_libcephfs_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE at ceph_test_libcephfs_DEPENDENCIES = $(LIBRADOS) \
 @ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE@	$(LIBCEPHFS) \
 @ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_4) \
- at ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_24)
+ at ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_25)
 ceph_test_libcephfs_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(ceph_test_libcephfs_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
@@ -4325,7 +4334,7 @@ ceph_test_librbd_OBJECTS = $(am_ceph_test_librbd_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	libcls_journal_client.la \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	librados_api.la \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(am__DEPENDENCIES_7) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(am__DEPENDENCIES_24) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(am__DEPENDENCIES_25) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(am__DEPENDENCIES_10) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(am__DEPENDENCIES_9)
 ceph_test_librbd_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
@@ -4342,7 +4351,7 @@ ceph_test_librbd_api_OBJECTS = $(am_ceph_test_librbd_api_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(LIBRBD) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(LIBRADOS) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(am__DEPENDENCIES_4) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(am__DEPENDENCIES_24) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(am__DEPENDENCIES_25) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(am__DEPENDENCIES_9)
 ceph_test_librbd_api_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -4368,7 +4377,7 @@ ceph_test_mon_msg_OBJECTS = $(am_ceph_test_mon_msg_OBJECTS)
 @ENABLE_SERVER_TRUE at ceph_test_mon_msg_DEPENDENCIES =  \
 @ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_16) $(LIBOSDC) \
 @ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_10) \
- at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_24)
+ at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_25)
 ceph_test_mon_msg_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(ceph_test_mon_msg_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
@@ -4387,7 +4396,7 @@ am__ceph_test_msgr_SOURCES_DIST = test/msgr/test_msgr.cc
 ceph_test_msgr_OBJECTS = $(am_ceph_test_msgr_OBJECTS)
 @ENABLE_SERVER_TRUE at ceph_test_msgr_DEPENDENCIES =  \
 @ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_16) \
- at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_24) \
+ at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_25) \
 @ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_10)
 ceph_test_msgr_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -4407,7 +4416,7 @@ am__ceph_test_object_map_SOURCES_DIST =  \
 ceph_test_object_map_OBJECTS = $(am_ceph_test_object_map_OBJECTS)
 @ENABLE_SERVER_TRUE at ceph_test_object_map_DEPENDENCIES =  \
 @ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_16) \
- at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_24) \
+ at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_25) \
 @ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_10)
 ceph_test_object_map_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -4415,7 +4424,8 @@ ceph_test_object_map_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(LDFLAGS) -o $@
 am_ceph_test_objectcacher_stress_OBJECTS =  \
 	test/osdc/object_cacher_stress.$(OBJEXT) \
-	test/osdc/FakeWriteback.$(OBJEXT)
+	test/osdc/FakeWriteback.$(OBJEXT) \
+	test/osdc/MemWriteback.$(OBJEXT)
 ceph_test_objectcacher_stress_OBJECTS =  \
 	$(am_ceph_test_objectcacher_stress_OBJECTS)
 ceph_test_objectcacher_stress_DEPENDENCIES = $(LIBOSDC) \
@@ -4426,7 +4436,7 @@ am__ceph_test_objectstore_SOURCES_DIST =  \
 ceph_test_objectstore_OBJECTS = $(am_ceph_test_objectstore_OBJECTS)
 @ENABLE_SERVER_TRUE@@LINUX_TRUE at ceph_test_objectstore_DEPENDENCIES =  \
 @ENABLE_SERVER_TRUE@@LINUX_TRUE@	$(am__DEPENDENCIES_16) \
- at ENABLE_SERVER_TRUE@@LINUX_TRUE@	$(am__DEPENDENCIES_24) \
+ at ENABLE_SERVER_TRUE@@LINUX_TRUE@	$(am__DEPENDENCIES_25) \
 @ENABLE_SERVER_TRUE@@LINUX_TRUE@	$(am__DEPENDENCIES_10)
 ceph_test_objectstore_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -4460,7 +4470,7 @@ ceph_test_rados_api_aio_OBJECTS =  \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at ceph_test_rados_api_aio_DEPENDENCIES =  \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(LIBRADOS) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_4) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_24) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_25) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_9)
 ceph_test_rados_api_aio_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -4473,7 +4483,7 @@ ceph_test_rados_api_c_read_operations_OBJECTS =  \
 	$(am_ceph_test_rados_api_c_read_operations_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at ceph_test_rados_api_c_read_operations_DEPENDENCIES =  \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(LIBRADOS) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_24) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_25) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_9)
 ceph_test_rados_api_c_read_operations_LINK = $(LIBTOOL) $(AM_V_lt) \
 	--tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link \
@@ -4486,7 +4496,7 @@ ceph_test_rados_api_c_write_operations_OBJECTS =  \
 	$(am_ceph_test_rados_api_c_write_operations_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at ceph_test_rados_api_c_write_operations_DEPENDENCIES =  \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(LIBRADOS) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_24) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_25) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_9)
 ceph_test_rados_api_c_write_operations_LINK = $(LIBTOOL) $(AM_V_lt) \
 	--tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link \
@@ -4498,7 +4508,7 @@ ceph_test_rados_api_cls_OBJECTS =  \
 	$(am_ceph_test_rados_api_cls_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at ceph_test_rados_api_cls_DEPENDENCIES =  \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(LIBRADOS) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_24) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_25) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_9)
 ceph_test_rados_api_cls_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -4512,7 +4522,7 @@ ceph_test_rados_api_cmd_OBJECTS =  \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_4) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(LIBRADOS) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_1) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_24) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_25) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_9)
 ceph_test_rados_api_cmd_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -4523,7 +4533,7 @@ am__ceph_test_rados_api_io_SOURCES_DIST = test/librados/io.cc
 ceph_test_rados_api_io_OBJECTS = $(am_ceph_test_rados_api_io_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at ceph_test_rados_api_io_DEPENDENCIES =  \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(LIBRADOS) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_24) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_25) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_9)
 ceph_test_rados_api_io_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -4535,7 +4545,7 @@ ceph_test_rados_api_list_OBJECTS =  \
 	$(am_ceph_test_rados_api_list_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at ceph_test_rados_api_list_DEPENDENCIES =  \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(LIBRADOS) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_24) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_25) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_9) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_10)
 ceph_test_rados_api_list_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
@@ -4548,7 +4558,7 @@ ceph_test_rados_api_lock_OBJECTS =  \
 	$(am_ceph_test_rados_api_lock_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at ceph_test_rados_api_lock_DEPENDENCIES =  \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(LIBRADOS) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_24) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_25) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_9)
 ceph_test_rados_api_lock_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -4560,7 +4570,7 @@ ceph_test_rados_api_misc_OBJECTS =  \
 	$(am_ceph_test_rados_api_misc_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at ceph_test_rados_api_misc_DEPENDENCIES =  \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(LIBRADOS) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_24) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_25) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_10) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_9)
 ceph_test_rados_api_misc_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
@@ -4573,7 +4583,7 @@ ceph_test_rados_api_nlist_OBJECTS =  \
 	$(am_ceph_test_rados_api_nlist_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at ceph_test_rados_api_nlist_DEPENDENCIES =  \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(LIBRADOS) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_24) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_25) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_9)
 ceph_test_rados_api_nlist_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -4585,7 +4595,7 @@ ceph_test_rados_api_pool_OBJECTS =  \
 	$(am_ceph_test_rados_api_pool_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at ceph_test_rados_api_pool_DEPENDENCIES =  \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(LIBRADOS) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_24) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_25) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_9)
 ceph_test_rados_api_pool_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -4598,7 +4608,7 @@ ceph_test_rados_api_snapshots_OBJECTS =  \
 	$(am_ceph_test_rados_api_snapshots_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at ceph_test_rados_api_snapshots_DEPENDENCIES =  \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(LIBRADOS) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_24) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_25) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_9)
 ceph_test_rados_api_snapshots_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -4610,7 +4620,7 @@ ceph_test_rados_api_stat_OBJECTS =  \
 	$(am_ceph_test_rados_api_stat_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at ceph_test_rados_api_stat_DEPENDENCIES =  \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(LIBRADOS) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_24) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_25) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_9)
 ceph_test_rados_api_stat_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -4624,7 +4634,7 @@ ceph_test_rados_api_tier_OBJECTS =  \
 	$(am_ceph_test_rados_api_tier_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at ceph_test_rados_api_tier_DEPENDENCIES =  \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(LIBRADOS) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_24) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_25) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_10) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_9)
 ceph_test_rados_api_tier_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
@@ -4641,8 +4651,8 @@ ceph_test_rados_api_tmap_migrate_OBJECTS =  \
 	$(am_ceph_test_rados_api_tmap_migrate_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at ceph_test_rados_api_tmap_migrate_DEPENDENCIES =  \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(LIBRADOS) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_24) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_21) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_25) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_22) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	libcls_cephfs_client.la \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_10) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_9)
@@ -4657,7 +4667,7 @@ ceph_test_rados_api_watch_notify_OBJECTS =  \
 	$(am_ceph_test_rados_api_watch_notify_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at ceph_test_rados_api_watch_notify_DEPENDENCIES =  \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(LIBRADOS) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_24) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_25) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_9)
 ceph_test_rados_api_watch_notify_LINK = $(LIBTOOL) $(AM_V_lt) \
 	--tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link \
@@ -4709,7 +4719,7 @@ ceph_test_rados_striper_api_aio_OBJECTS =  \
 	$(am_ceph_test_rados_striper_api_aio_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_RADOSSTRIPER_TRUE@@WITH_RADOS_TRUE at ceph_test_rados_striper_api_aio_DEPENDENCIES = $(LIBRADOS) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSSTRIPER_TRUE@@WITH_RADOS_TRUE@	$(LIBRADOSSTRIPER) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOSSTRIPER_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_24) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOSSTRIPER_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_25) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSSTRIPER_TRUE@@WITH_RADOS_TRUE@	$(RADOS_STRIPER_TEST_LDADD)
 ceph_test_rados_striper_api_aio_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -4723,7 +4733,7 @@ ceph_test_rados_striper_api_io_OBJECTS =  \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSSTRIPER_TRUE@@WITH_RADOS_TRUE at ceph_test_rados_striper_api_io_DEPENDENCIES = $(LIBRADOS) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSSTRIPER_TRUE@@WITH_RADOS_TRUE@	$(LIBRADOSSTRIPER) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSSTRIPER_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_4) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOSSTRIPER_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_24) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOSSTRIPER_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_25) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSSTRIPER_TRUE@@WITH_RADOS_TRUE@	$(RADOS_STRIPER_TEST_LDADD)
 ceph_test_rados_striper_api_io_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -4736,7 +4746,7 @@ ceph_test_rados_striper_api_striping_OBJECTS =  \
 	$(am_ceph_test_rados_striper_api_striping_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_RADOSSTRIPER_TRUE@@WITH_RADOS_TRUE at ceph_test_rados_striper_api_striping_DEPENDENCIES = $(LIBRADOS) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSSTRIPER_TRUE@@WITH_RADOS_TRUE@	$(LIBRADOSSTRIPER) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOSSTRIPER_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_24) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOSSTRIPER_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_25) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSSTRIPER_TRUE@@WITH_RADOS_TRUE@	$(RADOS_STRIPER_TEST_LDADD)
 ceph_test_rados_striper_api_striping_LINK = $(LIBTOOL) $(AM_V_lt) \
 	--tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link \
@@ -4776,30 +4786,13 @@ ceph_test_rbd_mirror_OBJECTS = $(am_ceph_test_rbd_mirror_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	librados_api.la \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(am__DEPENDENCIES_7) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(LIBOSDC) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(am__DEPENDENCIES_24) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(am__DEPENDENCIES_25) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(am__DEPENDENCIES_10) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(am__DEPENDENCIES_9)
 ceph_test_rbd_mirror_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(ceph_test_rbd_mirror_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
 	$(LDFLAGS) -o $@
-am__ceph_test_rbd_mirror_image_replay_SOURCES_DIST =  \
-	test/rbd_mirror/image_replay.cc
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am_ceph_test_rbd_mirror_image_replay_OBJECTS = test/rbd_mirror/image_replay.$(OBJEXT)
-ceph_test_rbd_mirror_image_replay_OBJECTS =  \
-	$(am_ceph_test_rbd_mirror_image_replay_OBJECTS)
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at ceph_test_rbd_mirror_image_replay_DEPENDENCIES = librbd_mirror_internal.la \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	librbd_internal.la \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	librbd_api.la \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(LIBRBD_TYPES) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	libjournal.la \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(LIBRADOS) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(LIBOSDC) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	librados_internal.la \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	libcls_rbd_client.la \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	libcls_lock_client.la \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	libcls_journal_client.la \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(am__DEPENDENCIES_10)
 am__ceph_test_rbd_mirror_random_write_SOURCES_DIST =  \
 	test/rbd_mirror/random_write.cc
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am_ceph_test_rbd_mirror_random_write_OBJECTS = test/rbd_mirror/ceph_test_rbd_mirror_random_write-random_write.$(OBJEXT)
@@ -4828,7 +4821,7 @@ ceph_test_rgw_manifest_OBJECTS = $(am_ceph_test_rgw_manifest_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_17) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_12) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_10) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_24) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_25) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_1)
 ceph_test_rgw_manifest_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -4841,7 +4834,7 @@ ceph_test_rgw_obj_OBJECTS = $(am_ceph_test_rgw_obj_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_17) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_12) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_10) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_24) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_25) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_1)
 ceph_test_rgw_obj_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -4856,7 +4849,7 @@ ceph_test_rgw_period_history_OBJECTS =  \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_17) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_12) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_10) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_24) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_25) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_1)
 ceph_test_rgw_period_history_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -4871,8 +4864,8 @@ am__ceph_test_snap_mapper_SOURCES_DIST = test/test_snap_mapper.cc
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at am_ceph_test_snap_mapper_OBJECTS = test/ceph_test_snap_mapper-test_snap_mapper.$(OBJEXT)
 ceph_test_snap_mapper_OBJECTS = $(am_ceph_test_snap_mapper_OBJECTS)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at ceph_test_snap_mapper_DEPENDENCIES =  \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_23) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_24) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_25) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_10)
 ceph_test_snap_mapper_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -4884,7 +4877,7 @@ ceph_test_stress_watch_OBJECTS = $(am_ceph_test_stress_watch_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at ceph_test_stress_watch_DEPENDENCIES =  \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(LIBRADOS) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_4) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_24) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_25) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_9)
 ceph_test_stress_watch_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -4914,7 +4907,7 @@ am__ceph_xattr_bench_SOURCES_DIST = test/xattr_bench.cc
 ceph_xattr_bench_OBJECTS = $(am_ceph_xattr_bench_OBJECTS)
 @ENABLE_SERVER_TRUE at ceph_xattr_bench_DEPENDENCIES =  \
 @ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_16) \
- at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_24) \
+ at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_25) \
 @ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_10)
 ceph_xattr_bench_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -4932,7 +4925,7 @@ am__cephfs_data_scan_SOURCES_DIST = tools/cephfs/cephfs-data-scan.cc \
 @ENABLE_CLIENT_TRUE@@ENABLE_SERVER_TRUE@@WITH_MDS_TRUE@@WITH_RADOS_TRUE@	tools/cephfs/RoleSelector.$(OBJEXT) \
 @ENABLE_CLIENT_TRUE@@ENABLE_SERVER_TRUE@@WITH_MDS_TRUE@@WITH_RADOS_TRUE@	tools/cephfs/MDSUtility.$(OBJEXT)
 cephfs_data_scan_OBJECTS = $(am_cephfs_data_scan_OBJECTS)
- at ENABLE_CLIENT_TRUE@@ENABLE_SERVER_TRUE@@WITH_MDS_TRUE@@WITH_RADOS_TRUE at cephfs_data_scan_DEPENDENCIES = $(am__DEPENDENCIES_21) \
+ at ENABLE_CLIENT_TRUE@@ENABLE_SERVER_TRUE@@WITH_MDS_TRUE@@WITH_RADOS_TRUE at cephfs_data_scan_DEPENDENCIES = $(am__DEPENDENCIES_22) \
 @ENABLE_CLIENT_TRUE@@ENABLE_SERVER_TRUE@@WITH_MDS_TRUE@@WITH_RADOS_TRUE@	libcls_cephfs_client.la \
 @ENABLE_CLIENT_TRUE@@ENABLE_SERVER_TRUE@@WITH_MDS_TRUE@@WITH_RADOS_TRUE@	$(LIBRADOS) \
 @ENABLE_CLIENT_TRUE@@ENABLE_SERVER_TRUE@@WITH_MDS_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_10)
@@ -4952,7 +4945,7 @@ am__cephfs_journal_tool_SOURCES_DIST =  \
 @ENABLE_CLIENT_TRUE@@ENABLE_SERVER_TRUE@@WITH_MDS_TRUE@@WITH_RADOS_TRUE@	tools/cephfs/Resetter.$(OBJEXT) \
 @ENABLE_CLIENT_TRUE@@ENABLE_SERVER_TRUE@@WITH_MDS_TRUE@@WITH_RADOS_TRUE@	tools/cephfs/MDSUtility.$(OBJEXT)
 cephfs_journal_tool_OBJECTS = $(am_cephfs_journal_tool_OBJECTS)
- at ENABLE_CLIENT_TRUE@@ENABLE_SERVER_TRUE@@WITH_MDS_TRUE@@WITH_RADOS_TRUE at cephfs_journal_tool_DEPENDENCIES = $(am__DEPENDENCIES_21) \
+ at ENABLE_CLIENT_TRUE@@ENABLE_SERVER_TRUE@@WITH_MDS_TRUE@@WITH_RADOS_TRUE at cephfs_journal_tool_DEPENDENCIES = $(am__DEPENDENCIES_22) \
 @ENABLE_CLIENT_TRUE@@ENABLE_SERVER_TRUE@@WITH_MDS_TRUE@@WITH_RADOS_TRUE@	$(LIBRADOS) \
 @ENABLE_CLIENT_TRUE@@ENABLE_SERVER_TRUE@@WITH_MDS_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_10)
 am__cephfs_table_tool_SOURCES_DIST =  \
@@ -4963,7 +4956,7 @@ am__cephfs_table_tool_SOURCES_DIST =  \
 @ENABLE_CLIENT_TRUE@@ENABLE_SERVER_TRUE@@WITH_MDS_TRUE@@WITH_RADOS_TRUE@	tools/cephfs/RoleSelector.$(OBJEXT) \
 @ENABLE_CLIENT_TRUE@@ENABLE_SERVER_TRUE@@WITH_MDS_TRUE@@WITH_RADOS_TRUE@	tools/cephfs/MDSUtility.$(OBJEXT)
 cephfs_table_tool_OBJECTS = $(am_cephfs_table_tool_OBJECTS)
- at ENABLE_CLIENT_TRUE@@ENABLE_SERVER_TRUE@@WITH_MDS_TRUE@@WITH_RADOS_TRUE at cephfs_table_tool_DEPENDENCIES = $(am__DEPENDENCIES_21) \
+ at ENABLE_CLIENT_TRUE@@ENABLE_SERVER_TRUE@@WITH_MDS_TRUE@@WITH_RADOS_TRUE at cephfs_table_tool_DEPENDENCIES = $(am__DEPENDENCIES_22) \
 @ENABLE_CLIENT_TRUE@@ENABLE_SERVER_TRUE@@WITH_MDS_TRUE@@WITH_RADOS_TRUE@	$(LIBRADOS) \
 @ENABLE_CLIENT_TRUE@@ENABLE_SERVER_TRUE@@WITH_MDS_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_10)
 am_crushtool_OBJECTS = tools/crushtool.$(OBJEXT)
@@ -4975,7 +4968,7 @@ am__get_command_descriptions_SOURCES_DIST =  \
 get_command_descriptions_OBJECTS =  \
 	$(am_get_command_descriptions_OBJECTS)
 @ENABLE_SERVER_TRUE@@WITH_MON_TRUE at get_command_descriptions_DEPENDENCIES =  \
- at ENABLE_SERVER_TRUE@@WITH_MON_TRUE@	$(am__DEPENDENCIES_22) \
+ at ENABLE_SERVER_TRUE@@WITH_MON_TRUE@	$(am__DEPENDENCIES_23) \
 @ENABLE_SERVER_TRUE@@WITH_MON_TRUE@	$(LIBMON_TYPES) \
 @ENABLE_SERVER_TRUE@@WITH_MON_TRUE@	$(am__DEPENDENCIES_16) \
 @ENABLE_SERVER_TRUE@@WITH_MON_TRUE@	$(am__DEPENDENCIES_4) \
@@ -4989,7 +4982,7 @@ librados_config_OBJECTS = $(am_librados_config_OBJECTS)
 am__librgw_file_SOURCES_DIST = test/librgw_file.cc
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE at am_librgw_file_OBJECTS = test/librgw_file-librgw_file.$(OBJEXT)
 librgw_file_OBJECTS = $(am_librgw_file_OBJECTS)
- at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE at librgw_file_DEPENDENCIES = $(am__DEPENDENCIES_24) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE at librgw_file_DEPENDENCIES = $(am__DEPENDENCIES_25) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	librgw.la \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	librados.la \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_1) \
@@ -5001,7 +4994,7 @@ librgw_file_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) \
 am__librgw_file_aw_SOURCES_DIST = test/librgw_file_aw.cc
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE at am_librgw_file_aw_OBJECTS = test/librgw_file_aw-librgw_file_aw.$(OBJEXT)
 librgw_file_aw_OBJECTS = $(am_librgw_file_aw_OBJECTS)
- at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE at librgw_file_aw_DEPENDENCIES = $(am__DEPENDENCIES_24) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE at librgw_file_aw_DEPENDENCIES = $(am__DEPENDENCIES_25) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	librgw.la \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	librados.la \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_1) \
@@ -5014,7 +5007,7 @@ librgw_file_aw_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 am__librgw_file_cd_SOURCES_DIST = test/librgw_file_cd.cc
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE at am_librgw_file_cd_OBJECTS = test/librgw_file_cd-librgw_file_cd.$(OBJEXT)
 librgw_file_cd_OBJECTS = $(am_librgw_file_cd_OBJECTS)
- at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE at librgw_file_cd_DEPENDENCIES = $(am__DEPENDENCIES_24) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE at librgw_file_cd_DEPENDENCIES = $(am__DEPENDENCIES_25) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	librgw.la \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	librados.la \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_1) \
@@ -5027,7 +5020,7 @@ librgw_file_cd_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 am__librgw_file_gp_SOURCES_DIST = test/librgw_file_gp.cc
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE at am_librgw_file_gp_OBJECTS = test/librgw_file_gp-librgw_file_gp.$(OBJEXT)
 librgw_file_gp_OBJECTS = $(am_librgw_file_gp_OBJECTS)
- at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE at librgw_file_gp_DEPENDENCIES = $(am__DEPENDENCIES_24) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE at librgw_file_gp_DEPENDENCIES = $(am__DEPENDENCIES_25) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	librgw.la \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	librados.la \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_1) \
@@ -5040,7 +5033,7 @@ librgw_file_gp_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 am__librgw_file_nfsns_SOURCES_DIST = test/librgw_file_nfsns.cc
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE at am_librgw_file_nfsns_OBJECTS = test/librgw_file_nfsns-librgw_file_nfsns.$(OBJEXT)
 librgw_file_nfsns_OBJECTS = $(am_librgw_file_nfsns_OBJECTS)
- at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE at librgw_file_nfsns_DEPENDENCIES = $(am__DEPENDENCIES_24) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE at librgw_file_nfsns_DEPENDENCIES = $(am__DEPENDENCIES_25) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	librgw.la \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	librados.la \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_1) \
@@ -5054,11 +5047,10 @@ am_monmaptool_OBJECTS = tools/monmaptool.$(OBJEXT)
 monmaptool_OBJECTS = $(am_monmaptool_OBJECTS)
 monmaptool_DEPENDENCIES = $(am__DEPENDENCIES_10)
 am__mount_ceph_SOURCES_DIST = mount/mount.ceph.c
- at ENABLE_SERVER_TRUE@am_mount_ceph_OBJECTS =  \
- at ENABLE_SERVER_TRUE@	mount/mount.ceph.$(OBJEXT)
+ at ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE at am_mount_ceph_OBJECTS = mount/mount.ceph.$(OBJEXT)
 mount_ceph_OBJECTS = $(am_mount_ceph_OBJECTS)
- at ENABLE_SERVER_TRUE@mount_ceph_DEPENDENCIES = $(LIBSECRET) \
- at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_4)
+ at ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE at mount_ceph_DEPENDENCIES = $(LIBSECRET) \
+ at ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_4)
 am_osdmaptool_OBJECTS = tools/osdmaptool.$(OBJEXT)
 osdmaptool_OBJECTS = $(am_osdmaptool_OBJECTS)
 osdmaptool_DEPENDENCIES = $(am__DEPENDENCIES_10)
@@ -5088,10 +5080,10 @@ am__radosgw_SOURCES_DIST = rgw/rgw_fcgi_process.cc \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	civetweb/src/radosgw-civetweb.$(OBJEXT) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	rgw/rgw_main.$(OBJEXT)
 radosgw_OBJECTS = $(am_radosgw_OBJECTS)
-am__DEPENDENCIES_25 = $(am__DEPENDENCIES_1)
+am__DEPENDENCIES_26 = $(am__DEPENDENCIES_1)
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE at radosgw_DEPENDENCIES = $(am__DEPENDENCIES_17) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(LIBCIVETWEB) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_25) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_26) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_12) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_1) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_10)
@@ -5180,7 +5172,7 @@ rbd_OBJECTS = $(am_rbd_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(am__DEPENDENCIES_10) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(am__DEPENDENCIES_1) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(am__DEPENDENCIES_1) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(am__append_268)
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(am__append_269)
 am__rbd_fuse_SOURCES_DIST = rbd_fuse/rbd-fuse.cc
 @ENABLE_CLIENT_TRUE@@WITH_FUSE_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at am_rbd_fuse_OBJECTS = rbd_fuse/rbd_fuse-rbd-fuse.$(OBJEXT)
 rbd_fuse_OBJECTS = $(am_rbd_fuse_OBJECTS)
@@ -5427,7 +5419,7 @@ test_build_librgw_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(test_build_librgw_LDFLAGS) $(LDFLAGS) -o $@
 am_unittest_addrs_OBJECTS = test/unittest_addrs-test_addrs.$(OBJEXT)
 unittest_addrs_OBJECTS = $(am_unittest_addrs_OBJECTS)
-unittest_addrs_DEPENDENCIES = $(am__DEPENDENCIES_24) \
+unittest_addrs_DEPENDENCIES = $(am__DEPENDENCIES_25) \
 	$(am__DEPENDENCIES_10)
 unittest_addrs_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -5436,7 +5428,7 @@ unittest_addrs_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 am_unittest_admin_socket_OBJECTS =  \
 	test/unittest_admin_socket-admin_socket.$(OBJEXT)
 unittest_admin_socket_OBJECTS = $(am_unittest_admin_socket_OBJECTS)
-unittest_admin_socket_DEPENDENCIES = $(am__DEPENDENCIES_24) \
+unittest_admin_socket_DEPENDENCIES = $(am__DEPENDENCIES_25) \
 	$(am__DEPENDENCIES_10)
 unittest_admin_socket_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -5444,7 +5436,7 @@ unittest_admin_socket_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(LDFLAGS) -o $@
 am_unittest_arch_OBJECTS = test/unittest_arch-test_arch.$(OBJEXT)
 unittest_arch_OBJECTS = $(am_unittest_arch_OBJECTS)
-unittest_arch_DEPENDENCIES = $(am__DEPENDENCIES_24) \
+unittest_arch_DEPENDENCIES = $(am__DEPENDENCIES_25) \
 	$(am__DEPENDENCIES_10)
 unittest_arch_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -5453,7 +5445,7 @@ unittest_arch_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 am_unittest_async_compressor_OBJECTS = test/common/unittest_async_compressor-test_async_compressor.$(OBJEXT)
 unittest_async_compressor_OBJECTS =  \
 	$(am_unittest_async_compressor_OBJECTS)
-unittest_async_compressor_DEPENDENCIES = $(am__DEPENDENCIES_24) \
+unittest_async_compressor_DEPENDENCIES = $(am__DEPENDENCIES_25) \
 	$(am__DEPENDENCIES_10) $(LIBCOMPRESSOR) $(am__DEPENDENCIES_4)
 unittest_async_compressor_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -5464,7 +5456,7 @@ am__unittest_base64_SOURCES_DIST = test/base64.cc
 unittest_base64_OBJECTS = $(am_unittest_base64_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE at unittest_base64_DEPENDENCIES = $(LIBCEPHFS) \
 @ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_10) \
- at ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_24)
+ at ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_25)
 unittest_base64_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(unittest_base64_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
@@ -5472,7 +5464,7 @@ unittest_base64_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 am_unittest_bit_vector_OBJECTS =  \
 	test/common/unittest_bit_vector-test_bit_vector.$(OBJEXT)
 unittest_bit_vector_OBJECTS = $(am_unittest_bit_vector_OBJECTS)
-unittest_bit_vector_DEPENDENCIES = $(am__DEPENDENCIES_24) \
+unittest_bit_vector_DEPENDENCIES = $(am__DEPENDENCIES_25) \
 	$(am__DEPENDENCIES_10)
 unittest_bit_vector_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -5481,7 +5473,7 @@ unittest_bit_vector_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 am_unittest_blkdev_OBJECTS =  \
 	test/common/unittest_blkdev-test_blkdev.$(OBJEXT)
 unittest_blkdev_OBJECTS = $(am_unittest_blkdev_OBJECTS)
-unittest_blkdev_DEPENDENCIES = $(am__DEPENDENCIES_24) \
+unittest_blkdev_DEPENDENCIES = $(am__DEPENDENCIES_25) \
 	$(am__DEPENDENCIES_10)
 unittest_blkdev_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -5490,7 +5482,7 @@ unittest_blkdev_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 am_unittest_bloom_filter_OBJECTS =  \
 	test/common/unittest_bloom_filter-test_bloom_filter.$(OBJEXT)
 unittest_bloom_filter_OBJECTS = $(am_unittest_bloom_filter_OBJECTS)
-unittest_bloom_filter_DEPENDENCIES = $(am__DEPENDENCIES_24) \
+unittest_bloom_filter_DEPENDENCIES = $(am__DEPENDENCIES_25) \
 	$(am__DEPENDENCIES_10)
 unittest_bloom_filter_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -5501,7 +5493,7 @@ am__unittest_bluefs_SOURCES_DIST = test/objectstore/test_bluefs.cc
 unittest_bluefs_OBJECTS = $(am_unittest_bluefs_OBJECTS)
 @ENABLE_SERVER_TRUE@@LINUX_TRUE at unittest_bluefs_DEPENDENCIES =  \
 @ENABLE_SERVER_TRUE@@LINUX_TRUE@	$(am__DEPENDENCIES_16) \
- at ENABLE_SERVER_TRUE@@LINUX_TRUE@	$(am__DEPENDENCIES_24) \
+ at ENABLE_SERVER_TRUE@@LINUX_TRUE@	$(am__DEPENDENCIES_25) \
 @ENABLE_SERVER_TRUE@@LINUX_TRUE@	$(am__DEPENDENCIES_10)
 unittest_bluefs_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -5514,7 +5506,7 @@ unittest_bluestore_types_OBJECTS =  \
 	$(am_unittest_bluestore_types_OBJECTS)
 @ENABLE_SERVER_TRUE@@LINUX_TRUE at unittest_bluestore_types_DEPENDENCIES =  \
 @ENABLE_SERVER_TRUE@@LINUX_TRUE@	$(am__DEPENDENCIES_16) \
- at ENABLE_SERVER_TRUE@@LINUX_TRUE@	$(am__DEPENDENCIES_24) \
+ at ENABLE_SERVER_TRUE@@LINUX_TRUE@	$(am__DEPENDENCIES_25) \
 @ENABLE_SERVER_TRUE@@LINUX_TRUE@	$(am__DEPENDENCIES_10)
 unittest_bluestore_types_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -5523,7 +5515,7 @@ unittest_bluestore_types_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 am_unittest_bufferlist_OBJECTS =  \
 	test/unittest_bufferlist-bufferlist.$(OBJEXT)
 unittest_bufferlist_OBJECTS = $(am_unittest_bufferlist_OBJECTS)
-unittest_bufferlist_DEPENDENCIES = $(am__DEPENDENCIES_24) \
+unittest_bufferlist_DEPENDENCIES = $(am__DEPENDENCIES_25) \
 	$(am__DEPENDENCIES_10)
 unittest_bufferlist_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -5532,7 +5524,7 @@ unittest_bufferlist_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 am_unittest_ceph_argparse_OBJECTS =  \
 	test/unittest_ceph_argparse-ceph_argparse.$(OBJEXT)
 unittest_ceph_argparse_OBJECTS = $(am_unittest_ceph_argparse_OBJECTS)
-unittest_ceph_argparse_DEPENDENCIES = $(am__DEPENDENCIES_24) \
+unittest_ceph_argparse_DEPENDENCIES = $(am__DEPENDENCIES_25) \
 	$(am__DEPENDENCIES_10)
 unittest_ceph_argparse_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -5542,7 +5534,7 @@ am_unittest_ceph_compatset_OBJECTS =  \
 	test/unittest_ceph_compatset-ceph_compatset.$(OBJEXT)
 unittest_ceph_compatset_OBJECTS =  \
 	$(am_unittest_ceph_compatset_OBJECTS)
-unittest_ceph_compatset_DEPENDENCIES = $(am__DEPENDENCIES_24) \
+unittest_ceph_compatset_DEPENDENCIES = $(am__DEPENDENCIES_25) \
 	$(am__DEPENDENCIES_10)
 unittest_ceph_compatset_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -5551,7 +5543,7 @@ unittest_ceph_compatset_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 am_unittest_ceph_crypto_OBJECTS =  \
 	test/unittest_ceph_crypto-ceph_crypto.$(OBJEXT)
 unittest_ceph_crypto_OBJECTS = $(am_unittest_ceph_crypto_OBJECTS)
-unittest_ceph_crypto_DEPENDENCIES = $(am__DEPENDENCIES_24) \
+unittest_ceph_crypto_DEPENDENCIES = $(am__DEPENDENCIES_25) \
 	$(am__DEPENDENCIES_10)
 unittest_ceph_crypto_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -5563,7 +5555,7 @@ am__unittest_chain_xattr_SOURCES_DIST =  \
 unittest_chain_xattr_OBJECTS = $(am_unittest_chain_xattr_OBJECTS)
 @ENABLE_SERVER_TRUE at unittest_chain_xattr_DEPENDENCIES =  \
 @ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_16) \
- at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_24) \
+ at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_25) \
 @ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_10)
 unittest_chain_xattr_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -5577,9 +5569,9 @@ am__unittest_compression_plugin_SOURCES_DIST =  \
 unittest_compression_plugin_OBJECTS =  \
 	$(am_unittest_compression_plugin_OBJECTS)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at unittest_compression_plugin_DEPENDENCIES =  \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_23) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_4) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_24) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_4) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_25) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_10) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_1)
 unittest_compression_plugin_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
@@ -5597,9 +5589,9 @@ am__objects_52 = compressor/unittest_compression_plugin_snappy-Compressor.$(OBJE
 unittest_compression_plugin_snappy_OBJECTS =  \
 	$(am_unittest_compression_plugin_snappy_OBJECTS)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at unittest_compression_plugin_snappy_DEPENDENCIES =  \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_23) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_4) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_24) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_4) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_25) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_10) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(LIBCOMPRESSOR) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_1)
@@ -5621,9 +5613,9 @@ am__objects_53 = compressor/unittest_compression_plugin_zlib-Compressor.$(OBJEXT
 unittest_compression_plugin_zlib_OBJECTS =  \
 	$(am_unittest_compression_plugin_zlib_OBJECTS)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at unittest_compression_plugin_zlib_DEPENDENCIES =  \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_23) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_4) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_24) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_4) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_25) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_10) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(LIBCOMPRESSOR) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_1)
@@ -5644,9 +5636,9 @@ am__objects_54 =  \
 unittest_compression_snappy_OBJECTS =  \
 	$(am_unittest_compression_snappy_OBJECTS)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at unittest_compression_snappy_DEPENDENCIES =  \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_23) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_4) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_24) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_4) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_25) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_10) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_1)
 unittest_compression_snappy_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
@@ -5667,9 +5659,9 @@ am__objects_55 =  \
 unittest_compression_zlib_OBJECTS =  \
 	$(am_unittest_compression_zlib_OBJECTS)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at unittest_compression_zlib_DEPENDENCIES =  \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_23) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_4) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_24) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_4) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_25) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_10) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_1)
 unittest_compression_zlib_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
@@ -5679,7 +5671,7 @@ unittest_compression_zlib_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 am_unittest_config_OBJECTS =  \
 	test/common/unittest_config-test_config.$(OBJEXT)
 unittest_config_OBJECTS = $(am_unittest_config_OBJECTS)
-unittest_config_DEPENDENCIES = $(am__DEPENDENCIES_24) \
+unittest_config_DEPENDENCIES = $(am__DEPENDENCIES_25) \
 	$(am__DEPENDENCIES_10)
 unittest_config_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -5688,7 +5680,7 @@ unittest_config_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 am_unittest_confutils_OBJECTS =  \
 	test/unittest_confutils-confutils.$(OBJEXT)
 unittest_confutils_OBJECTS = $(am_unittest_confutils_OBJECTS)
-unittest_confutils_DEPENDENCIES = $(am__DEPENDENCIES_24) \
+unittest_confutils_DEPENDENCIES = $(am__DEPENDENCIES_25) \
 	$(am__DEPENDENCIES_10)
 unittest_confutils_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -5697,7 +5689,7 @@ unittest_confutils_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 am_unittest_context_OBJECTS =  \
 	test/common/unittest_context-test_context.$(OBJEXT)
 unittest_context_OBJECTS = $(am_unittest_context_OBJECTS)
-unittest_context_DEPENDENCIES = $(am__DEPENDENCIES_24) \
+unittest_context_DEPENDENCIES = $(am__DEPENDENCIES_25) \
 	$(am__DEPENDENCIES_10)
 unittest_context_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -5706,7 +5698,7 @@ unittest_context_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 am_unittest_crc32c_OBJECTS =  \
 	test/common/unittest_crc32c-test_crc32c.$(OBJEXT)
 unittest_crc32c_OBJECTS = $(am_unittest_crc32c_OBJECTS)
-unittest_crc32c_DEPENDENCIES = $(am__DEPENDENCIES_24) \
+unittest_crc32c_DEPENDENCIES = $(am__DEPENDENCIES_25) \
 	$(am__DEPENDENCIES_10)
 unittest_crc32c_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -5715,7 +5707,7 @@ unittest_crc32c_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 am_unittest_crush_OBJECTS = test/crush/unittest_crush-crush.$(OBJEXT)
 unittest_crush_OBJECTS = $(am_unittest_crush_OBJECTS)
 unittest_crush_DEPENDENCIES = $(am__DEPENDENCIES_4) \
-	$(am__DEPENDENCIES_24) $(am__DEPENDENCIES_3) \
+	$(am__DEPENDENCIES_25) $(am__DEPENDENCIES_3) \
 	$(am__DEPENDENCIES_10)
 unittest_crush_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -5724,7 +5716,7 @@ unittest_crush_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 am_unittest_crush_wrapper_OBJECTS =  \
 	test/crush/unittest_crush_wrapper-CrushWrapper.$(OBJEXT)
 unittest_crush_wrapper_OBJECTS = $(am_unittest_crush_wrapper_OBJECTS)
-unittest_crush_wrapper_DEPENDENCIES = $(am__DEPENDENCIES_24) \
+unittest_crush_wrapper_DEPENDENCIES = $(am__DEPENDENCIES_25) \
 	$(am__DEPENDENCIES_10) $(LIBCRUSH)
 unittest_crush_wrapper_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -5732,7 +5724,7 @@ unittest_crush_wrapper_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(LDFLAGS) -o $@
 am_unittest_crypto_OBJECTS = test/unittest_crypto-crypto.$(OBJEXT)
 unittest_crypto_OBJECTS = $(am_unittest_crypto_OBJECTS)
-unittest_crypto_DEPENDENCIES = $(am__DEPENDENCIES_24) \
+unittest_crypto_DEPENDENCIES = $(am__DEPENDENCIES_25) \
 	$(am__DEPENDENCIES_10)
 unittest_crypto_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -5741,7 +5733,7 @@ unittest_crypto_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 am_unittest_crypto_init_OBJECTS =  \
 	test/unittest_crypto_init-crypto_init.$(OBJEXT)
 unittest_crypto_init_OBJECTS = $(am_unittest_crypto_init_OBJECTS)
-unittest_crypto_init_DEPENDENCIES = $(am__DEPENDENCIES_24) \
+unittest_crypto_init_DEPENDENCIES = $(am__DEPENDENCIES_25) \
 	$(am__DEPENDENCIES_10)
 unittest_crypto_init_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -5750,7 +5742,7 @@ unittest_crypto_init_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 am_unittest_daemon_config_OBJECTS =  \
 	test/unittest_daemon_config-daemon_config.$(OBJEXT)
 unittest_daemon_config_OBJECTS = $(am_unittest_daemon_config_OBJECTS)
-unittest_daemon_config_DEPENDENCIES = $(am__DEPENDENCIES_24) \
+unittest_daemon_config_DEPENDENCIES = $(am__DEPENDENCIES_25) \
 	$(am__DEPENDENCIES_10)
 unittest_daemon_config_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -5760,8 +5752,8 @@ am__unittest_ecbackend_SOURCES_DIST = test/osd/TestECBackend.cc
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at am_unittest_ecbackend_OBJECTS = test/osd/unittest_ecbackend-TestECBackend.$(OBJEXT)
 unittest_ecbackend_OBJECTS = $(am_unittest_ecbackend_OBJECTS)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at unittest_ecbackend_DEPENDENCIES =  \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_23) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_24) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_25) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_10)
 unittest_ecbackend_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -5773,7 +5765,7 @@ unittest_encoding_OBJECTS = $(am_unittest_encoding_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE at unittest_encoding_DEPENDENCIES = $(LIBCEPHFS) \
 @ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE@	$(LIBRADOS) \
 @ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_10) \
- at ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_24)
+ at ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_25)
 unittest_encoding_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(unittest_encoding_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
@@ -5784,9 +5776,9 @@ am__unittest_erasure_code_SOURCES_DIST = erasure-code/ErasureCode.cc \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	test/erasure-code/unittest_erasure_code-TestErasureCode.$(OBJEXT)
 unittest_erasure_code_OBJECTS = $(am_unittest_erasure_code_OBJECTS)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at unittest_erasure_code_DEPENDENCIES =  \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_23) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_4) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_24) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_4) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_25) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_10)
 unittest_erasure_code_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -5800,9 +5792,9 @@ am__unittest_erasure_code_example_SOURCES_DIST =  \
 unittest_erasure_code_example_OBJECTS =  \
 	$(am_unittest_erasure_code_example_OBJECTS)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at unittest_erasure_code_example_DEPENDENCIES =  \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_23) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_4) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_24) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_4) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_25) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_10)
 unittest_erasure_code_example_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -5815,9 +5807,9 @@ am__unittest_erasure_code_isa_SOURCES_DIST =  \
 @ENABLE_SERVER_TRUE@@WITH_BETTER_YASM_ELF64_TRUE@@WITH_OSD_TRUE@	test/erasure-code/unittest_erasure_code_isa-TestErasureCodeIsa.$(OBJEXT)
 unittest_erasure_code_isa_OBJECTS =  \
 	$(am_unittest_erasure_code_isa_OBJECTS)
- at ENABLE_SERVER_TRUE@@WITH_BETTER_YASM_ELF64_TRUE@@WITH_OSD_TRUE at unittest_erasure_code_isa_DEPENDENCIES = $(am__DEPENDENCIES_23) \
+ at ENABLE_SERVER_TRUE@@WITH_BETTER_YASM_ELF64_TRUE@@WITH_OSD_TRUE at unittest_erasure_code_isa_DEPENDENCIES = $(am__DEPENDENCIES_24) \
 @ENABLE_SERVER_TRUE@@WITH_BETTER_YASM_ELF64_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_4) \
- at ENABLE_SERVER_TRUE@@WITH_BETTER_YASM_ELF64_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_24) \
+ at ENABLE_SERVER_TRUE@@WITH_BETTER_YASM_ELF64_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_25) \
 @ENABLE_SERVER_TRUE@@WITH_BETTER_YASM_ELF64_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_10) \
 @ENABLE_SERVER_TRUE@@WITH_BETTER_YASM_ELF64_TRUE@@WITH_OSD_TRUE@	libisa.la \
 @ENABLE_SERVER_TRUE@@WITH_BETTER_YASM_ELF64_TRUE@@WITH_OSD_TRUE@	$(LIBERASURE_CODE) \
@@ -5871,9 +5863,9 @@ am__objects_56 = erasure-code/unittest_erasure_code_jerasure-ErasureCode.$(OBJEX
 unittest_erasure_code_jerasure_OBJECTS =  \
 	$(am_unittest_erasure_code_jerasure_OBJECTS)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at unittest_erasure_code_jerasure_DEPENDENCIES =  \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_23) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_4) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_24) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_4) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_25) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_10) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_1)
 unittest_erasure_code_jerasure_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
@@ -5894,9 +5886,9 @@ am__objects_57 =  \
 unittest_erasure_code_lrc_OBJECTS =  \
 	$(am_unittest_erasure_code_lrc_OBJECTS)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at unittest_erasure_code_lrc_DEPENDENCIES =  \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_23) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_4) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_24) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_4) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_25) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_10) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_1)
 unittest_erasure_code_lrc_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
@@ -5911,9 +5903,9 @@ am__unittest_erasure_code_plugin_SOURCES_DIST =  \
 unittest_erasure_code_plugin_OBJECTS =  \
 	$(am_unittest_erasure_code_plugin_OBJECTS)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at unittest_erasure_code_plugin_DEPENDENCIES =  \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_23) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_4) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_24) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_4) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_25) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_10) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_1)
 unittest_erasure_code_plugin_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
@@ -5927,9 +5919,9 @@ am__unittest_erasure_code_plugin_isa_SOURCES_DIST =  \
 @ENABLE_SERVER_TRUE@@WITH_BETTER_YASM_ELF64_TRUE@@WITH_OSD_TRUE@	test/erasure-code/unittest_erasure_code_plugin_isa-TestErasureCodePluginIsa.$(OBJEXT)
 unittest_erasure_code_plugin_isa_OBJECTS =  \
 	$(am_unittest_erasure_code_plugin_isa_OBJECTS)
- at ENABLE_SERVER_TRUE@@WITH_BETTER_YASM_ELF64_TRUE@@WITH_OSD_TRUE at unittest_erasure_code_plugin_isa_DEPENDENCIES = $(am__DEPENDENCIES_23) \
+ at ENABLE_SERVER_TRUE@@WITH_BETTER_YASM_ELF64_TRUE@@WITH_OSD_TRUE at unittest_erasure_code_plugin_isa_DEPENDENCIES = $(am__DEPENDENCIES_24) \
 @ENABLE_SERVER_TRUE@@WITH_BETTER_YASM_ELF64_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_4) \
- at ENABLE_SERVER_TRUE@@WITH_BETTER_YASM_ELF64_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_24) \
+ at ENABLE_SERVER_TRUE@@WITH_BETTER_YASM_ELF64_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_25) \
 @ENABLE_SERVER_TRUE@@WITH_BETTER_YASM_ELF64_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_10) \
 @ENABLE_SERVER_TRUE@@WITH_BETTER_YASM_ELF64_TRUE@@WITH_OSD_TRUE@	$(LIBERASURE_CODE) \
 @ENABLE_SERVER_TRUE@@WITH_BETTER_YASM_ELF64_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_1)
@@ -5943,9 +5935,9 @@ am__unittest_erasure_code_plugin_jerasure_SOURCES_DIST =  \
 unittest_erasure_code_plugin_jerasure_OBJECTS =  \
 	$(am_unittest_erasure_code_plugin_jerasure_OBJECTS)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at unittest_erasure_code_plugin_jerasure_DEPENDENCIES =  \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_23) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_4) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_24) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_4) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_25) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_10) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_1)
 unittest_erasure_code_plugin_jerasure_LINK = $(LIBTOOL) $(AM_V_lt) \
@@ -5958,9 +5950,9 @@ am__unittest_erasure_code_plugin_lrc_SOURCES_DIST =  \
 unittest_erasure_code_plugin_lrc_OBJECTS =  \
 	$(am_unittest_erasure_code_plugin_lrc_OBJECTS)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at unittest_erasure_code_plugin_lrc_DEPENDENCIES =  \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_23) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_4) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_24) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_4) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_25) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_10) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_1)
 unittest_erasure_code_plugin_lrc_LINK = $(LIBTOOL) $(AM_V_lt) \
@@ -5973,9 +5965,9 @@ am__unittest_erasure_code_plugin_shec_SOURCES_DIST =  \
 unittest_erasure_code_plugin_shec_OBJECTS =  \
 	$(am_unittest_erasure_code_plugin_shec_OBJECTS)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at unittest_erasure_code_plugin_shec_DEPENDENCIES =  \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_23) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_4) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_24) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_4) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_25) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_10) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_1)
 unittest_erasure_code_plugin_shec_LINK = $(LIBTOOL) $(AM_V_lt) \
@@ -6032,9 +6024,9 @@ am__objects_58 =  \
 unittest_erasure_code_shec_OBJECTS =  \
 	$(am_unittest_erasure_code_shec_OBJECTS)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at unittest_erasure_code_shec_DEPENDENCIES =  \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_23) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_4) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_24) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_4) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_25) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_10) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_1)
 unittest_erasure_code_shec_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
@@ -6090,9 +6082,9 @@ am__objects_59 = erasure-code/unittest_erasure_code_shec_all-ErasureCode.$(OBJEX
 unittest_erasure_code_shec_all_OBJECTS =  \
 	$(am_unittest_erasure_code_shec_all_OBJECTS)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at unittest_erasure_code_shec_all_DEPENDENCIES =  \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_23) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_4) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_24) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_4) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_25) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_10) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_1)
 unittest_erasure_code_shec_all_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
@@ -6148,9 +6140,9 @@ am__objects_60 = erasure-code/unittest_erasure_code_shec_arguments-ErasureCode.$
 unittest_erasure_code_shec_arguments_OBJECTS =  \
 	$(am_unittest_erasure_code_shec_arguments_OBJECTS)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at unittest_erasure_code_shec_arguments_DEPENDENCIES =  \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_23) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_4) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_24) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_4) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_25) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_10) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_1)
 unittest_erasure_code_shec_arguments_LINK = $(LIBTOOL) $(AM_V_lt) \
@@ -6206,9 +6198,9 @@ am__objects_61 = erasure-code/unittest_erasure_code_shec_thread-ErasureCode.$(OB
 unittest_erasure_code_shec_thread_OBJECTS =  \
 	$(am_unittest_erasure_code_shec_thread_OBJECTS)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at unittest_erasure_code_shec_thread_DEPENDENCIES =  \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_23) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_4) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_24) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_4) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_25) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_10) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_1)
 unittest_erasure_code_shec_thread_LINK = $(LIBTOOL) $(AM_V_lt) \
@@ -6217,7 +6209,7 @@ unittest_erasure_code_shec_thread_LINK = $(LIBTOOL) $(AM_V_lt) \
 	$(CXXFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@
 am_unittest_escape_OBJECTS = test/unittest_escape-escape.$(OBJEXT)
 unittest_escape_OBJECTS = $(am_unittest_escape_OBJECTS)
-unittest_escape_DEPENDENCIES = $(am__DEPENDENCIES_24) \
+unittest_escape_DEPENDENCIES = $(am__DEPENDENCIES_25) \
 	$(am__DEPENDENCIES_10)
 unittest_escape_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -6227,7 +6219,7 @@ am_unittest_formatter_OBJECTS =  \
 	test/unittest_formatter-formatter.$(OBJEXT) \
 	rgw/unittest_formatter-rgw_formats.$(OBJEXT)
 unittest_formatter_OBJECTS = $(am_unittest_formatter_OBJECTS)
-unittest_formatter_DEPENDENCIES = $(am__DEPENDENCIES_24) \
+unittest_formatter_DEPENDENCIES = $(am__DEPENDENCIES_25) \
 	$(am__DEPENDENCIES_10)
 unittest_formatter_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -6235,7 +6227,7 @@ unittest_formatter_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(LDFLAGS) -o $@
 am_unittest_gather_OBJECTS = test/unittest_gather-gather.$(OBJEXT)
 unittest_gather_OBJECTS = $(am_unittest_gather_OBJECTS)
-unittest_gather_DEPENDENCIES = $(am__DEPENDENCIES_24) \
+unittest_gather_DEPENDENCIES = $(am__DEPENDENCIES_25) \
 	$(am__DEPENDENCIES_10)
 unittest_gather_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -6245,7 +6237,7 @@ am_unittest_heartbeatmap_OBJECTS =  \
 	test/unittest_heartbeatmap-heartbeat_map.$(OBJEXT)
 unittest_heartbeatmap_OBJECTS = $(am_unittest_heartbeatmap_OBJECTS)
 unittest_heartbeatmap_DEPENDENCIES = $(am__DEPENDENCIES_4) \
-	$(am__DEPENDENCIES_24) $(am__DEPENDENCIES_10)
+	$(am__DEPENDENCIES_25) $(am__DEPENDENCIES_10)
 unittest_heartbeatmap_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(unittest_heartbeatmap_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
@@ -6253,7 +6245,7 @@ unittest_heartbeatmap_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 am_unittest_histogram_OBJECTS =  \
 	test/common/unittest_histogram-histogram.$(OBJEXT)
 unittest_histogram_OBJECTS = $(am_unittest_histogram_OBJECTS)
-unittest_histogram_DEPENDENCIES = $(am__DEPENDENCIES_24) \
+unittest_histogram_DEPENDENCIES = $(am__DEPENDENCIES_25) \
 	$(am__DEPENDENCIES_10)
 unittest_histogram_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -6263,8 +6255,8 @@ am__unittest_hitset_SOURCES_DIST = test/osd/hitset.cc
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at am_unittest_hitset_OBJECTS = test/osd/unittest_hitset-hitset.$(OBJEXT)
 unittest_hitset_OBJECTS = $(am_unittest_hitset_OBJECTS)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at unittest_hitset_DEPENDENCIES =  \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_23) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_24) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_25) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_10)
 unittest_hitset_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -6273,7 +6265,7 @@ unittest_hitset_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 am_unittest_interval_set_OBJECTS =  \
 	test/common/unittest_interval_set-test_interval_set.$(OBJEXT)
 unittest_interval_set_OBJECTS = $(am_unittest_interval_set_OBJECTS)
-unittest_interval_set_DEPENDENCIES = $(am__DEPENDENCIES_24) \
+unittest_interval_set_DEPENDENCIES = $(am__DEPENDENCIES_25) \
 	$(am__DEPENDENCIES_10)
 unittest_interval_set_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -6282,7 +6274,7 @@ unittest_interval_set_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 am_unittest_io_priority_OBJECTS =  \
 	test/common/unittest_io_priority-test_io_priority.$(OBJEXT)
 unittest_io_priority_OBJECTS = $(am_unittest_io_priority_OBJECTS)
-unittest_io_priority_DEPENDENCIES = $(am__DEPENDENCIES_24) \
+unittest_io_priority_DEPENDENCIES = $(am__DEPENDENCIES_25) \
 	$(am__DEPENDENCIES_10)
 unittest_io_priority_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -6291,7 +6283,7 @@ unittest_io_priority_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 am_unittest_ipaddr_OBJECTS =  \
 	test/unittest_ipaddr-test_ipaddr.$(OBJEXT)
 unittest_ipaddr_OBJECTS = $(am_unittest_ipaddr_OBJECTS)
-unittest_ipaddr_DEPENDENCIES = $(am__DEPENDENCIES_24) \
+unittest_ipaddr_DEPENDENCIES = $(am__DEPENDENCIES_25) \
 	$(am__DEPENDENCIES_10)
 unittest_ipaddr_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -6324,7 +6316,7 @@ unittest_journal_OBJECTS = $(am_unittest_journal_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	libcls_journal_client.la \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	librados_test_stub.la \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	librados_internal.la \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_24) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_25) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_10) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_9) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(LIBRADOS)
@@ -6337,7 +6329,7 @@ am__unittest_lfnindex_SOURCES_DIST = test/os/TestLFNIndex.cc
 unittest_lfnindex_OBJECTS = $(am_unittest_lfnindex_OBJECTS)
 @ENABLE_SERVER_TRUE at unittest_lfnindex_DEPENDENCIES =  \
 @ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_16) \
- at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_24) \
+ at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_25) \
 @ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_10)
 unittest_lfnindex_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -6349,7 +6341,7 @@ unittest_libcephfs_config_OBJECTS =  \
 	$(am_unittest_libcephfs_config_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE at unittest_libcephfs_config_DEPENDENCIES = $(LIBCEPHFS) \
 @ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_10) \
- at ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_24)
+ at ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_25)
 unittest_libcephfs_config_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(unittest_libcephfs_config_CXXFLAGS) $(CXXFLAGS) \
@@ -6360,7 +6352,7 @@ unittest_librados_OBJECTS = $(am_unittest_librados_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at unittest_librados_DEPENDENCIES =  \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(LIBRADOS) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_10) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_24)
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_25)
 unittest_librados_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(unittest_librados_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
@@ -6373,7 +6365,7 @@ unittest_librados_config_OBJECTS =  \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at unittest_librados_config_DEPENDENCIES =  \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(LIBRADOS) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_10) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_24)
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_25)
 unittest_librados_config_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(unittest_librados_config_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
@@ -6441,7 +6433,7 @@ unittest_librbd_OBJECTS = $(am_unittest_librbd_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	librados_internal.la \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(LIBRADOS) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(LIBOSDC) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(am__DEPENDENCIES_24) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(am__DEPENDENCIES_25) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(am__DEPENDENCIES_10) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(am__DEPENDENCIES_9)
 unittest_librbd_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
@@ -6451,13 +6443,13 @@ unittest_librbd_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 am_unittest_log_OBJECTS = log/unittest_log-test.$(OBJEXT)
 unittest_log_OBJECTS = $(am_unittest_log_OBJECTS)
 unittest_log_DEPENDENCIES = $(am__DEPENDENCIES_4) \
-	$(am__DEPENDENCIES_24)
+	$(am__DEPENDENCIES_25)
 unittest_log_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) \
 	$(LIBTOOLFLAGS) --mode=link $(CXXLD) $(unittest_log_CXXFLAGS) \
 	$(CXXFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@
 am_unittest_lru_OBJECTS = test/common/unittest_lru-test_lru.$(OBJEXT)
 unittest_lru_OBJECTS = $(am_unittest_lru_OBJECTS)
-unittest_lru_DEPENDENCIES = $(am__DEPENDENCIES_24) \
+unittest_lru_DEPENDENCIES = $(am__DEPENDENCIES_25) \
 	$(am__DEPENDENCIES_10)
 unittest_lru_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) \
 	$(LIBTOOLFLAGS) --mode=link $(CXXLD) $(unittest_lru_CXXFLAGS) \
@@ -6466,8 +6458,8 @@ am__unittest_mds_authcap_SOURCES_DIST = test/mds/TestMDSAuthCaps.cc
 @ENABLE_SERVER_TRUE@@WITH_MDS_TRUE at am_unittest_mds_authcap_OBJECTS = test/mds/unittest_mds_authcap-TestMDSAuthCaps.$(OBJEXT)
 unittest_mds_authcap_OBJECTS = $(am_unittest_mds_authcap_OBJECTS)
 @ENABLE_SERVER_TRUE@@WITH_MDS_TRUE at unittest_mds_authcap_DEPENDENCIES =  \
- at ENABLE_SERVER_TRUE@@WITH_MDS_TRUE@	$(am__DEPENDENCIES_21) \
- at ENABLE_SERVER_TRUE@@WITH_MDS_TRUE@	$(am__DEPENDENCIES_24) \
+ at ENABLE_SERVER_TRUE@@WITH_MDS_TRUE@	$(am__DEPENDENCIES_22) \
+ at ENABLE_SERVER_TRUE@@WITH_MDS_TRUE@	$(am__DEPENDENCIES_25) \
 @ENABLE_SERVER_TRUE@@WITH_MDS_TRUE@	$(am__DEPENDENCIES_10)
 unittest_mds_authcap_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -6476,7 +6468,7 @@ unittest_mds_authcap_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 am_unittest_mds_types_OBJECTS =  \
 	test/fs/unittest_mds_types-mds_types.$(OBJEXT)
 unittest_mds_types_OBJECTS = $(am_unittest_mds_types_OBJECTS)
-unittest_mds_types_DEPENDENCIES = $(am__DEPENDENCIES_24) \
+unittest_mds_types_DEPENDENCIES = $(am__DEPENDENCIES_25) \
 	$(am__DEPENDENCIES_10)
 unittest_mds_types_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -6484,7 +6476,7 @@ unittest_mds_types_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(LDFLAGS) -o $@
 am_unittest_mime_OBJECTS = test/unittest_mime-mime.$(OBJEXT)
 unittest_mime_OBJECTS = $(am_unittest_mime_OBJECTS)
-unittest_mime_DEPENDENCIES = $(am__DEPENDENCIES_24) \
+unittest_mime_DEPENDENCIES = $(am__DEPENDENCIES_25) \
 	$(am__DEPENDENCIES_10)
 unittest_mime_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -6494,8 +6486,8 @@ am__unittest_mon_moncap_SOURCES_DIST = test/mon/moncap.cc
 @ENABLE_SERVER_TRUE@@WITH_MON_TRUE at am_unittest_mon_moncap_OBJECTS = test/mon/unittest_mon_moncap-moncap.$(OBJEXT)
 unittest_mon_moncap_OBJECTS = $(am_unittest_mon_moncap_OBJECTS)
 @ENABLE_SERVER_TRUE@@WITH_MON_TRUE at unittest_mon_moncap_DEPENDENCIES =  \
- at ENABLE_SERVER_TRUE@@WITH_MON_TRUE@	$(am__DEPENDENCIES_22) \
- at ENABLE_SERVER_TRUE@@WITH_MON_TRUE@	$(am__DEPENDENCIES_24) \
+ at ENABLE_SERVER_TRUE@@WITH_MON_TRUE@	$(am__DEPENDENCIES_23) \
+ at ENABLE_SERVER_TRUE@@WITH_MON_TRUE@	$(am__DEPENDENCIES_25) \
 @ENABLE_SERVER_TRUE@@WITH_MON_TRUE@	$(am__DEPENDENCIES_10)
 unittest_mon_moncap_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -6505,8 +6497,8 @@ am__unittest_mon_pgmap_SOURCES_DIST = test/mon/PGMap.cc
 @ENABLE_SERVER_TRUE@@WITH_MON_TRUE at am_unittest_mon_pgmap_OBJECTS = test/mon/unittest_mon_pgmap-PGMap.$(OBJEXT)
 unittest_mon_pgmap_OBJECTS = $(am_unittest_mon_pgmap_OBJECTS)
 @ENABLE_SERVER_TRUE@@WITH_MON_TRUE at unittest_mon_pgmap_DEPENDENCIES =  \
- at ENABLE_SERVER_TRUE@@WITH_MON_TRUE@	$(am__DEPENDENCIES_22) \
- at ENABLE_SERVER_TRUE@@WITH_MON_TRUE@	$(am__DEPENDENCIES_24) \
+ at ENABLE_SERVER_TRUE@@WITH_MON_TRUE@	$(am__DEPENDENCIES_23) \
+ at ENABLE_SERVER_TRUE@@WITH_MON_TRUE@	$(am__DEPENDENCIES_25) \
 @ENABLE_SERVER_TRUE@@WITH_MON_TRUE@	$(am__DEPENDENCIES_10)
 unittest_mon_pgmap_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -6515,7 +6507,7 @@ unittest_mon_pgmap_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 am_unittest_mutex_debug_OBJECTS =  \
 	test/common/unittest_mutex_debug-test_mutex_debug.$(OBJEXT)
 unittest_mutex_debug_OBJECTS = $(am_unittest_mutex_debug_OBJECTS)
-unittest_mutex_debug_DEPENDENCIES = $(am__DEPENDENCIES_24) \
+unittest_mutex_debug_DEPENDENCIES = $(am__DEPENDENCIES_25) \
 	$(am__DEPENDENCIES_10) $(am__DEPENDENCIES_3)
 unittest_mutex_debug_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -6528,8 +6520,8 @@ am__unittest_osd_osdcap_SOURCES_DIST = test/osd/osdcap.cc
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at am_unittest_osd_osdcap_OBJECTS = test/osd/unittest_osd_osdcap-osdcap.$(OBJEXT)
 unittest_osd_osdcap_OBJECTS = $(am_unittest_osd_osdcap_OBJECTS)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at unittest_osd_osdcap_DEPENDENCIES =  \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_23) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_24) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_25) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_10)
 unittest_osd_osdcap_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -6538,7 +6530,7 @@ unittest_osd_osdcap_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 am_unittest_osd_types_OBJECTS =  \
 	test/osd/unittest_osd_types-types.$(OBJEXT)
 unittest_osd_types_OBJECTS = $(am_unittest_osd_types_OBJECTS)
-unittest_osd_types_DEPENDENCIES = $(am__DEPENDENCIES_24) \
+unittest_osd_types_DEPENDENCIES = $(am__DEPENDENCIES_25) \
 	$(am__DEPENDENCIES_10)
 unittest_osd_types_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -6547,7 +6539,7 @@ unittest_osd_types_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 am_unittest_osdmap_OBJECTS =  \
 	test/osd/unittest_osdmap-TestOSDMap.$(OBJEXT)
 unittest_osdmap_OBJECTS = $(am_unittest_osdmap_OBJECTS)
-unittest_osdmap_DEPENDENCIES = $(am__DEPENDENCIES_24) \
+unittest_osdmap_DEPENDENCIES = $(am__DEPENDENCIES_25) \
 	$(am__DEPENDENCIES_4) $(am__DEPENDENCIES_10)
 unittest_osdmap_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -6557,8 +6549,8 @@ am__unittest_osdscrub_SOURCES_DIST = test/osd/TestOSDScrub.cc
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at am_unittest_osdscrub_OBJECTS = test/osd/unittest_osdscrub-TestOSDScrub.$(OBJEXT)
 unittest_osdscrub_OBJECTS = $(am_unittest_osdscrub_OBJECTS)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at unittest_osdscrub_DEPENDENCIES =  \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_23) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_24) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_25) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_10) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_1)
 unittest_osdscrub_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
@@ -6569,7 +6561,7 @@ am__unittest_pageset_SOURCES_DIST = test/test_pageset.cc
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at am_unittest_pageset_OBJECTS = test/unittest_pageset-test_pageset.$(OBJEXT)
 unittest_pageset_OBJECTS = $(am_unittest_pageset_OBJECTS)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at unittest_pageset_DEPENDENCIES =  \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_24)
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_25)
 unittest_pageset_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(unittest_pageset_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
@@ -6577,7 +6569,7 @@ unittest_pageset_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 am_unittest_perf_counters_OBJECTS =  \
 	test/unittest_perf_counters-perf_counters.$(OBJEXT)
 unittest_perf_counters_OBJECTS = $(am_unittest_perf_counters_OBJECTS)
-unittest_perf_counters_DEPENDENCIES = $(am__DEPENDENCIES_24) \
+unittest_perf_counters_DEPENDENCIES = $(am__DEPENDENCIES_25) \
 	$(am__DEPENDENCIES_10)
 unittest_perf_counters_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -6587,8 +6579,8 @@ am__unittest_pglog_SOURCES_DIST = test/osd/TestPGLog.cc
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at am_unittest_pglog_OBJECTS = test/osd/unittest_pglog-TestPGLog.$(OBJEXT)
 unittest_pglog_OBJECTS = $(am_unittest_pglog_OBJECTS)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at unittest_pglog_DEPENDENCIES =  \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_23) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_24) \
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_25) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_10) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__DEPENDENCIES_1)
 unittest_pglog_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
@@ -6599,7 +6591,7 @@ am_unittest_prebufferedstreambuf_OBJECTS = test/unittest_prebufferedstreambuf-te
 unittest_prebufferedstreambuf_OBJECTS =  \
 	$(am_unittest_prebufferedstreambuf_OBJECTS)
 unittest_prebufferedstreambuf_DEPENDENCIES = $(am__DEPENDENCIES_4) \
-	$(am__DEPENDENCIES_24) $(am__DEPENDENCIES_3)
+	$(am__DEPENDENCIES_25) $(am__DEPENDENCIES_3)
 unittest_prebufferedstreambuf_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(unittest_prebufferedstreambuf_CXXFLAGS) $(CXXFLAGS) \
@@ -6607,7 +6599,7 @@ unittest_prebufferedstreambuf_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 am_unittest_prioritized_queue_OBJECTS = test/common/unittest_prioritized_queue-test_prioritized_queue.$(OBJEXT)
 unittest_prioritized_queue_OBJECTS =  \
 	$(am_unittest_prioritized_queue_OBJECTS)
-unittest_prioritized_queue_DEPENDENCIES = $(am__DEPENDENCIES_24) \
+unittest_prioritized_queue_DEPENDENCIES = $(am__DEPENDENCIES_25) \
 	$(am__DEPENDENCIES_10)
 unittest_prioritized_queue_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -6617,8 +6609,10 @@ am__unittest_rbd_mirror_SOURCES_DIST = test/rbd_mirror/test_main.cc \
 	test/rbd_mirror/test_mock_fixture.cc \
 	test/rbd_mirror/test_mock_ImageReplayer.cc \
 	test/rbd_mirror/test_mock_ImageSync.cc \
+	test/rbd_mirror/test_mock_ImageSyncThrottler.cc \
 	test/rbd_mirror/image_replayer/test_mock_BootstrapRequest.cc \
 	test/rbd_mirror/image_replayer/test_mock_CreateImageRequest.cc \
+	test/rbd_mirror/image_replayer/test_mock_EventPreprocessor.cc \
 	test/rbd_mirror/image_sync/test_mock_ImageCopyRequest.cc \
 	test/rbd_mirror/image_sync/test_mock_ObjectCopyRequest.cc \
 	test/rbd_mirror/image_sync/test_mock_SnapshotCopyRequest.cc \
@@ -6629,8 +6623,10 @@ am__unittest_rbd_mirror_SOURCES_DIST = test/rbd_mirror/test_main.cc \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	test/rbd_mirror/unittest_rbd_mirror-test_mock_fixture.$(OBJEXT) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	test/rbd_mirror/unittest_rbd_mirror-test_mock_ImageReplayer.$(OBJEXT) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	test/rbd_mirror/unittest_rbd_mirror-test_mock_ImageSync.$(OBJEXT) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	test/rbd_mirror/unittest_rbd_mirror-test_mock_ImageSyncThrottler.$(OBJEXT) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	test/rbd_mirror/image_replayer/unittest_rbd_mirror-test_mock_BootstrapRequest.$(OBJEXT) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	test/rbd_mirror/image_replayer/unittest_rbd_mirror-test_mock_CreateImageRequest.$(OBJEXT) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	test/rbd_mirror/image_replayer/unittest_rbd_mirror-test_mock_EventPreprocessor.$(OBJEXT) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	test/rbd_mirror/image_sync/unittest_rbd_mirror-test_mock_ImageCopyRequest.$(OBJEXT) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	test/rbd_mirror/image_sync/unittest_rbd_mirror-test_mock_ObjectCopyRequest.$(OBJEXT) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	test/rbd_mirror/image_sync/unittest_rbd_mirror-test_mock_SnapshotCopyRequest.$(OBJEXT) \
@@ -6653,7 +6649,7 @@ unittest_rbd_mirror_OBJECTS = $(am_unittest_rbd_mirror_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(LIBRBD_TYPES) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(LIBRADOS) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(LIBOSDC) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(am__DEPENDENCIES_24) \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(am__DEPENDENCIES_25) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(am__DEPENDENCIES_10) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(am__DEPENDENCIES_9)
 unittest_rbd_mirror_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
@@ -6669,7 +6665,7 @@ unittest_rbd_replay_OBJECTS = $(am_unittest_rbd_replay_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(am__DEPENDENCIES_10) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	librbd_replay.la \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	librbd_replay_ios.la \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(am__DEPENDENCIES_24)
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(am__DEPENDENCIES_25)
 unittest_rbd_replay_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(unittest_rbd_replay_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
@@ -6677,7 +6673,7 @@ unittest_rbd_replay_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 am_unittest_readahead_OBJECTS =  \
 	test/common/unittest_readahead-Readahead.$(OBJEXT)
 unittest_readahead_OBJECTS = $(am_unittest_readahead_OBJECTS)
-unittest_readahead_DEPENDENCIES = $(am__DEPENDENCIES_24) \
+unittest_readahead_DEPENDENCIES = $(am__DEPENDENCIES_25) \
 	$(am__DEPENDENCIES_10)
 unittest_readahead_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -6689,7 +6685,7 @@ am__unittest_rocksdb_option_SOURCES_DIST =  \
 unittest_rocksdb_option_OBJECTS =  \
 	$(am_unittest_rocksdb_option_OBJECTS)
 @ENABLE_SERVER_TRUE@@WITH_DLIBROCKSDB_TRUE at unittest_rocksdb_option_DEPENDENCIES = $(am__DEPENDENCIES_16) \
- at ENABLE_SERVER_TRUE@@WITH_DLIBROCKSDB_TRUE@	$(am__DEPENDENCIES_24) \
+ at ENABLE_SERVER_TRUE@@WITH_DLIBROCKSDB_TRUE@	$(am__DEPENDENCIES_25) \
 @ENABLE_SERVER_TRUE@@WITH_DLIBROCKSDB_TRUE@	$(am__DEPENDENCIES_10)
 unittest_rocksdb_option_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -6701,7 +6697,7 @@ am__unittest_rocksdb_option_static_SOURCES_DIST =  \
 unittest_rocksdb_option_static_OBJECTS =  \
 	$(am_unittest_rocksdb_option_static_OBJECTS)
 @ENABLE_SERVER_TRUE@@WITH_SLIBROCKSDB_TRUE at unittest_rocksdb_option_static_DEPENDENCIES = $(am__DEPENDENCIES_16) \
- at ENABLE_SERVER_TRUE@@WITH_SLIBROCKSDB_TRUE@	$(am__DEPENDENCIES_24) \
+ at ENABLE_SERVER_TRUE@@WITH_SLIBROCKSDB_TRUE@	$(am__DEPENDENCIES_25) \
 @ENABLE_SERVER_TRUE@@WITH_SLIBROCKSDB_TRUE@	$(am__DEPENDENCIES_10)
 unittest_rocksdb_option_static_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -6712,7 +6708,7 @@ am__unittest_run_cmd_SOURCES_DIST = test/run_cmd.cc
 unittest_run_cmd_OBJECTS = $(am_unittest_run_cmd_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE at unittest_run_cmd_DEPENDENCIES = $(LIBCEPHFS) \
 @ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_10) \
- at ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_24)
+ at ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_25)
 unittest_run_cmd_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(unittest_run_cmd_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
@@ -6720,7 +6716,7 @@ unittest_run_cmd_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 am_unittest_safe_io_OBJECTS =  \
 	test/common/unittest_safe_io-test_safe_io.$(OBJEXT)
 unittest_safe_io_OBJECTS = $(am_unittest_safe_io_OBJECTS)
-unittest_safe_io_DEPENDENCIES = $(am__DEPENDENCIES_24) \
+unittest_safe_io_DEPENDENCIES = $(am__DEPENDENCIES_25) \
 	$(am__DEPENDENCIES_10)
 unittest_safe_io_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -6729,7 +6725,7 @@ unittest_safe_io_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 am_unittest_shared_cache_OBJECTS =  \
 	test/common/unittest_shared_cache-test_shared_cache.$(OBJEXT)
 unittest_shared_cache_OBJECTS = $(am_unittest_shared_cache_OBJECTS)
-unittest_shared_cache_DEPENDENCIES = $(am__DEPENDENCIES_24) \
+unittest_shared_cache_DEPENDENCIES = $(am__DEPENDENCIES_25) \
 	$(am__DEPENDENCIES_10)
 unittest_shared_cache_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -6738,7 +6734,7 @@ unittest_shared_cache_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 am_unittest_sharedptr_registry_OBJECTS = test/common/unittest_sharedptr_registry-test_sharedptr_registry.$(OBJEXT)
 unittest_sharedptr_registry_OBJECTS =  \
 	$(am_unittest_sharedptr_registry_OBJECTS)
-unittest_sharedptr_registry_DEPENDENCIES = $(am__DEPENDENCIES_24) \
+unittest_sharedptr_registry_DEPENDENCIES = $(am__DEPENDENCIES_25) \
 	$(am__DEPENDENCIES_10)
 unittest_sharedptr_registry_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -6746,7 +6742,7 @@ unittest_sharedptr_registry_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LDFLAGS) $(LDFLAGS) -o $@
 am_unittest_shunique_lock_OBJECTS = test/common/unittest_shunique_lock-test_shunique_lock.$(OBJEXT)
 unittest_shunique_lock_OBJECTS = $(am_unittest_shunique_lock_OBJECTS)
-unittest_shunique_lock_DEPENDENCIES = $(am__DEPENDENCIES_24) \
+unittest_shunique_lock_DEPENDENCIES = $(am__DEPENDENCIES_25) \
 	$(am__DEPENDENCIES_10) $(am__DEPENDENCIES_3)
 unittest_shunique_lock_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -6754,7 +6750,7 @@ unittest_shunique_lock_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(LDFLAGS) -o $@
 am_unittest_signals_OBJECTS = test/unittest_signals-signals.$(OBJEXT)
 unittest_signals_OBJECTS = $(am_unittest_signals_OBJECTS)
-unittest_signals_DEPENDENCIES = $(am__DEPENDENCIES_24) \
+unittest_signals_DEPENDENCIES = $(am__DEPENDENCIES_25) \
 	$(am__DEPENDENCIES_10)
 unittest_signals_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -6765,7 +6761,7 @@ am__unittest_simple_spin_SOURCES_DIST = test/simple_spin.cc
 unittest_simple_spin_OBJECTS = $(am_unittest_simple_spin_OBJECTS)
 @ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE at unittest_simple_spin_DEPENDENCIES = $(LIBCEPHFS) \
 @ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_10) \
- at ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_24)
+ at ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE@	$(am__DEPENDENCIES_25)
 unittest_simple_spin_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(unittest_simple_spin_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
@@ -6773,7 +6769,7 @@ unittest_simple_spin_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 am_unittest_sloppy_crc_map_OBJECTS = test/common/unittest_sloppy_crc_map-test_sloppy_crc_map.$(OBJEXT)
 unittest_sloppy_crc_map_OBJECTS =  \
 	$(am_unittest_sloppy_crc_map_OBJECTS)
-unittest_sloppy_crc_map_DEPENDENCIES = $(am__DEPENDENCIES_24) \
+unittest_sloppy_crc_map_DEPENDENCIES = $(am__DEPENDENCIES_25) \
 	$(am__DEPENDENCIES_10)
 unittest_sloppy_crc_map_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -6782,7 +6778,7 @@ unittest_sloppy_crc_map_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 am_unittest_str_list_OBJECTS =  \
 	test/unittest_str_list-test_str_list.$(OBJEXT)
 unittest_str_list_OBJECTS = $(am_unittest_str_list_OBJECTS)
-unittest_str_list_DEPENDENCIES = $(am__DEPENDENCIES_24) \
+unittest_str_list_DEPENDENCIES = $(am__DEPENDENCIES_25) \
 	$(am__DEPENDENCIES_10)
 unittest_str_list_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -6791,7 +6787,7 @@ unittest_str_list_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 am_unittest_str_map_OBJECTS =  \
 	test/common/unittest_str_map-test_str_map.$(OBJEXT)
 unittest_str_map_OBJECTS = $(am_unittest_str_map_OBJECTS)
-unittest_str_map_DEPENDENCIES = $(am__DEPENDENCIES_24) \
+unittest_str_map_DEPENDENCIES = $(am__DEPENDENCIES_25) \
 	$(am__DEPENDENCIES_10)
 unittest_str_map_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -6800,7 +6796,7 @@ unittest_str_map_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 am_unittest_striper_OBJECTS =  \
 	test/unittest_striper-test_striper.$(OBJEXT)
 unittest_striper_OBJECTS = $(am_unittest_striper_OBJECTS)
-unittest_striper_DEPENDENCIES = $(LIBOSDC) $(am__DEPENDENCIES_24) \
+unittest_striper_DEPENDENCIES = $(LIBOSDC) $(am__DEPENDENCIES_25) \
 	$(am__DEPENDENCIES_10)
 unittest_striper_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -6808,7 +6804,7 @@ unittest_striper_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(LDFLAGS) -o $@
 am_unittest_strtol_OBJECTS = test/unittest_strtol-strtol.$(OBJEXT)
 unittest_strtol_OBJECTS = $(am_unittest_strtol_OBJECTS)
-unittest_strtol_DEPENDENCIES = $(am__DEPENDENCIES_24) \
+unittest_strtol_DEPENDENCIES = $(am__DEPENDENCIES_25) \
 	$(am__DEPENDENCIES_10)
 unittest_strtol_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -6818,7 +6814,7 @@ am_unittest_subprocess_OBJECTS =  \
 	test/unittest_subprocess-test_subprocess.$(OBJEXT)
 unittest_subprocess_OBJECTS = $(am_unittest_subprocess_OBJECTS)
 unittest_subprocess_DEPENDENCIES = $(am__DEPENDENCIES_4) \
-	$(am__DEPENDENCIES_24)
+	$(am__DEPENDENCIES_25)
 unittest_subprocess_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(unittest_subprocess_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
@@ -6826,7 +6822,7 @@ unittest_subprocess_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 am_unittest_tableformatter_OBJECTS = test/common/unittest_tableformatter-test_tableformatter.$(OBJEXT)
 unittest_tableformatter_OBJECTS =  \
 	$(am_unittest_tableformatter_OBJECTS)
-unittest_tableformatter_DEPENDENCIES = $(am__DEPENDENCIES_24) \
+unittest_tableformatter_DEPENDENCIES = $(am__DEPENDENCIES_25) \
 	$(am__DEPENDENCIES_10)
 unittest_tableformatter_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -6836,7 +6832,7 @@ am_unittest_texttable_OBJECTS =  \
 	test/unittest_texttable-test_texttable.$(OBJEXT)
 unittest_texttable_OBJECTS = $(am_unittest_texttable_OBJECTS)
 unittest_texttable_DEPENDENCIES = $(am__DEPENDENCIES_4) \
-	$(am__DEPENDENCIES_24)
+	$(am__DEPENDENCIES_25)
 unittest_texttable_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
 	$(unittest_texttable_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
@@ -6844,7 +6840,7 @@ unittest_texttable_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 am_unittest_throttle_OBJECTS =  \
 	test/common/unittest_throttle-Throttle.$(OBJEXT)
 unittest_throttle_OBJECTS = $(am_unittest_throttle_OBJECTS)
-unittest_throttle_DEPENDENCIES = $(am__DEPENDENCIES_24) \
+unittest_throttle_DEPENDENCIES = $(am__DEPENDENCIES_25) \
 	$(am__DEPENDENCIES_10)
 unittest_throttle_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -6854,7 +6850,7 @@ am_unittest_time_OBJECTS =  \
 	test/common/unittest_time-test_time.$(OBJEXT)
 unittest_time_OBJECTS = $(am_unittest_time_OBJECTS)
 unittest_time_DEPENDENCIES = $(am__DEPENDENCIES_4) \
-	$(am__DEPENDENCIES_24) $(am__DEPENDENCIES_1) \
+	$(am__DEPENDENCIES_25) $(am__DEPENDENCIES_1) \
 	$(am__DEPENDENCIES_3)
 unittest_time_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -6866,7 +6862,7 @@ am__unittest_transaction_SOURCES_DIST =  \
 unittest_transaction_OBJECTS = $(am_unittest_transaction_OBJECTS)
 @ENABLE_SERVER_TRUE at unittest_transaction_DEPENDENCIES =  \
 @ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_16) \
- at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_24) \
+ at ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_25) \
 @ENABLE_SERVER_TRUE@	$(am__DEPENDENCIES_10)
 unittest_transaction_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -6874,7 +6870,7 @@ unittest_transaction_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(LDFLAGS) -o $@
 am_unittest_utf8_OBJECTS = test/unittest_utf8-utf8.$(OBJEXT)
 unittest_utf8_OBJECTS = $(am_unittest_utf8_OBJECTS)
-unittest_utf8_DEPENDENCIES = $(am__DEPENDENCIES_24) \
+unittest_utf8_DEPENDENCIES = $(am__DEPENDENCIES_25) \
 	$(am__DEPENDENCIES_10)
 unittest_utf8_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -6884,7 +6880,7 @@ am_unittest_util_OBJECTS =  \
 	test/common/unittest_util-test_util.$(OBJEXT)
 unittest_util_OBJECTS = $(am_unittest_util_OBJECTS)
 unittest_util_DEPENDENCIES = $(am__DEPENDENCIES_4) \
-	$(am__DEPENDENCIES_24) $(am__DEPENDENCIES_1) \
+	$(am__DEPENDENCIES_25) $(am__DEPENDENCIES_1) \
 	$(am__DEPENDENCIES_3)
 unittest_util_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -6894,7 +6890,7 @@ am_unittest_weighted_priority_queue_OBJECTS = test/common/unittest_weighted_prio
 unittest_weighted_priority_queue_OBJECTS =  \
 	$(am_unittest_weighted_priority_queue_OBJECTS)
 unittest_weighted_priority_queue_DEPENDENCIES =  \
-	$(am__DEPENDENCIES_24) $(am__DEPENDENCIES_10)
+	$(am__DEPENDENCIES_25) $(am__DEPENDENCIES_10)
 unittest_weighted_priority_queue_LINK = $(LIBTOOL) $(AM_V_lt) \
 	--tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link \
 	$(CXXLD) $(unittest_weighted_priority_queue_CXXFLAGS) \
@@ -6902,7 +6898,7 @@ unittest_weighted_priority_queue_LINK = $(LIBTOOL) $(AM_V_lt) \
 am_unittest_workqueue_OBJECTS =  \
 	test/unittest_workqueue-test_workqueue.$(OBJEXT)
 unittest_workqueue_OBJECTS = $(am_unittest_workqueue_OBJECTS)
-unittest_workqueue_DEPENDENCIES = $(am__DEPENDENCIES_24) \
+unittest_workqueue_DEPENDENCIES = $(am__DEPENDENCIES_25) \
 	$(am__DEPENDENCIES_10)
 unittest_workqueue_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -6910,7 +6906,7 @@ unittest_workqueue_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(LDFLAGS) -o $@
 am_unittest_xlist_OBJECTS = test/unittest_xlist-test_xlist.$(OBJEXT)
 unittest_xlist_OBJECTS = $(am_unittest_xlist_OBJECTS)
-unittest_xlist_DEPENDENCIES = $(am__DEPENDENCIES_24) \
+unittest_xlist_DEPENDENCIES = $(am__DEPENDENCIES_25) \
 	$(am__DEPENDENCIES_4)
 unittest_xlist_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \
 	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
@@ -7168,7 +7164,6 @@ SOURCES = $(libkv_a_SOURCES) $(libmon_a_SOURCES) $(libos_a_SOURCES) \
 	$(ceph_test_rados_striper_api_striping_SOURCES) \
 	$(ceph_test_rados_watch_notify_SOURCES) \
 	$(ceph_test_rbd_mirror_SOURCES) \
-	$(ceph_test_rbd_mirror_image_replay_SOURCES) \
 	$(ceph_test_rbd_mirror_random_write_SOURCES) \
 	$(ceph_test_rewrite_latency_SOURCES) \
 	$(ceph_test_rgw_manifest_SOURCES) $(ceph_test_rgw_obj_SOURCES) \
@@ -7460,7 +7455,6 @@ DIST_SOURCES = $(am__libkv_a_SOURCES_DIST) \
 	$(am__ceph_test_rados_striper_api_striping_SOURCES_DIST) \
 	$(am__ceph_test_rados_watch_notify_SOURCES_DIST) \
 	$(am__ceph_test_rbd_mirror_SOURCES_DIST) \
-	$(am__ceph_test_rbd_mirror_image_replay_SOURCES_DIST) \
 	$(am__ceph_test_rbd_mirror_random_write_SOURCES_DIST) \
 	$(ceph_test_rewrite_latency_SOURCES) \
 	$(am__ceph_test_rgw_manifest_SOURCES_DIST) \
@@ -7855,9 +7849,10 @@ am__noinst_HEADERS_DIST = arch/intel.h arch/arm.h arch/probe.h \
 	messages/MRoute.h messages/MForward.h messages/MStatfs.h \
 	messages/MStatfsReply.h messages/MTimeCheck.h \
 	messages/MWatchNotify.h messages/PaxosServiceMessage.h \
-	include/Context.h include/CompatSet.h include/Distribution.h \
-	include/Spinlock.h include/addr_parsing.h include/assert.h \
-	include/atomic.h include/bitmapper.h include/blobhash.h \
+	messages/MNop.h include/Context.h include/CompatSet.h \
+	include/Distribution.h include/Spinlock.h \
+	include/addr_parsing.h include/assert.h include/atomic.h \
+	include/bitmapper.h include/blobhash.h \
 	include/btree_interval_set.h include/buffer.h \
 	include/buffer_fwd.h include/byteorder.h \
 	include/cephfs/libcephfs.h include/ceph_features.h \
@@ -7897,10 +7892,11 @@ am__noinst_HEADERS_DIST = arch/intel.h arch/arm.h arch/probe.h \
 	libradosstriper/MultiAioCompletionImpl.h \
 	journal/AsyncOpTracker.h journal/Entry.h journal/Future.h \
 	journal/FutureImpl.h journal/Journaler.h \
-	journal/JournalMetadata.h journal/JournalPlayer.h \
-	journal/JournalRecorder.h journal/JournalTrimmer.h \
-	journal/ObjectPlayer.h journal/ObjectRecorder.h \
-	journal/ReplayEntry.h journal/ReplayHandler.h journal/Utils.h \
+	journal/JournalMetadata.h journal/JournalMetadataListener.h \
+	journal/JournalPlayer.h journal/JournalRecorder.h \
+	journal/JournalTrimmer.h journal/ObjectPlayer.h \
+	journal/ObjectRecorder.h journal/ReplayEntry.h \
+	journal/ReplayHandler.h journal/Settings.h journal/Utils.h \
 	librbd/AioCompletion.h librbd/AioImageRequest.h \
 	librbd/AioImageRequestWQ.h librbd/AioObjectRequest.h \
 	librbd/AsyncObjectThrottle.h librbd/AsyncOperation.h \
@@ -8052,8 +8048,8 @@ am__noinst_HEADERS_DIST = arch/intel.h arch/arm.h arch/probe.h \
 	test/librados/test.h test/librados/TestCase.h \
 	test/libradosstriper/TestCase.h \
 	test/ObjectMap/KeyValueDBMemory.h test/omap_bench.h \
-	test/osdc/FakeWriteback.h test/osd/Object.h \
-	test/osd/RadosModel.h test/osd/TestOpStat.h \
+	test/osdc/FakeWriteback.h test/osdc/MemWriteback.h \
+	test/osd/Object.h test/osd/RadosModel.h test/osd/TestOpStat.h \
 	test/system/cross_process_sem.h \
 	test/system/st_rados_create_pool.h \
 	test/system/st_rados_delete_objs.h \
@@ -8067,6 +8063,7 @@ am__noinst_HEADERS_DIST = arch/intel.h arch/arm.h arch/probe.h \
 	tools/rbd_mirror/BaseRequest.h \
 	tools/rbd_mirror/ClusterWatcher.h \
 	tools/rbd_mirror/ImageReplayer.h tools/rbd_mirror/ImageSync.h \
+	tools/rbd_mirror/ImageSyncThrottler.h \
 	tools/rbd_mirror/Mirror.h tools/rbd_mirror/PoolWatcher.h \
 	tools/rbd_mirror/ProgressContext.h tools/rbd_mirror/Replayer.h \
 	tools/rbd_mirror/ImageDeleter.h tools/rbd_mirror/Threads.h \
@@ -8074,6 +8071,7 @@ am__noinst_HEADERS_DIST = arch/intel.h arch/arm.h arch/probe.h \
 	tools/rbd_mirror/image_replayer/BootstrapRequest.h \
 	tools/rbd_mirror/image_replayer/CloseImageRequest.h \
 	tools/rbd_mirror/image_replayer/CreateImageRequest.h \
+	tools/rbd_mirror/image_replayer/EventPreprocessor.h \
 	tools/rbd_mirror/image_replayer/OpenImageRequest.h \
 	tools/rbd_mirror/image_replayer/OpenLocalImageRequest.h \
 	tools/rbd_mirror/image_replayer/ReplayStatusFormatter.h \
@@ -8346,8 +8344,8 @@ ACLOCAL = @ACLOCAL@
 AMTAR = @AMTAR@
 AM_CXXFLAGS = @AM_CXXFLAGS@ $(AM_COMMON_CFLAGS) -ftemplate-depth-1024 \
 	-Wnon-virtual-dtor -Wno-invalid-offsetof $(am__append_7) \
-	$(am__append_10) $(am__append_13) $(am__append_99) \
-	$(am__append_102)
+	$(am__append_10) $(am__append_13) $(am__append_100) \
+	$(am__append_103)
 AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@
 AR = @AR@
 ARM_CRC_FLAGS = @ARM_CRC_FLAGS@
@@ -8551,10 +8549,10 @@ top_srcdir = @top_srcdir@
 AUTOMAKE_OPTIONS = gnu subdir-objects
 SUBDIRS = ocf java
 DIST_SUBDIRS = gmock ocf java
-BUILT_SOURCES = $(am__append_279) $(am__append_309)
+BUILT_SOURCES = $(am__append_280) $(am__append_312)
 
 # extra bits
-EXTRA_DIST = $(am__append_31) ceph-detect-init/AUTHORS.rst \
+EXTRA_DIST = $(am__append_32) ceph-detect-init/AUTHORS.rst \
 	ceph-detect-init/ceph_detect_init/centos/__init__.py \
 	ceph-detect-init/ceph_detect_init/exc.py \
 	ceph-detect-init/ceph_detect_init/main.py \
@@ -9243,7 +9241,7 @@ EXTRA_DIST = $(am__append_31) ceph-detect-init/AUTHORS.rst \
 	spdk/include/spdk/queue_extras.h spdk/include/spdk/file.h \
 	spdk/include/spdk/assert.h spdk/include/spdk/barrier.h \
 	spdk/include/spdk/mmio.h tracing/tracing-common.h \
-	$(am__append_280) $(am__append_284) $(am__append_288) \
+	$(am__append_281) $(am__append_285) $(am__append_289) \
 	$(srcdir)/$(shell_scripts:%=%.in) $(srcdir)/vstart.sh \
 	$(srcdir)/stop.sh ceph-run $(srcdir)/ceph-osd-prestart.sh \
 	$(srcdir)/ceph_common.sh $(srcdir)/init-radosgw \
@@ -9311,10 +9309,10 @@ noinst_HEADERS = arch/intel.h arch/arm.h arch/probe.h \
 	crush/CrushWrapper.h crush/CrushWrapper.i crush/builder.h \
 	crush/crush.h crush/crush_compat.h crush/crush_ln_table.h \
 	crush/grammar.h crush/hash.h crush/mapper.h crush/sample.txt \
-	crush/types.h $(am__append_33) $(am__append_37) \
-	$(am__append_40) $(am__append_44) $(am__append_46) \
-	$(am__append_50) $(am__append_59) $(am__append_60) \
-	$(am__append_62) $(am__append_65) $(am__append_68) \
+	crush/types.h $(am__append_34) $(am__append_38) \
+	$(am__append_41) $(am__append_45) $(am__append_47) \
+	$(am__append_51) $(am__append_60) $(am__append_61) \
+	$(am__append_63) $(am__append_66) $(am__append_69) \
 	erasure-code/jerasure/gf-complete/include/gf_complete.h \
 	erasure-code/jerasure/gf-complete/include/gf_general.h \
 	erasure-code/jerasure/gf-complete/include/gf_int.h \
@@ -9344,7 +9342,7 @@ noinst_HEADERS = arch/intel.h arch/arm.h arch/probe.h \
 	erasure-code/jerasure/gf-complete/include/gf_rand.h \
 	erasure-code/jerasure/gf-complete/include/gf_method.h \
 	erasure-code/jerasure/gf-complete/include/gf_general.h \
-	$(am__append_86) erasure-code/ErasureCode.h \
+	$(am__append_87) erasure-code/ErasureCode.h \
 	erasure-code/ErasureCodeInterface.h \
 	erasure-code/ErasureCodePlugin.h \
 	compressor/zlib/CompressionZlib.h \
@@ -9352,7 +9350,7 @@ noinst_HEADERS = arch/intel.h arch/arm.h arch/probe.h \
 	compressor/AsyncCompressor.h compressor/CompressionPlugin.h \
 	osdc/Filer.h osdc/Journaler.h osdc/ObjectCacher.h \
 	osdc/Objecter.h osdc/Striper.h osdc/WritebackHandler.h \
-	$(am__append_93) $(am__append_95) global/pidfile.h \
+	$(am__append_94) $(am__append_96) global/pidfile.h \
 	global/global_init.h global/global_context.h \
 	global/signal_handler.h json_spirit/json_spirit.h \
 	json_spirit/json_spirit_error_position.h \
@@ -9407,7 +9405,7 @@ noinst_HEADERS = arch/intel.h arch/arm.h arch/probe.h \
 	common/TracepointProvider.h common/event_socket.h \
 	common/PluginRegistry.h common/scrub_types.h \
 	common/ceph_time.h common/ceph_timer.h common/align.h \
-	common/mutex_debug.h common/shunique_lock.h $(am__append_115) \
+	common/mutex_debug.h common/shunique_lock.h $(am__append_116) \
 	common/secret.h msg/Connection.h msg/Dispatcher.h \
 	msg/Message.h msg/Messenger.h msg/SimplePolicyMessenger.h \
 	msg/msg_types.h msg/simple/Accepter.h \
@@ -9416,7 +9414,7 @@ noinst_HEADERS = arch/intel.h arch/arm.h arch/probe.h \
 	msg/async/AsyncConnection.h msg/async/AsyncMessenger.h \
 	msg/async/Event.h msg/async/EventEpoll.h \
 	msg/async/EventSelect.h msg/async/net_handler.h \
-	$(am__append_124) messages/MAuth.h messages/MAuthReply.h \
+	$(am__append_125) messages/MAuth.h messages/MAuthReply.h \
 	messages/MCacheExpire.h messages/MClientCaps.h \
 	messages/MClientCapRelease.h messages/MClientLease.h \
 	messages/MClientReconnect.h messages/MClientReply.h \
@@ -9475,9 +9473,10 @@ noinst_HEADERS = arch/intel.h arch/arm.h arch/probe.h \
 	messages/MRoute.h messages/MForward.h messages/MStatfs.h \
 	messages/MStatfsReply.h messages/MTimeCheck.h \
 	messages/MWatchNotify.h messages/PaxosServiceMessage.h \
-	include/Context.h include/CompatSet.h include/Distribution.h \
-	include/Spinlock.h include/addr_parsing.h include/assert.h \
-	include/atomic.h include/bitmapper.h include/blobhash.h \
+	messages/MNop.h include/Context.h include/CompatSet.h \
+	include/Distribution.h include/Spinlock.h \
+	include/addr_parsing.h include/assert.h include/atomic.h \
+	include/bitmapper.h include/blobhash.h \
 	include/btree_interval_set.h include/buffer.h \
 	include/buffer_fwd.h include/byteorder.h \
 	include/cephfs/libcephfs.h include/ceph_features.h \
@@ -9509,12 +9508,12 @@ noinst_HEADERS = arch/intel.h arch/arm.h arch/probe.h \
 	include/util.h include/stat.h include/on_exit.h \
 	include/memory.h include/rados/memory.h \
 	include/unordered_set.h include/unordered_map.h \
-	include/timegm.h include/event_type.h $(am__append_131) \
-	$(am__append_134) $(am__append_135) $(am__append_140) \
-	$(am__append_150) $(am__append_152) $(am__append_155) \
-	$(am__append_156) $(am__append_162) $(am__append_198) \
-	$(am__append_204) $(am__append_216) $(am__append_224) \
-	$(am__append_230) $(am__append_242) test/bench/backend.h \
+	include/timegm.h include/event_type.h $(am__append_132) \
+	$(am__append_135) $(am__append_136) $(am__append_141) \
+	$(am__append_151) $(am__append_153) $(am__append_156) \
+	$(am__append_157) $(am__append_163) $(am__append_199) \
+	$(am__append_205) $(am__append_217) $(am__append_225) \
+	$(am__append_231) $(am__append_243) test/bench/backend.h \
 	test/bench/bencher.h test/bench/detailed_stat_collector.h \
 	test/bench/distribution.h test/bench/dumb_backend.h \
 	test/bench/rados_backend.h test/bench/rbd_backend.h \
@@ -9528,8 +9527,8 @@ noinst_HEADERS = arch/intel.h arch/arm.h arch/probe.h \
 	test/librados/test.h test/librados/TestCase.h \
 	test/libradosstriper/TestCase.h \
 	test/ObjectMap/KeyValueDBMemory.h test/omap_bench.h \
-	test/osdc/FakeWriteback.h test/osd/Object.h \
-	test/osd/RadosModel.h test/osd/TestOpStat.h \
+	test/osdc/FakeWriteback.h test/osdc/MemWriteback.h \
+	test/osd/Object.h test/osd/RadosModel.h test/osd/TestOpStat.h \
 	test/system/cross_process_sem.h \
 	test/system/st_rados_create_pool.h \
 	test/system/st_rados_delete_objs.h \
@@ -9537,7 +9536,7 @@ noinst_HEADERS = arch/intel.h arch/arm.h arch/probe.h \
 	test/system/st_rados_list_objects.h \
 	test/system/st_rados_notify.h test/system/st_rados_watch.h \
 	test/system/systest_runnable.h test/system/systest_settings.h \
-	test/unit.h test/journal/RadosTestFixture.h $(am__append_267) \
+	test/unit.h test/journal/RadosTestFixture.h $(am__append_268) \
 	tools/cephfs/JournalTool.h tools/cephfs/JournalScanner.h \
 	tools/cephfs/JournalFilter.h tools/cephfs/EventOutput.h \
 	tools/cephfs/Resetter.h tools/cephfs/Dumper.h \
@@ -9549,42 +9548,42 @@ noinst_HEADERS = arch/intel.h arch/arm.h arch/probe.h \
 	bash_completion/ceph bash_completion/rados bash_completion/rbd \
 	bash_completion/radosgw-admin mount/canonicalize.c \
 	mount/mtab.c objclass/objclass.h
-bin_SCRIPTS = $(am__append_30) $(am__append_297) $(am__append_306) \
-	$(am__append_314)
-sbin_SCRIPTS = 
-su_sbin_SCRIPTS = $(am__append_311)
+bin_SCRIPTS = $(am__append_31) $(am__append_298) $(am__append_309) \
+	$(am__append_315)
+sbin_SCRIPTS = $(am__append_301)
+su_sbin_SCRIPTS = 
 dist_bin_SCRIPTS = 
-lib_LTLIBRARIES = $(am__append_130) $(am__append_133) \
-	$(am__append_139) $(am__append_145) $(am__append_278) \
-	$(am__append_304) $(am__append_305)
+lib_LTLIBRARIES = $(am__append_131) $(am__append_134) \
+	$(am__append_140) $(am__append_146) $(am__append_279) \
+	$(am__append_307) $(am__append_308)
 noinst_LTLIBRARIES = libarch.la libauth.la libcrush.la libmon_types.la \
-	$(am__append_49) libosd_types.la $(am__append_87) \
+	$(am__append_50) libosd_types.la $(am__append_88) \
 	liberasure_code.la libcompressor.la libosdc.la \
-	$(am__append_92) $(am__append_94) libglobal.la \
+	$(am__append_93) $(am__append_95) libglobal.la \
 	libjson_spirit.la liblog.la libperfglue.la \
-	libcommon_internal.la libcommon_crc.la $(am__append_113) \
-	libcommon.la $(am__append_116) libmsg.la $(am__append_126) \
-	librbd_types.la $(am__append_137) $(am__append_147) \
-	$(am__append_151) $(am__append_157) $(am__append_217) \
-	$(am__append_227) $(am__append_232) $(am__append_260) \
-	$(am__append_271) $(am__append_298)
-noinst_LIBRARIES = $(am__append_32) $(am__append_45) libos_types.a \
-	$(am__append_57) $(am__append_61) $(am__append_67)
-radoslib_LTLIBRARIES = $(am__append_153) $(am__append_154)
+	libcommon_internal.la libcommon_crc.la $(am__append_114) \
+	libcommon.la $(am__append_117) libmsg.la $(am__append_127) \
+	librbd_types.la $(am__append_138) $(am__append_148) \
+	$(am__append_152) $(am__append_158) $(am__append_218) \
+	$(am__append_228) $(am__append_233) $(am__append_261) \
+	$(am__append_272) $(am__append_299)
+noinst_LIBRARIES = $(am__append_33) $(am__append_46) libos_types.a \
+	$(am__append_58) $(am__append_62) $(am__append_68)
+radoslib_LTLIBRARIES = $(am__append_154) $(am__append_155)
 
 # like bin_PROGRAMS, but these targets are only built for debug builds
-bin_DEBUGPROGRAMS = $(am__append_96) $(am__append_149) \
-	$(am__append_164) $(am__append_218) $(am__append_219) \
-	$(am__append_220) $(am__append_221) $(am__append_223) \
-	$(am__append_225) $(am__append_231) $(am__append_233) \
-	$(am__append_234) $(am__append_237) $(am__append_239) \
-	$(am__append_240) $(am__append_241) $(am__append_243) \
-	$(am__append_245) $(am__append_247) $(am__append_248) \
-	$(am__append_254) ceph_test_timers ceph_test_signal_handlers \
-	ceph_test_rewrite_latency ceph_test_crypto $(am__append_259) \
+bin_DEBUGPROGRAMS = $(am__append_97) $(am__append_150) \
+	$(am__append_165) $(am__append_219) $(am__append_220) \
+	$(am__append_221) $(am__append_222) $(am__append_224) \
+	$(am__append_226) $(am__append_232) $(am__append_234) \
+	$(am__append_235) $(am__append_238) $(am__append_240) \
+	$(am__append_241) $(am__append_242) $(am__append_244) \
+	$(am__append_246) $(am__append_248) $(am__append_249) \
+	$(am__append_255) ceph_test_timers ceph_test_signal_handlers \
+	ceph_test_rewrite_latency ceph_test_crypto $(am__append_260) \
 	ceph_bench_log ceph_test_objectcacher_stress \
-	ceph_test_cfuse_cache_invalidate $(am__append_263) \
-	$(am__append_264) $(am__append_273) $(am__append_274) \
+	ceph_test_cfuse_cache_invalidate $(am__append_264) \
+	$(am__append_265) $(am__append_274) $(am__append_275) \
 	ceph_psim
 
 # like sbin_SCRIPTS but can be used to install to e.g. /usr/sbin
@@ -9594,12 +9593,12 @@ ceph_sbindir = $(sbindir)
 su_sbindir = /sbin
 
 # C/C++ tests to build and executed will be appended to this
-check_TESTPROGRAMS = $(am__append_180) $(am__append_184) \
-	$(am__append_187) $(am__append_222) $(am__append_226) \
-	$(am__append_235) $(am__append_244) $(am__append_246) \
-	$(am__append_250) $(am__append_251) $(am__append_255) \
-	$(am__append_256) $(am__append_257) $(am__append_258) \
-	unittest_addrs $(am__append_262) unittest_bloom_filter \
+check_TESTPROGRAMS = $(am__append_181) $(am__append_185) \
+	$(am__append_188) $(am__append_223) $(am__append_227) \
+	$(am__append_236) $(am__append_245) $(am__append_247) \
+	$(am__append_251) $(am__append_252) $(am__append_256) \
+	$(am__append_257) $(am__append_258) $(am__append_259) \
+	unittest_addrs $(am__append_263) unittest_bloom_filter \
 	unittest_histogram unittest_prioritized_queue \
 	unittest_weighted_priority_queue unittest_str_map \
 	unittest_mutex_debug unittest_shunique_lock \
@@ -9661,7 +9660,7 @@ check_TESTPROGRAMS = $(am__append_180) $(am__append_184) \
 # GNU Library Public License for more details.
 #
 check_SCRIPTS = ceph-detect-init/run-tox.sh ceph-disk/run-tox.sh \
-	$(am__append_161) $(am__append_229) \
+	$(am__append_162) $(am__append_230) \
 	test/ceph_objectstore_tool.py test/test-ceph-helpers.sh \
 	test/cephtool-test-osd.sh test/cephtool-test-mon.sh \
 	test/cephtool-test-mds.sh test/cephtool-test-rados.sh \
@@ -9678,7 +9677,7 @@ check_SCRIPTS = ceph-detect-init/run-tox.sh ceph-disk/run-tox.sh \
 	test/mon/mon-handle-forward.sh \
 	test/libradosstriper/rados-striper.sh \
 	test/test_objectstore_memstore.sh test/test_pidfile.sh \
-	test/pybind/test_ceph_argparse.py \
+	test/test_subman.sh test/pybind/test_ceph_argparse.py \
 	test/pybind/test_ceph_daemon.py \
 	../qa/workunits/erasure-code/encode-decode-non-regression.sh \
 	test/encoding/readable.sh
@@ -9717,7 +9716,7 @@ AM_COMMON_CFLAGS = \
 	-fsigned-char
 
 AM_CFLAGS = $(AM_COMMON_CFLAGS) $(am__append_6) $(am__append_12) \
-	$(am__append_98) $(am__append_101)
+	$(am__append_99) $(am__append_102)
 AM_CPPFLAGS = $(AM_COMMON_CPPFLAGS)
 
 # note: this is position dependant, it affects the -l options that
@@ -9733,7 +9732,7 @@ AM_CCASFLAGS = -f elf64
 
 #####################
 EXTRALIBS = -lm $(am__append_14) $(am__append_15) $(am__append_16) \
-	$(am__append_26)
+	$(am__append_27)
 LIBGLOBAL = libglobal.la
 LIBCOMMON = libcommon.la -luuid
 LIBSECRET = libsecret.la
@@ -9744,7 +9743,7 @@ LIBMSG = libmsg.la
 LIBCRUSH = libcrush.la
 LIBCOMPRESSOR = libcompressor.la
 LIBJSON_SPIRIT = libjson_spirit.la
-LIBKV = libkv.a $(am__append_27) $(am__append_28) $(am__append_29) -lz \
+LIBKV = libkv.a $(am__append_28) $(am__append_29) $(am__append_30) -lz \
 	-lleveldb -lsnappy
 LIBLOG = liblog.la
 LIBOS = libos.a $(am__append_17) $(am__append_18) $(am__append_19) \
@@ -9764,7 +9763,7 @@ LIBMON = libmon.a $(am__append_22) $(LIBPERFGLUE) $(LIBMON_TYPES)
 LIBMON_TYPES = libmon_types.la
 LIBMDS = libmds.la $(am__append_24) $(LIBPERFGLUE)
 LIBCLIENT = libclient.la
-LIBCLIENT_FUSE = libclient_fuse.la
+LIBCLIENT_FUSE = libclient_fuse.la $(am__append_26) $(LIBPERFGLUE)
 LIBRADOS = librados.la
 LIBRADOSSTRIPER = libradosstriper.la
 LIBRGW = librgw.la $(am__append_25)
@@ -9782,29 +9781,29 @@ CEPH_GLOBAL = $(LIBGLOBAL) $(LIBCOMMON) $(PTHREAD_LIBS) -lm $(CRYPTO_LIBS) $(EXT
 
 # important; libmsg before libauth!
 LIBCOMMON_DEPS = libcommon_internal.la libcommon_crc.la \
-	$(am__append_112) $(LIBERASURE_CODE) $(LIBCOMPRESSOR) \
+	$(am__append_113) $(LIBERASURE_CODE) $(LIBCOMPRESSOR) \
 	$(LIBMSG) $(LIBAUTH) $(LIBCRUSH) $(LIBJSON_SPIRIT) $(LIBLOG) \
-	$(LIBARCH) $(BOOST_RANDOM_LIBS) -luuid $(am__append_114)
-LIBRADOS_DEPS = $(am__append_127)
-LIBRGW_DEPS = $(am__append_144)
-LIBCIVETWEB_DEPS = $(am__append_146)
+	$(LIBARCH) $(BOOST_RANDOM_LIBS) -luuid $(am__append_115)
+LIBRADOS_DEPS = $(am__append_128)
+LIBRGW_DEPS = $(am__append_145)
+LIBCIVETWEB_DEPS = $(am__append_147)
 
 # This is used by the dencoder test
 
 # Do not use TCMALLOC with dencoder
-DENCODER_SOURCES = $(am__append_47) perfglue/disabled_heap_profiler.cc \
-	perfglue/disabled_stubs.cc $(am__append_141)
-DENCODER_DEPS = $(am__append_48) $(am__append_136) $(am__append_142) \
-	$(am__append_158)
+DENCODER_SOURCES = $(am__append_48) perfglue/disabled_heap_profiler.cc \
+	perfglue/disabled_stubs.cc $(am__append_142)
+DENCODER_DEPS = $(am__append_49) $(am__append_137) $(am__append_143) \
+	$(am__append_159)
 radoslibdir = $(libdir)/rados-classes
-LOCAL_ALL = ceph-detect-init-all ceph-disk-all $(am__append_281) \
-	$(am__append_285) $(am__append_289)
-LOCAL_CLEAN = ceph-detect-init-clean ceph-disk-clean $(am__append_282) \
-	$(am__append_286) $(am__append_290) base-clean-local
+LOCAL_ALL = ceph-detect-init-all ceph-disk-all $(am__append_282) \
+	$(am__append_286) $(am__append_290)
+LOCAL_CLEAN = ceph-detect-init-clean ceph-disk-clean $(am__append_283) \
+	$(am__append_287) $(am__append_291) base-clean-local
 LOCAL_INSTALLDATA = ceph-detect-init-install-data \
 	ceph-disk-install-data base-install-data-local
-LOCAL_INSTALLEXEC = $(am__append_283) $(am__append_287) \
-	$(am__append_291)
+LOCAL_INSTALLEXEC = $(am__append_284) $(am__append_288) \
+	$(am__append_292)
 libarch_la_SOURCES = \
 	arch/intel.c \
 	arch/arm.c \
@@ -9838,12 +9837,12 @@ libcrush_la_SOURCES = \
 	crush/CrushTester.cc
 
 @ENABLE_SERVER_TRUE at libkv_a_SOURCES = kv/KeyValueDB.cc \
- at ENABLE_SERVER_TRUE@	kv/LevelDBStore.cc $(am__append_35) \
- at ENABLE_SERVER_TRUE@	$(am__append_38) $(am__append_41)
- at ENABLE_SERVER_TRUE@libkv_a_CXXFLAGS = ${AM_CXXFLAGS} $(am__append_34) \
- at ENABLE_SERVER_TRUE@	$(am__append_42)
- at ENABLE_SERVER_TRUE@libkv_a_LIBADD = $(am__append_36) $(am__append_39) \
+ at ENABLE_SERVER_TRUE@	kv/LevelDBStore.cc $(am__append_36) \
+ at ENABLE_SERVER_TRUE@	$(am__append_39) $(am__append_42)
+ at ENABLE_SERVER_TRUE@libkv_a_CXXFLAGS = ${AM_CXXFLAGS} $(am__append_35) \
 @ENABLE_SERVER_TRUE@	$(am__append_43)
+ at ENABLE_SERVER_TRUE@libkv_a_LIBADD = $(am__append_37) $(am__append_40) \
+ at ENABLE_SERVER_TRUE@	$(am__append_44)
 @ENABLE_SERVER_TRUE@@FREEBSD_TRUE@@WITH_SLIBROCKSDB_TRUE at NPROC = sysctl -n hw.ncpu
 
 # build rocksdb with its own makefile
@@ -9908,7 +9907,7 @@ LIBMDS_DEPS = $(LIBOSDC)
 @ENABLE_SERVER_TRUE@@WITH_MDS_TRUE at libmds_la_SOURCES = $(LIBMDS_SOURCES)
 @ENABLE_SERVER_TRUE@@WITH_MDS_TRUE at libmds_la_LIBADD = $(LIBMDS_DEPS)
 libos_types_a_SOURCES = os/kstore/kstore_types.cc os/Transaction.cc \
-	$(am__append_51)
+	$(am__append_52)
 libos_types_a_CXXFLAGS = ${AM_CXXFLAGS}
 @ENABLE_SERVER_TRUE at libos_a_SOURCES = os/filestore/chain_xattr.cc \
 @ENABLE_SERVER_TRUE@	os/filestore/DBObjectMap.cc \
@@ -9923,13 +9922,13 @@ libos_types_a_CXXFLAGS = ${AM_CXXFLAGS}
 @ENABLE_SERVER_TRUE@	os/filestore/WBThrottle.cc os/fs/FS.cc \
 @ENABLE_SERVER_TRUE@	os/kstore/kv.cc os/kstore/KStore.cc \
 @ENABLE_SERVER_TRUE@	os/memstore/MemStore.cc os/ObjectStore.cc \
- at ENABLE_SERVER_TRUE@	$(am__append_52) $(am__append_53) \
- at ENABLE_SERVER_TRUE@	$(am__append_54) $(am__append_55) \
- at ENABLE_SERVER_TRUE@	$(am__append_56) $(am__append_64)
+ at ENABLE_SERVER_TRUE@	$(am__append_53) $(am__append_54) \
+ at ENABLE_SERVER_TRUE@	$(am__append_55) $(am__append_56) \
+ at ENABLE_SERVER_TRUE@	$(am__append_57) $(am__append_65)
 @ENABLE_SERVER_TRUE at libos_a_CXXFLAGS = ${AM_CXXFLAGS} -I \
- at ENABLE_SERVER_TRUE@	rocksdb/include -fPIC $(am__append_63)
+ at ENABLE_SERVER_TRUE@	rocksdb/include -fPIC $(am__append_64)
 @ENABLE_SERVER_TRUE at libos_a_LIBADD = libos_types.a libkv.a \
- at ENABLE_SERVER_TRUE@	$(am__append_58)
+ at ENABLE_SERVER_TRUE@	$(am__append_59)
 @ENABLE_SERVER_TRUE@@WITH_LIBZFS_TRUE at libos_zfs_a_SOURCES = os/fs/ZFS.cc
 @ENABLE_SERVER_TRUE@@WITH_LIBZFS_TRUE at libos_zfs_a_CXXFLAGS = ${AM_CXXFLAGS} ${LIBZFS_CFLAGS}
 @ENABLE_SERVER_TRUE@@WITH_SPDK_TRUE at SPDK_SRCDIR = ${top_srcdir}/src/spdk/lib
@@ -9970,11 +9969,11 @@ libosd_types_la_CXXFLAGS = ${AM_CXXFLAGS}
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libosd_a_LIBADD = 
 erasure_codelibdir = $(pkglibdir)/erasure-code
 erasure_codelib_LTLIBRARIES = libec_jerasure_generic.la \
-	$(am__append_71) $(am__append_73) $(am__append_75) \
+	$(am__append_72) $(am__append_74) $(am__append_76) \
 	libec_jerasure.la libec_lrc.la libec_shec_generic.la \
-	$(am__append_80) $(am__append_82) $(am__append_84) \
-	libec_shec.la $(am__append_89)
-check_LTLIBRARIES = $(am__append_169)
+	$(am__append_81) $(am__append_83) $(am__append_85) \
+	libec_shec.la $(am__append_90)
+check_LTLIBRARIES = $(am__append_170)
 jerasure_sources = \
   erasure-code/ErasureCode.cc \
   erasure-code/jerasure/jerasure/src/cauchy.c \
@@ -10007,7 +10006,7 @@ libec_jerasure_generic_la_CXXFLAGS = ${AM_CXXFLAGS} \
 
 libec_jerasure_generic_la_LIBADD = $(LIBCRUSH) $(PTHREAD_LIBS) $(EXTRALIBS)
 libec_jerasure_generic_la_LDFLAGS = ${AM_LDFLAGS} -module \
-	-avoid-version -shared $(am__append_69)
+	-avoid-version -shared $(am__append_70)
 libec_jerasure_neon_la_SOURCES = ${jerasure_sources}                                       \
                                   erasure-code/jerasure/gf-complete/src/neon/gf_w4_neon.c  \
                                   erasure-code/jerasure/gf-complete/src/neon/gf_w8_neon.c  \
@@ -10027,7 +10026,7 @@ libec_jerasure_neon_la_CXXFLAGS = ${AM_CXXFLAGS} \
 
 libec_jerasure_neon_la_LIBADD = $(LIBCRUSH) $(PTHREAD_LIBS) $(EXTRALIBS)
 libec_jerasure_neon_la_LDFLAGS = ${AM_LDFLAGS} -module -avoid-version \
-	-shared $(am__append_70)
+	-shared $(am__append_71)
 libec_jerasure_sse3_la_SOURCES = ${jerasure_sources}
 libec_jerasure_sse3_la_CFLAGS = ${AM_CFLAGS}  \
 	${INTEL_SSE_FLAGS} \
@@ -10047,7 +10046,7 @@ libec_jerasure_sse3_la_CXXFLAGS = ${AM_CXXFLAGS} \
 
 libec_jerasure_sse3_la_LIBADD = $(LIBCRUSH) $(PTHREAD_LIBS) $(EXTRALIBS)
 libec_jerasure_sse3_la_LDFLAGS = ${AM_LDFLAGS} -module -avoid-version \
-	-shared $(am__append_72)
+	-shared $(am__append_73)
 libec_jerasure_sse4_la_SOURCES = ${jerasure_sources}
 libec_jerasure_sse4_la_CFLAGS = ${AM_CFLAGS}  \
 	${INTEL_SSE_FLAGS} \
@@ -10071,7 +10070,7 @@ libec_jerasure_sse4_la_CXXFLAGS = ${AM_CXXFLAGS} \
 
 libec_jerasure_sse4_la_LIBADD = $(LIBCRUSH) $(PTHREAD_LIBS) $(EXTRALIBS)
 libec_jerasure_sse4_la_LDFLAGS = ${AM_LDFLAGS} -module -avoid-version \
-	-shared $(am__append_74)
+	-shared $(am__append_75)
 libec_jerasure_la_SOURCES = \
 	erasure-code/jerasure/ErasureCodePluginSelectJerasure.cc
 
@@ -10079,7 +10078,7 @@ libec_jerasure_la_CFLAGS = ${AM_CFLAGS}
 libec_jerasure_la_CXXFLAGS = ${AM_CXXFLAGS}
 libec_jerasure_la_LIBADD = $(LIBCRUSH) $(PTHREAD_LIBS) $(EXTRALIBS)
 libec_jerasure_la_LDFLAGS = ${AM_LDFLAGS} -module -avoid-version \
-	-shared $(am__append_76)
+	-shared $(am__append_77)
 lrc_sources = \
   erasure-code/ErasureCode.cc \
   erasure-code/lrc/ErasureCodePluginLrc.cc \
@@ -10090,7 +10089,7 @@ libec_lrc_la_CFLAGS = ${AM_CFLAGS}
 libec_lrc_la_CXXFLAGS = ${AM_CXXFLAGS}
 libec_lrc_la_LIBADD = $(LIBCRUSH) $(PTHREAD_LIBS) $(LIBJSON_SPIRIT)
 libec_lrc_la_LDFLAGS = ${AM_LDFLAGS} -module -avoid-version -shared \
-	$(am__append_77)
+	$(am__append_78)
 
 # SHEC plugin
 shec_sources = \
@@ -10131,7 +10130,7 @@ libec_shec_generic_la_CXXFLAGS = ${AM_CXXFLAGS} \
 
 libec_shec_generic_la_LIBADD = $(LIBCRUSH) $(PTHREAD_LIBS) $(EXTRALIBS)
 libec_shec_generic_la_LDFLAGS = ${AM_LDFLAGS} -module -avoid-version \
-	-shared $(am__append_78)
+	-shared $(am__append_79)
 libec_shec_neon_la_SOURCES = ${shec_sources} \
 	erasure-code/jerasure/gf-complete/src/neon/gf_w4_neon.c \
 	erasure-code/jerasure/gf-complete/src/neon/gf_w8_neon.c \
@@ -10155,7 +10154,7 @@ libec_shec_neon_la_CXXFLAGS = ${AM_CXXFLAGS} \
 
 libec_shec_neon_la_LIBADD = $(LIBCRUSH) $(PTHREAD_LIBS) $(EXTRALIBS)
 libec_shec_neon_la_LDFLAGS = ${AM_LDFLAGS} -module -avoid-version \
-	-shared $(am__append_79)
+	-shared $(am__append_80)
 libec_shec_sse3_la_SOURCES = ${shec_sources}
 libec_shec_sse3_la_CFLAGS = ${AM_CFLAGS}  \
 	${INTEL_SSE_FLAGS} \
@@ -10179,7 +10178,7 @@ libec_shec_sse3_la_CXXFLAGS = ${AM_CXXFLAGS} \
 
 libec_shec_sse3_la_LIBADD = $(LIBCRUSH) $(PTHREAD_LIBS) $(EXTRALIBS)
 libec_shec_sse3_la_LDFLAGS = ${AM_LDFLAGS} -module -avoid-version \
-	-shared $(am__append_81)
+	-shared $(am__append_82)
 libec_shec_sse4_la_SOURCES = ${shec_sources}
 libec_shec_sse4_la_CFLAGS = ${AM_CFLAGS}  \
 	${INTEL_SSE_FLAGS} \
@@ -10207,7 +10206,7 @@ libec_shec_sse4_la_CXXFLAGS = ${AM_CXXFLAGS} \
 
 libec_shec_sse4_la_LIBADD = $(LIBCRUSH) $(PTHREAD_LIBS) $(EXTRALIBS)
 libec_shec_sse4_la_LDFLAGS = ${AM_LDFLAGS} -module -avoid-version \
-	-shared $(am__append_83)
+	-shared $(am__append_84)
 libec_shec_la_SOURCES = \
 	erasure-code/shec/ErasureCodePluginSelectShec.cc
 
@@ -10215,7 +10214,7 @@ libec_shec_la_CFLAGS = ${AM_CFLAGS}
 libec_shec_la_CXXFLAGS = ${AM_CXXFLAGS}
 libec_shec_la_LIBADD = $(LIBCRUSH) $(PTHREAD_LIBS) $(EXTRALIBS)
 libec_shec_la_LDFLAGS = ${AM_LDFLAGS} -module -avoid-version -shared \
-	$(am__append_85)
+	$(am__append_86)
 @WITH_BETTER_YASM_ELF64_TRUE at isa_sources = \
 @WITH_BETTER_YASM_ELF64_TRUE@	erasure-code/ErasureCode.cc \
 @WITH_BETTER_YASM_ELF64_TRUE@	erasure-code/isa/isa-l/erasure_code/ec_base.c \
@@ -10273,7 +10272,7 @@ libec_shec_la_LDFLAGS = ${AM_LDFLAGS} -module -avoid-version -shared \
 @WITH_BETTER_YASM_ELF64_TRUE at libec_isa_la_LIBADD = libisa.la $(LIBCRUSH) $(PTHREAD_LIBS) $(EXTRALIBS)
 @WITH_BETTER_YASM_ELF64_TRUE at libec_isa_la_LDFLAGS = ${AM_LDFLAGS} \
 @WITH_BETTER_YASM_ELF64_TRUE@	-module -avoid-version -shared \
- at WITH_BETTER_YASM_ELF64_TRUE@	$(am__append_88)
+ at WITH_BETTER_YASM_ELF64_TRUE@	$(am__append_89)
 liberasure_code_la_SOURCES = \
 	erasure-code/ErasureCodePlugin.cc
 
@@ -10281,7 +10280,7 @@ liberasure_code_la_DEPENDENCIES = $(erasure_codelib_LTLIBRARIES)
 @LINUX_TRUE at liberasure_code_la_LIBADD = -ldl
 compressorlibdir = $(pkglibdir)/compressor
 compressorlib_LTLIBRARIES = libceph_zlib.la libceph_snappy.la \
-	$(am__append_205)
+	$(am__append_206)
 zlib_sources = \
   compressor/Compressor.cc \
   compressor/zlib/CompressionPluginZlib.cc \
@@ -10292,7 +10291,7 @@ libceph_zlib_la_CFLAGS = ${AM_CFLAGS}
 libceph_zlib_la_CXXFLAGS = ${AM_CXXFLAGS} 
 libceph_zlib_la_LIBADD = $(LIBCRUSH) $(PTHREAD_LIBS) $(EXTRALIBS)
 libceph_zlib_la_LDFLAGS = ${AM_LDFLAGS} -lz -version-info 2:0:0 \
-	$(am__append_90)
+	$(am__append_91)
 snappy_sources = \
   compressor/Compressor.cc \
   compressor/snappy/CompressionPluginSnappy.cc
@@ -10306,7 +10305,7 @@ libceph_snappy_la_CXXFLAGS = ${AM_CXXFLAGS} \
 
 libceph_snappy_la_LIBADD = $(LIBCRUSH) $(PTHREAD_LIBS) $(EXTRALIBS)
 libceph_snappy_la_LDFLAGS = ${AM_LDFLAGS} -lsnappy -version-info 2:0:0 \
-	$(am__append_91)
+	$(am__append_92)
 libcompressor_la_SOURCES = \
 	compressor/Compressor.cc \
 	compressor/AsyncCompressor.cc
@@ -10352,8 +10351,8 @@ liblog_la_SOURCES = \
 	log/Log.cc \
 	log/SubsystemMap.cc
 
-libperfglue_la_SOURCES = $(am__append_97) $(am__append_100) \
-	$(am__append_103) $(am__append_104) $(am__append_105)
+libperfglue_la_SOURCES = $(am__append_98) $(am__append_101) \
+	$(am__append_104) $(am__append_105) $(am__append_106)
 @WITH_TCMALLOC_FALSE@@WITH_TCMALLOC_MINIMAL_TRUE at libperfglue_la_LIBADD = -ltcmalloc_minimal
 @WITH_TCMALLOC_TRUE at libperfglue_la_LIBADD = -ltcmalloc
 
@@ -10387,9 +10386,9 @@ libcommon_internal_la_SOURCES = ceph_ver.c common/DecayCounter.cc \
 	common/bloom_filter.cc common/module.c common/Readahead.cc \
 	common/Cycles.cc common/ContextCompletion.cc \
 	common/TracepointProvider.cc common/PluginRegistry.cc \
-	common/scrub_types.cc common/blkdev.cc $(am__append_106) \
-	$(am__append_107) $(am__append_108) $(am__append_109) \
-	$(am__append_110) mon/MonCap.cc mon/MonClient.cc mon/MonMap.cc \
+	common/scrub_types.cc common/blkdev.cc $(am__append_107) \
+	$(am__append_108) $(am__append_109) $(am__append_110) \
+	$(am__append_111) mon/MonCap.cc mon/MonClient.cc mon/MonMap.cc \
 	osd/OSDMap.cc osd/osd_types.cc osd/ECMsgTypes.cc osd/HitSet.cc \
 	mds/MDSMap.cc mds/FSMap.cc mds/inode_backtrace.cc \
 	mds/mdstypes.cc mds/flock.cc
@@ -10397,7 +10396,7 @@ libcommon_internal_la_SOURCES = ceph_ver.c common/DecayCounter.cc \
 # inject crc in common
 libcommon_crc_la_SOURCES = common/sctp_crc32.c common/crc32c.cc \
 	common/crc32c_intel_baseline.c common/crc32c_intel_fast.c \
-	$(am__append_111)
+	$(am__append_112)
 @WITH_GOOD_YASM_ELF64_TRUE at libcommon_crc_la_LIBTOOLFLAGS = --tag=CC
 @HAVE_ARMV8_CRC_TRUE at libcommon_crc_aarch64_la_SOURCES = common/crc32c_aarch64.c
 @HAVE_ARMV8_CRC_TRUE at libcommon_crc_aarch64_la_CFLAGS = $(AM_CFLAGS) $(ARM_CRC_FLAGS)
@@ -10411,9 +10410,9 @@ libmsg_la_SOURCES = msg/Message.cc msg/Messenger.cc msg/msg_types.cc \
 	msg/simple/SimpleMessenger.cc msg/async/AsyncConnection.cc \
 	msg/async/AsyncMessenger.cc msg/async/Event.cc \
 	msg/async/net_handler.cc msg/async/EventSelect.cc \
-	$(am__append_117) $(am__append_118) $(am__append_119) \
-	$(am__append_120) $(am__append_121) $(am__append_122) \
-	$(am__append_123)
+	$(am__append_118) $(am__append_119) $(am__append_120) \
+	$(am__append_121) $(am__append_122) $(am__append_123) \
+	$(am__append_124)
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at rados_includedir = $(includedir)/rados
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at rados_include_DATA = $(srcdir)/include/rados/librados.h \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(srcdir)/include/rados/rados_types.h \
@@ -10424,7 +10423,7 @@ libmsg_la_SOURCES = msg/Message.cc msg/Messenger.cc msg/msg_types.cc \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(srcdir)/include/page.h \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(srcdir)/include/crc32c.h \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(srcdir)/include/memory.h \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__append_125)
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__append_126)
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at librbd_includedir = $(includedir)/rbd
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at librbd_include_DATA = \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(srcdir)/include/rbd/features.h \
@@ -10456,12 +10455,12 @@ libmsg_la_SOURCES = msg/Message.cc msg/Messenger.cc msg/msg_types.cc \
 # We need this to avoid basename conflicts with the librados build tests in test/Makefile.am
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at librados_la_CXXFLAGS =  \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	${AM_CXXFLAGS} \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__append_128)
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__append_129)
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at librados_la_LIBADD = $(LIBRADOS_DEPS) $(PTHREAD_LIBS) $(CRYPTO_LIBS) $(EXTRALIBS)
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at librados_la_LDFLAGS =  \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	${AM_LDFLAGS} \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	-version-info 2:0:0 \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__append_129)
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	$(am__append_130)
 @ENABLE_CLIENT_TRUE@@WITH_RADOSSTRIPER_TRUE@@WITH_RADOS_TRUE at libradosstriper_la_SOURCES = \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSSTRIPER_TRUE@@WITH_RADOS_TRUE@	libradosstriper/libradosstriper.cc \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSSTRIPER_TRUE@@WITH_RADOS_TRUE@	libradosstriper/RadosStriperImpl.cc \
@@ -10475,7 +10474,7 @@ libmsg_la_SOURCES = msg/Message.cc msg/Messenger.cc msg/msg_types.cc \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSSTRIPER_TRUE@@WITH_RADOS_TRUE at libradosstriper_la_LDFLAGS = ${AM_LDFLAGS} \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSSTRIPER_TRUE@@WITH_RADOS_TRUE@	-version-info \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSSTRIPER_TRUE@@WITH_RADOS_TRUE@	1:0:0 \
- at ENABLE_CLIENT_TRUE@@WITH_RADOSSTRIPER_TRUE@@WITH_RADOS_TRUE@	$(am__append_132)
+ at ENABLE_CLIENT_TRUE@@WITH_RADOSSTRIPER_TRUE@@WITH_RADOS_TRUE@	$(am__append_133)
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at libjournal_la_SOURCES = \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	journal/AsyncOpTracker.cc \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	journal/Entry.cc \
@@ -10571,7 +10570,7 @@ librbd_types_la_SOURCES = \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at librbd_la_LDFLAGS = ${AM_LDFLAGS} \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	-version-info \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	1:0:0 \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(am__append_138)
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(am__append_139)
 @ENABLE_CLIENT_TRUE@@LINUX_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at librbd_la_CXXFLAGS = -fvisibility=hidden -fvisibility-inlines-hidden
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE at librgw_la_SOURCES = rgw/rgw_acl.cc \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	rgw/rgw_acl_s3.cc \
@@ -10641,7 +10640,7 @@ librbd_types_la_SOURCES = \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	rgw/rgw_xml.cc \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	rgw/rgw_xml_enc.cc \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	rgw/rgw_website.cc \
- at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__append_143)
+ at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(am__append_144)
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE at librgw_la_CXXFLAGS = -Woverloaded-virtual -fPIC -I$(srcdir)/xxHash \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	${AM_CXXFLAGS}
 
@@ -10669,7 +10668,7 @@ librbd_types_la_SOURCES = \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	civetweb/src/civetweb.c \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	rgw/rgw_main.cc
 
- at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE at radosgw_CFLAGS = -I$(srcdir)/civetweb/include -fPIC -I$(srcdir)/xxHash
+ at ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE at radosgw_CFLAGS = -I$(srcdir)/civetweb/include -fPIC -I$(srcdir)/xxHash ${CIVETWEB_INCLUDE}
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE at radosgw_LDADD = $(LIBRGW) $(LIBCIVETWEB) $(LIBCIVETWEB_DEPS) $(LIBRGW_DEPS) $(RESOLV_LIBS) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(CEPH_GLOBAL)
 
@@ -10845,7 +10844,7 @@ librbd_types_la_SOURCES = \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(LIBOSD) $(LIBCOMMON) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(BOOST_PROGRAM_OPTIONS_LIBS) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(CEPH_GLOBAL) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_163)
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_164)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at ceph_erasure_code_non_regression_SOURCES = \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	test/erasure-code/ceph_erasure_code_non_regression.cc
 
@@ -10853,7 +10852,7 @@ librbd_types_la_SOURCES = \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(LIBOSD) $(LIBCOMMON) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(BOOST_PROGRAM_OPTIONS_LIBS) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(CEPH_GLOBAL) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_165)
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_166)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at ceph_erasure_code_SOURCES = \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	test/erasure-code/ceph_erasure_code.cc
 
@@ -10861,7 +10860,7 @@ librbd_types_la_SOURCES = \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(LIBOSD) $(LIBCOMMON) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(BOOST_PROGRAM_OPTIONS_LIBS) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(CEPH_GLOBAL) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_167)
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_168)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_example_la_SOURCES = \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	erasure-code/ErasureCode.cc \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	test/erasure-code/ErasureCodePluginExample.cc
@@ -10873,7 +10872,7 @@ librbd_types_la_SOURCES = \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	${AM_LDFLAGS} -module \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	-avoid-version -shared \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	-rpath /nowhere \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_168)
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_169)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_missing_entry_point_la_SOURCES = test/erasure-code/ErasureCodePluginMissingEntryPoint.cc
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_missing_entry_point_la_CFLAGS = ${AM_CFLAGS}
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_missing_entry_point_la_CXXFLAGS = ${AM_CXXFLAGS}
@@ -10882,7 +10881,7 @@ librbd_types_la_SOURCES = \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	${AM_LDFLAGS} -module \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	-avoid-version -shared \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	-rpath /nowhere \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_170)
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_171)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_missing_version_la_SOURCES = test/erasure-code/ErasureCodePluginMissingVersion.cc
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_missing_version_la_CFLAGS = ${AM_CFLAGS}
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_missing_version_la_CXXFLAGS = ${AM_CXXFLAGS}
@@ -10891,7 +10890,7 @@ librbd_types_la_SOURCES = \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	${AM_LDFLAGS} -module \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	-avoid-version -shared \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	-rpath /nowhere \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_171)
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_172)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_hangs_la_SOURCES = test/erasure-code/ErasureCodePluginHangs.cc
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_hangs_la_CFLAGS = ${AM_CFLAGS}
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_hangs_la_CXXFLAGS = ${AM_CXXFLAGS}
@@ -10900,7 +10899,7 @@ librbd_types_la_SOURCES = \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	${AM_LDFLAGS} -module \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	-avoid-version -shared \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	-rpath /nowhere \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_172)
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_173)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_fail_to_initialize_la_SOURCES = test/erasure-code/ErasureCodePluginFailToInitialize.cc
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_fail_to_initialize_la_CFLAGS = ${AM_CFLAGS}
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_fail_to_initialize_la_CXXFLAGS = ${AM_CXXFLAGS}
@@ -10909,7 +10908,7 @@ librbd_types_la_SOURCES = \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	${AM_LDFLAGS} -module \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	-avoid-version -shared \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	-rpath /nowhere \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_173)
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_174)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_fail_to_register_la_SOURCES = test/erasure-code/ErasureCodePluginFailToRegister.cc
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_fail_to_register_la_CFLAGS = ${AM_CFLAGS}
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_fail_to_register_la_CXXFLAGS = ${AM_CXXFLAGS}
@@ -10918,7 +10917,7 @@ librbd_types_la_SOURCES = \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	${AM_LDFLAGS} -module \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	-avoid-version -shared \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	-rpath /nowhere \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_174)
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_175)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_test_jerasure_neon_la_SOURCES = test/erasure-code/TestJerasurePluginNEON.cc
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_test_jerasure_neon_la_CFLAGS = ${AM_CFLAGS}
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_test_jerasure_neon_la_CXXFLAGS = ${AM_CXXFLAGS}
@@ -10927,7 +10926,7 @@ librbd_types_la_SOURCES = \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	${AM_LDFLAGS} -module \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	-avoid-version -shared \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	-rpath /nowhere \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_175)
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_176)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_test_jerasure_sse4_la_SOURCES = test/erasure-code/TestJerasurePluginSSE4.cc
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_test_jerasure_sse4_la_CFLAGS = ${AM_CFLAGS}
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_test_jerasure_sse4_la_CXXFLAGS = ${AM_CXXFLAGS}
@@ -10936,7 +10935,7 @@ librbd_types_la_SOURCES = \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	${AM_LDFLAGS} -module \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	-avoid-version -shared \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	-rpath /nowhere \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_176)
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_177)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_test_jerasure_sse3_la_SOURCES = test/erasure-code/TestJerasurePluginSSE3.cc
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_test_jerasure_sse3_la_CFLAGS = ${AM_CFLAGS}
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_test_jerasure_sse3_la_CXXFLAGS = ${AM_CXXFLAGS}
@@ -10945,7 +10944,7 @@ librbd_types_la_SOURCES = \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	${AM_LDFLAGS} -module \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	-avoid-version -shared \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	-rpath /nowhere \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_177)
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_178)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_test_jerasure_generic_la_SOURCES = test/erasure-code/TestJerasurePluginGeneric.cc
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_test_jerasure_generic_la_CFLAGS = ${AM_CFLAGS}
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_test_jerasure_generic_la_CXXFLAGS = ${AM_CXXFLAGS}
@@ -10954,7 +10953,7 @@ librbd_types_la_SOURCES = \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	${AM_LDFLAGS} -module \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	-avoid-version -shared \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	-rpath /nowhere \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_178)
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_179)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at unittest_erasure_code_plugin_SOURCES = \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	erasure-code/ErasureCode.cc \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	test/erasure-code/TestErasureCodePlugin.cc 
@@ -10964,7 +10963,7 @@ librbd_types_la_SOURCES = \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(LIBOSD) $(LIBCOMMON) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(UNITTEST_LDADD) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(CEPH_GLOBAL) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_179)
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_180)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at unittest_erasure_code_SOURCES = \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	erasure-code/ErasureCode.cc \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	test/erasure-code/TestErasureCode.cc
@@ -10987,7 +10986,7 @@ librbd_types_la_SOURCES = \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(LIBOSD) $(LIBCOMMON) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(UNITTEST_LDADD) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(CEPH_GLOBAL) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_181)
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_182)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at unittest_erasure_code_plugin_jerasure_SOURCES = \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	test/erasure-code/TestErasureCodePluginJerasure.cc
 
@@ -10996,7 +10995,7 @@ librbd_types_la_SOURCES = \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(LIBOSD) $(LIBCOMMON) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(UNITTEST_LDADD) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(CEPH_GLOBAL) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_182)
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_183)
 @ENABLE_SERVER_TRUE@@WITH_BETTER_YASM_ELF64_TRUE@@WITH_OSD_TRUE at unittest_erasure_code_isa_SOURCES = \
 @ENABLE_SERVER_TRUE@@WITH_BETTER_YASM_ELF64_TRUE@@WITH_OSD_TRUE@	erasure-code/ErasureCode.cc \
 @ENABLE_SERVER_TRUE@@WITH_BETTER_YASM_ELF64_TRUE@@WITH_OSD_TRUE@	test/erasure-code/TestErasureCodeIsa.cc
@@ -11008,7 +11007,7 @@ librbd_types_la_SOURCES = \
 @ENABLE_SERVER_TRUE@@WITH_BETTER_YASM_ELF64_TRUE@@WITH_OSD_TRUE@	$(CEPH_GLOBAL) \
 @ENABLE_SERVER_TRUE@@WITH_BETTER_YASM_ELF64_TRUE@@WITH_OSD_TRUE@	libisa.la \
 @ENABLE_SERVER_TRUE@@WITH_BETTER_YASM_ELF64_TRUE@@WITH_OSD_TRUE@	$(LIBERASURE_CODE) \
- at ENABLE_SERVER_TRUE@@WITH_BETTER_YASM_ELF64_TRUE@@WITH_OSD_TRUE@	$(am__append_183)
+ at ENABLE_SERVER_TRUE@@WITH_BETTER_YASM_ELF64_TRUE@@WITH_OSD_TRUE@	$(am__append_184)
 @ENABLE_SERVER_TRUE@@WITH_BETTER_YASM_ELF64_TRUE@@WITH_OSD_TRUE at unittest_erasure_code_plugin_isa_SOURCES = \
 @ENABLE_SERVER_TRUE@@WITH_BETTER_YASM_ELF64_TRUE@@WITH_OSD_TRUE@	erasure-code/ErasureCode.cc \
 @ENABLE_SERVER_TRUE@@WITH_BETTER_YASM_ELF64_TRUE@@WITH_OSD_TRUE@	test/erasure-code/TestErasureCodePluginIsa.cc
@@ -11019,7 +11018,7 @@ librbd_types_la_SOURCES = \
 @ENABLE_SERVER_TRUE@@WITH_BETTER_YASM_ELF64_TRUE@@WITH_OSD_TRUE@	$(UNITTEST_LDADD) \
 @ENABLE_SERVER_TRUE@@WITH_BETTER_YASM_ELF64_TRUE@@WITH_OSD_TRUE@	$(CEPH_GLOBAL) \
 @ENABLE_SERVER_TRUE@@WITH_BETTER_YASM_ELF64_TRUE@@WITH_OSD_TRUE@	$(LIBERASURE_CODE) \
- at ENABLE_SERVER_TRUE@@WITH_BETTER_YASM_ELF64_TRUE@@WITH_OSD_TRUE@	$(am__append_185)
+ at ENABLE_SERVER_TRUE@@WITH_BETTER_YASM_ELF64_TRUE@@WITH_OSD_TRUE@	$(am__append_186)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at unittest_erasure_code_lrc_SOURCES = \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	test/erasure-code/TestErasureCodeLrc.cc \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	${lrc_sources}
@@ -11029,7 +11028,7 @@ librbd_types_la_SOURCES = \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(LIBOSD) $(LIBCOMMON) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(UNITTEST_LDADD) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(CEPH_GLOBAL) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_186)
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_187)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at unittest_erasure_code_plugin_lrc_SOURCES = \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	test/erasure-code/TestErasureCodePluginLrc.cc
 
@@ -11038,7 +11037,7 @@ librbd_types_la_SOURCES = \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(LIBOSD) $(LIBCOMMON) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(UNITTEST_LDADD) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(CEPH_GLOBAL) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_188)
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_189)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at unittest_erasure_code_shec_SOURCES = \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	test/erasure-code/TestErasureCodeShec.cc \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	${shec_sources}
@@ -11059,7 +11058,7 @@ librbd_types_la_SOURCES = \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(LIBOSD) $(LIBCOMMON) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(UNITTEST_LDADD) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(CEPH_GLOBAL) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_189)
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_190)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at unittest_erasure_code_shec_all_SOURCES = \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	test/erasure-code/TestErasureCodeShec_all.cc \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	${shec_sources}
@@ -11080,7 +11079,7 @@ librbd_types_la_SOURCES = \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(LIBOSD) $(LIBCOMMON) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(UNITTEST_LDADD) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(CEPH_GLOBAL) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_190)
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_191)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at unittest_erasure_code_shec_thread_SOURCES = \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	test/erasure-code/TestErasureCodeShec_thread.cc \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	${shec_sources}
@@ -11101,7 +11100,7 @@ librbd_types_la_SOURCES = \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(LIBOSD) $(LIBCOMMON) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(UNITTEST_LDADD) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(CEPH_GLOBAL) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_191)
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_192)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at unittest_erasure_code_shec_arguments_SOURCES = \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	test/erasure-code/TestErasureCodeShec_arguments.cc \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	${shec_sources}
@@ -11122,7 +11121,7 @@ librbd_types_la_SOURCES = \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(LIBOSD) $(LIBCOMMON) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(UNITTEST_LDADD) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(CEPH_GLOBAL) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_192)
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_193)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at unittest_erasure_code_plugin_shec_SOURCES = \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@        test/erasure-code/TestErasureCodePluginShec.cc
 
@@ -11131,7 +11130,7 @@ librbd_types_la_SOURCES = \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(LIBOSD) $(LIBCOMMON) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(UNITTEST_LDADD) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(CEPH_GLOBAL) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_193)
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_194)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_test_shec_neon_la_SOURCES = test/erasure-code/TestShecPluginNEON.cc
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_test_shec_neon_la_CFLAGS = ${AM_CFLAGS}
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_test_shec_neon_la_CXXFLAGS = ${AM_CXXFLAGS}
@@ -11140,7 +11139,7 @@ librbd_types_la_SOURCES = \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	${AM_LDFLAGS} -module \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	-avoid-version -shared \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	-rpath /nowhere \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_194)
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_195)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_test_shec_sse4_la_SOURCES = test/erasure-code/TestShecPluginSSE4.cc
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_test_shec_sse4_la_CFLAGS = ${AM_CFLAGS}
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_test_shec_sse4_la_CXXFLAGS = ${AM_CXXFLAGS}
@@ -11149,7 +11148,7 @@ librbd_types_la_SOURCES = \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	${AM_LDFLAGS} -module \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	-avoid-version -shared \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	-rpath /nowhere \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_195)
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_196)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_test_shec_sse3_la_SOURCES = test/erasure-code/TestShecPluginSSE3.cc
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_test_shec_sse3_la_CFLAGS = ${AM_CFLAGS}
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_test_shec_sse3_la_CXXFLAGS = ${AM_CXXFLAGS}
@@ -11158,7 +11157,7 @@ librbd_types_la_SOURCES = \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	${AM_LDFLAGS} -module \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	-avoid-version -shared \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	-rpath /nowhere \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_196)
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_197)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_test_shec_generic_la_SOURCES = test/erasure-code/TestShecPluginGeneric.cc
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_test_shec_generic_la_CFLAGS = ${AM_CFLAGS}
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libec_test_shec_generic_la_CXXFLAGS = ${AM_CXXFLAGS}
@@ -11167,7 +11166,7 @@ librbd_types_la_SOURCES = \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	${AM_LDFLAGS} -module \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	-avoid-version -shared \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	-rpath /nowhere \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_197)
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_198)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at unittest_erasure_code_example_SOURCES = \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	erasure-code/ErasureCode.cc \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	test/erasure-code/TestErasureCodeExample.cc
@@ -11185,7 +11184,7 @@ librbd_types_la_SOURCES = \
 @ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@	$(CEPH_GLOBAL) \
 @ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@	$(PTHREAD_LIBS) \
 @ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@	$(EXTRALIBS) \
- at ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@	$(am__append_199)
+ at ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@	$(am__append_200)
 @ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE at simple_client_SOURCES = \
 @ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@	test/messenger/simple_client.cc \
 @ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@	test/messenger/simple_dispatcher.cc
@@ -11197,7 +11196,7 @@ librbd_types_la_SOURCES = \
 @ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@	$(CEPH_GLOBAL) \
 @ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@	$(PTHREAD_LIBS) \
 @ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@	$(EXTRALIBS) \
- at ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@	$(am__append_200)
+ at ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@	$(am__append_201)
 @ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE at xio_server_SOURCES = \
 @ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@	test/messenger/xio_server.cc \
 @ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@	test/messenger/xio_dispatcher.cc
@@ -11209,7 +11208,7 @@ librbd_types_la_SOURCES = \
 @ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@	$(LIBCOMMON) \
 @ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@	$(PTHREAD_LIBS) \
 @ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@	$(EXTRALIBS) \
- at ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@	$(am__append_202)
+ at ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@	$(am__append_203)
 @ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE at xio_client_SOURCES = \
 @ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@	test/messenger/xio_client.cc \
 @ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@	test/messenger/xio_dispatcher.cc
@@ -11221,7 +11220,7 @@ librbd_types_la_SOURCES = \
 @ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@	$(LIBCOMMON) \
 @ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@	$(PTHREAD_LIBS) \
 @ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@	$(EXTRALIBS) \
- at ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@	$(am__append_203)
+ at ENABLE_SERVER_TRUE@@ENABLE_XIO_TRUE@	$(am__append_204)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at libceph_example_la_SOURCES = \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	compressor/Compressor.cc \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	test/compressor/compressor_plugin_example.cc
@@ -11239,7 +11238,7 @@ librbd_types_la_SOURCES = \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(LIBOSD) $(LIBCOMMON) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(UNITTEST_LDADD) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(CEPH_GLOBAL) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_206)
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_207)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at unittest_compression_snappy_SOURCES = \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	test/compressor/test_compression_snappy.cc \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	${snappy_sources}
@@ -11249,7 +11248,7 @@ librbd_types_la_SOURCES = \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(LIBOSD) $(LIBCOMMON) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(UNITTEST_LDADD) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(CEPH_GLOBAL) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_207)
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_208)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at unittest_compression_snappy_LDFLAGS = -lsnappy
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at unittest_compression_plugin_snappy_SOURCES = \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	test/compressor/test_compression_plugin_snappy.cc \
@@ -11261,7 +11260,7 @@ librbd_types_la_SOURCES = \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(UNITTEST_LDADD) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(CEPH_GLOBAL) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(LIBCOMPRESSOR) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_208)
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_209)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at unittest_compression_plugin_snappy_LDFLAGS = -lsnappy
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at unittest_compression_zlib_SOURCES = \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	test/compressor/test_compression_zlib.cc \
@@ -11272,7 +11271,7 @@ librbd_types_la_SOURCES = \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(LIBOSD) $(LIBCOMMON) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(UNITTEST_LDADD) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(CEPH_GLOBAL) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_209)
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_210)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at unittest_compression_zlib_LDFLAGS = -lz
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at unittest_compression_plugin_zlib_SOURCES = \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	test/compressor/test_compression_plugin_zlib.cc \
@@ -11284,7 +11283,7 @@ librbd_types_la_SOURCES = \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(UNITTEST_LDADD) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(CEPH_GLOBAL) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(LIBCOMPRESSOR) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_210)
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_211)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at unittest_compression_plugin_zlib_LDFLAGS = -lz
 
 # This should use LIBMDS_TYPES once it exists
@@ -11305,10 +11304,10 @@ librbd_types_la_SOURCES = \
 
 # These should always use explicit _CFLAGS/_CXXFLAGS so avoid basename conflicts
 @ENABLE_CLIENT_TRUE at ceph_dencoder_CFLAGS = ${AM_CFLAGS} \
- at ENABLE_CLIENT_TRUE@	$(am__append_211)
+ at ENABLE_CLIENT_TRUE@	$(am__append_212)
 @ENABLE_CLIENT_TRUE at ceph_dencoder_CXXFLAGS = ${AM_CXXFLAGS} \
- at ENABLE_CLIENT_TRUE@	$(am__append_212) $(am__append_213) \
- at ENABLE_CLIENT_TRUE@	$(am__append_214)
+ at ENABLE_CLIENT_TRUE@	$(am__append_213) $(am__append_214) \
+ at ENABLE_CLIENT_TRUE@	$(am__append_215)
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE at libradostest_la_SOURCES = \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	test/librados/test.cc \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@	test/librados/TestCase.cc
@@ -11636,8 +11635,10 @@ librbd_types_la_SOURCES = \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	test/rbd_mirror/test_mock_fixture.cc \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	test/rbd_mirror/test_mock_ImageReplayer.cc \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	test/rbd_mirror/test_mock_ImageSync.cc \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	test/rbd_mirror/test_mock_ImageSyncThrottler.cc \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	test/rbd_mirror/image_replayer/test_mock_BootstrapRequest.cc \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	test/rbd_mirror/image_replayer/test_mock_CreateImageRequest.cc \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	test/rbd_mirror/image_replayer/test_mock_EventPreprocessor.cc \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	test/rbd_mirror/image_sync/test_mock_ImageCopyRequest.cc \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	test/rbd_mirror/image_sync/test_mock_ObjectCopyRequest.cc \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	test/rbd_mirror/image_sync/test_mock_SnapshotCopyRequest.cc \
@@ -11689,22 +11690,6 @@ librbd_types_la_SOURCES = \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at ceph_test_rbd_mirror_random_write_LDADD = \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(LIBRBD) $(LIBRADOS) $(CEPH_GLOBAL)
 
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at ceph_test_rbd_mirror_image_replay_SOURCES = \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	test/rbd_mirror/image_replay.cc
-
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at ceph_test_rbd_mirror_image_replay_LDADD = \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	librbd_mirror_internal.la \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	librbd_internal.la \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	librbd_api.la \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(LIBRBD_TYPES) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	libjournal.la \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(LIBRADOS) $(LIBOSDC) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	librados_internal.la \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	libcls_rbd_client.la \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	libcls_lock_client.la \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	libcls_journal_client.la \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(CEPH_GLOBAL)
-
 @ENABLE_CLIENT_TRUE@@LINUX_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at ceph_test_librbd_fsx_SOURCES = test/librbd/fsx.cc
 @ENABLE_CLIENT_TRUE@@LINUX_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at ceph_test_librbd_fsx_LDADD = \
 @ENABLE_CLIENT_TRUE@@LINUX_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	libjournal.la libcls_journal_client.la \
@@ -11761,7 +11746,7 @@ librbd_types_la_SOURCES = \
 @ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE@	test/libcephfs/multiclient.cc \
 @ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE@	test/libcephfs/access.cc \
 @ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE@	test/libcephfs/acl.cc \
- at ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE@	$(am__append_236)
+ at ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE@	$(am__append_237)
 @ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE at ceph_test_libcephfs_LDADD = $(LIBRADOS) $(LIBCEPHFS) $(LIBCOMMON) $(UNITTEST_LDADD)
 @ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE at ceph_test_libcephfs_CXXFLAGS = $(UNITTEST_CXXFLAGS)
 @ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE at unittest_encoding_SOURCES = test/encoding.cc
@@ -11784,7 +11769,7 @@ librbd_types_la_SOURCES = \
 @ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE@	-Wignored-qualifiers \
 @ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE@	-Wold-style-definition \
 @ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE@	-Wtype-limits \
- at ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE@	$(am__append_238)
+ at ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE@	$(am__append_239)
 @ENABLE_CLIENT_TRUE@@WITH_BUILD_TESTS_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE at test_build_librgw_SOURCES = \
 @ENABLE_CLIENT_TRUE@@WITH_BUILD_TESTS_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	test/buildtest_skeleton.cc \
 @ENABLE_CLIENT_TRUE@@WITH_BUILD_TESTS_TRUE@@WITH_RADOSGW_TRUE@@WITH_RADOS_TRUE@	$(librgw_la_SOURCES)
@@ -12012,13 +11997,13 @@ librbd_types_la_SOURCES = \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at unittest_osdscrub_LDADD =  \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(LIBOSD) $(UNITTEST_LDADD) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(CEPH_GLOBAL) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_252)
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_253)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at unittest_pglog_SOURCES = test/osd/TestPGLog.cc
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at unittest_pglog_CXXFLAGS = $(UNITTEST_CXXFLAGS)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at unittest_pglog_LDADD = $(LIBOSD) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(UNITTEST_LDADD) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(CEPH_GLOBAL) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_253)
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_254)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at unittest_hitset_SOURCES = test/osd/hitset.cc
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at unittest_hitset_CXXFLAGS = $(UNITTEST_CXXFLAGS)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at unittest_hitset_LDADD = $(LIBOSD) $(UNITTEST_LDADD) $(CEPH_GLOBAL)
@@ -12085,7 +12070,7 @@ UNITTEST_CXXFLAGS = \
 UNITTEST_LDADD = $(top_builddir)/src/gmock/lib/libgmock_main.la \
 	$(top_builddir)/src/gmock/lib/libgmock.la \
 	$(top_builddir)/src/gmock/gtest/lib/libgtest.la \
-	$(PTHREAD_LIBS) $(am__append_261)
+	$(PTHREAD_LIBS) $(am__append_262)
 unittest_addrs_SOURCES = test/test_addrs.cc
 unittest_addrs_CXXFLAGS = $(UNITTEST_CXXFLAGS)
 unittest_addrs_LDADD = $(UNITTEST_LDADD) $(CEPH_GLOBAL)
@@ -12272,7 +12257,8 @@ unittest_async_compressor_CXXFLAGS = $(UNITTEST_CXXFLAGS)
 unittest_async_compressor_LDADD = $(UNITTEST_LDADD) $(CEPH_GLOBAL) $(LIBCOMPRESSOR) $(LIBCOMMON)
 ceph_test_objectcacher_stress_SOURCES = \
 	test/osdc/object_cacher_stress.cc \
-	test/osdc/FakeWriteback.cc
+	test/osdc/FakeWriteback.cc \
+	test/osdc/MemWriteback.cc
 
 ceph_test_objectcacher_stress_LDADD = $(LIBOSDC) $(CEPH_GLOBAL)
 ceph_test_cfuse_cache_invalidate_SOURCES = test/test_cfuse_cache_invalidate.cc
@@ -12327,7 +12313,7 @@ ceph_test_cfuse_cache_invalidate_SOURCES = test/test_cfuse_cache_invalidate.cc
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	tools/rbd/action/Snap.cc \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	tools/rbd/action/Status.cc \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	tools/rbd/action/Watch.cc \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(am__append_266)
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(am__append_267)
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at rbd_LDADD = libjournal.la \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	libcls_journal_client.la \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	libcls_rbd_client.la \
@@ -12338,7 +12324,7 @@ ceph_test_cfuse_cache_invalidate_SOURCES = test/test_cfuse_cache_invalidate.cc
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(CEPH_GLOBAL) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(BOOST_REGEX_LIBS) \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(BOOST_PROGRAM_OPTIONS_LIBS) \
- at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(am__append_268)
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	$(am__append_269)
 @ENABLE_CLIENT_TRUE@@LINUX_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at rbd_nbd_SOURCES = tools/rbd_nbd/rbd-nbd.cc
 @ENABLE_CLIENT_TRUE@@LINUX_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at rbd_nbd_CXXFLAGS = $(AM_CXXFLAGS)
 @ENABLE_CLIENT_TRUE@@LINUX_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at rbd_nbd_LDADD = $(LIBRBD) $(LIBRADOS) $(CEPH_GLOBAL) $(BOOST_REGEX_LIBS)
@@ -12348,6 +12334,7 @@ ceph_test_cfuse_cache_invalidate_SOURCES = test/test_cfuse_cache_invalidate.cc
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	tools/rbd_mirror/ClusterWatcher.cc \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	tools/rbd_mirror/ImageReplayer.cc \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	tools/rbd_mirror/ImageSync.cc \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@        tools/rbd_mirror/ImageSyncThrottler.cc \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	tools/rbd_mirror/Mirror.cc \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	tools/rbd_mirror/PoolWatcher.cc \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	tools/rbd_mirror/Replayer.cc \
@@ -12357,6 +12344,7 @@ ceph_test_cfuse_cache_invalidate_SOURCES = test/test_cfuse_cache_invalidate.cc
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	tools/rbd_mirror/image_replayer/BootstrapRequest.cc \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	tools/rbd_mirror/image_replayer/CloseImageRequest.cc \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	tools/rbd_mirror/image_replayer/CreateImageRequest.cc \
+ at ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	tools/rbd_mirror/image_replayer/EventPreprocessor.cc \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	tools/rbd_mirror/image_replayer/OpenImageRequest.cc \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	tools/rbd_mirror/image_replayer/OpenLocalImageRequest.cc \
 @ENABLE_CLIENT_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE@	tools/rbd_mirror/image_replayer/ReplayStatusFormatter.cc \
@@ -12399,7 +12387,7 @@ ceph_test_cfuse_cache_invalidate_SOURCES = test/test_cfuse_cache_invalidate.cc
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(LIBOSD) $(LIBOS) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(CEPH_GLOBAL) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(BOOST_PROGRAM_OPTIONS_LIBS) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_275)
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_276)
 @ENABLE_CLIENT_TRUE@@ENABLE_SERVER_TRUE@@WITH_MDS_TRUE@@WITH_RADOS_TRUE at cephfs_journal_tool_SOURCES = \
 @ENABLE_CLIENT_TRUE@@ENABLE_SERVER_TRUE@@WITH_MDS_TRUE@@WITH_RADOS_TRUE@	tools/cephfs/cephfs-journal-tool.cc \
 @ENABLE_CLIENT_TRUE@@ENABLE_SERVER_TRUE@@WITH_MDS_TRUE@@WITH_RADOS_TRUE@	tools/cephfs/JournalTool.cc \
@@ -12505,7 +12493,7 @@ editpaths = sed \
 	-e 's|@@GCOV_PREFIX_STRIP[@][@]|$(GCOV_PREFIX_STRIP)|g'
 
 shell_scripts = ceph-debugpack ceph-post-file ceph-crush-location \
-	$(am__append_308)
+	$(am__append_311)
 doc_DATA = $(srcdir)/sample.ceph.conf sample.fetch_config
 
 # various scripts in $(libexecdir)
@@ -12525,11 +12513,11 @@ AM_TESTS_ENVIRONMENT = export CEPH_ROOT="$(abs_top_srcdir)"; export \
 	PATH="$(abs_srcdir):$$PATH";
 
 # pybind
-python_PYTHON = $(am__append_292) $(am__append_302) $(am__append_307)
+python_PYTHON = $(am__append_293) $(am__append_305) $(am__append_310)
 @ENABLE_CLIENT_TRUE at bash_completiondir = $(sysconfdir)/bash_completion.d
 @ENABLE_CLIENT_TRUE at bash_completion_DATA =  \
 @ENABLE_CLIENT_TRUE@	$(srcdir)/bash_completion/ceph \
- at ENABLE_CLIENT_TRUE@	$(am__append_294) $(am__append_296)
+ at ENABLE_CLIENT_TRUE@	$(am__append_295) $(am__append_297)
 @ENABLE_CLIENT_TRUE at ceph_syn_SOURCES = ceph_syn.cc \
 @ENABLE_CLIENT_TRUE@	client/SyntheticClient.cc # uses g_conf.. \
 @ENABLE_CLIENT_TRUE@	needs cleanup
@@ -12547,6 +12535,8 @@ python_PYTHON = $(am__append_292) $(am__append_302) $(am__append_307)
 @ENABLE_CLIENT_TRUE@@WITH_FUSE_TRUE@@WITH_RADOS_TRUE@@WITH_RBD_TRUE at rbd_fuse_LDADD = $(LIBFUSE_LIBS) $(LIBRBD) $(LIBRADOS) $(CEPH_GLOBAL)
 @ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE at cephfs_SOURCES = cephfs.cc
 @ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE at cephfs_LDADD = $(LIBCOMMON)
+ at ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE at mount_ceph_SOURCES = mount/mount.ceph.c
+ at ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE at mount_ceph_LDADD = $(LIBSECRET) $(LIBCOMMON)
 
 # libcephfs (this should go somewhere else in the future)
 @ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE at libcephfs_la_SOURCES = libcephfs.cc
@@ -12556,7 +12546,7 @@ python_PYTHON = $(am__append_292) $(am__append_302) $(am__append_307)
 @ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE@	1:0:0 \
 @ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE@	-export-symbols-regex \
 @ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE@	'^ceph_.*' \
- at ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE@	$(am__append_303)
+ at ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE@	$(am__append_306)
 
 # jni library (java source is in src/java)
 @ENABLE_CEPHFS_JAVA_TRUE@@ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE at libcephfs_jni_la_SOURCES = \
@@ -12569,9 +12559,7 @@ python_PYTHON = $(am__append_292) $(am__append_302) $(am__append_307)
 @ENABLE_CEPHFS_JAVA_TRUE@@ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE at libcephfs_jni_la_CPPFLAGS = $(JDK_CPPFLAGS) $(AM_CPPFLAGS)
 @ENABLE_CEPHFS_JAVA_TRUE@@ENABLE_CLIENT_TRUE@@WITH_CEPHFS_TRUE@@WITH_RADOS_TRUE at libcephfs_jni_la_LDFLAGS = ${AM_LDFLAGS} -version-info 1:0:0
 @ENABLE_SERVER_TRUE at ceph_sbin_SCRIPTS = ceph-create-keys \
- at ENABLE_SERVER_TRUE@	$(am__append_313)
- at ENABLE_SERVER_TRUE@mount_ceph_SOURCES = mount/mount.ceph.c
- at ENABLE_SERVER_TRUE@mount_ceph_LDADD = $(LIBSECRET) $(LIBCOMMON)
+ at ENABLE_SERVER_TRUE@	$(am__append_314)
 @ENABLE_SERVER_TRUE@@WITH_MON_TRUE at ceph_mon_SOURCES = ceph_mon.cc
 @ENABLE_SERVER_TRUE@@WITH_MON_TRUE at ceph_mon_LDADD = $(LIBMON) $(LIBOS) $(CEPH_GLOBAL) $(LIBCOMMON) $(LIBAUTH) $(LIBCOMMON) $(LIBMON_TYPES)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE at ceph_osd_SOURCES = ceph_osd.cc
@@ -12579,7 +12567,7 @@ python_PYTHON = $(am__append_292) $(am__append_302) $(am__append_307)
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(LIBOSD) $(LIBOSD_TYPES) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(LIBOS_TYPES) $(LIBOS) \
 @ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(CEPH_GLOBAL) $(LIBCOMMON) \
- at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_315)
+ at ENABLE_SERVER_TRUE@@WITH_OSD_TRUE@	$(am__append_316)
 @ENABLE_SERVER_TRUE@@WITH_MDS_TRUE at ceph_mds_SOURCES = ceph_mds.cc
 @ENABLE_SERVER_TRUE@@WITH_MDS_TRUE at ceph_mds_LDADD = $(LIBMDS) $(LIBOSDC) $(CEPH_GLOBAL) $(LIBCOMMON)
 @ENABLE_COVERAGE_TRUE@@ENABLE_SERVER_TRUE at COV_DIR = $(DESTDIR)$(libdir)/ceph/coverage
@@ -15179,6 +15167,9 @@ tools/rbd_mirror/ImageReplayer.lo: tools/rbd_mirror/$(am__dirstamp) \
 	tools/rbd_mirror/$(DEPDIR)/$(am__dirstamp)
 tools/rbd_mirror/ImageSync.lo: tools/rbd_mirror/$(am__dirstamp) \
 	tools/rbd_mirror/$(DEPDIR)/$(am__dirstamp)
+tools/rbd_mirror/ImageSyncThrottler.lo:  \
+	tools/rbd_mirror/$(am__dirstamp) \
+	tools/rbd_mirror/$(DEPDIR)/$(am__dirstamp)
 tools/rbd_mirror/Mirror.lo: tools/rbd_mirror/$(am__dirstamp) \
 	tools/rbd_mirror/$(DEPDIR)/$(am__dirstamp)
 tools/rbd_mirror/PoolWatcher.lo: tools/rbd_mirror/$(am__dirstamp) \
@@ -15206,6 +15197,9 @@ tools/rbd_mirror/image_replayer/CloseImageRequest.lo:  \
 tools/rbd_mirror/image_replayer/CreateImageRequest.lo:  \
 	tools/rbd_mirror/image_replayer/$(am__dirstamp) \
 	tools/rbd_mirror/image_replayer/$(DEPDIR)/$(am__dirstamp)
+tools/rbd_mirror/image_replayer/EventPreprocessor.lo:  \
+	tools/rbd_mirror/image_replayer/$(am__dirstamp) \
+	tools/rbd_mirror/image_replayer/$(DEPDIR)/$(am__dirstamp)
 tools/rbd_mirror/image_replayer/OpenImageRequest.lo:  \
 	tools/rbd_mirror/image_replayer/$(am__dirstamp) \
 	tools/rbd_mirror/image_replayer/$(DEPDIR)/$(am__dirstamp)
@@ -16469,6 +16463,8 @@ test/osdc/object_cacher_stress.$(OBJEXT): test/osdc/$(am__dirstamp) \
 	test/osdc/$(DEPDIR)/$(am__dirstamp)
 test/osdc/FakeWriteback.$(OBJEXT): test/osdc/$(am__dirstamp) \
 	test/osdc/$(DEPDIR)/$(am__dirstamp)
+test/osdc/MemWriteback.$(OBJEXT): test/osdc/$(am__dirstamp) \
+	test/osdc/$(DEPDIR)/$(am__dirstamp)
 
 ceph_test_objectcacher_stress$(EXEEXT): $(ceph_test_objectcacher_stress_OBJECTS) $(ceph_test_objectcacher_stress_DEPENDENCIES) $(EXTRA_ceph_test_objectcacher_stress_DEPENDENCIES) 
 	@rm -f ceph_test_objectcacher_stress$(EXEEXT)
@@ -16702,13 +16698,6 @@ test/rbd_mirror/ceph_test_rbd_mirror-test_main.$(OBJEXT):  \
 ceph_test_rbd_mirror$(EXEEXT): $(ceph_test_rbd_mirror_OBJECTS) $(ceph_test_rbd_mirror_DEPENDENCIES) $(EXTRA_ceph_test_rbd_mirror_DEPENDENCIES) 
 	@rm -f ceph_test_rbd_mirror$(EXEEXT)
 	$(AM_V_CXXLD)$(ceph_test_rbd_mirror_LINK) $(ceph_test_rbd_mirror_OBJECTS) $(ceph_test_rbd_mirror_LDADD) $(LIBS)
-test/rbd_mirror/image_replay.$(OBJEXT):  \
-	test/rbd_mirror/$(am__dirstamp) \
-	test/rbd_mirror/$(DEPDIR)/$(am__dirstamp)
-
-ceph_test_rbd_mirror_image_replay$(EXEEXT): $(ceph_test_rbd_mirror_image_replay_OBJECTS) $(ceph_test_rbd_mirror_image_replay_DEPENDENCIES) $(EXTRA_ceph_test_rbd_mirror_image_replay_DEPENDENCIES) 
-	@rm -f ceph_test_rbd_mirror_image_replay$(EXEEXT)
-	$(AM_V_CXXLD)$(CXXLINK) $(ceph_test_rbd_mirror_image_replay_OBJECTS) $(ceph_test_rbd_mirror_image_replay_LDADD) $(LIBS)
 test/rbd_mirror/ceph_test_rbd_mirror_random_write-random_write.$(OBJEXT):  \
 	test/rbd_mirror/$(am__dirstamp) \
 	test/rbd_mirror/$(DEPDIR)/$(am__dirstamp)
@@ -18344,6 +18333,9 @@ test/rbd_mirror/unittest_rbd_mirror-test_mock_ImageReplayer.$(OBJEXT):  \
 test/rbd_mirror/unittest_rbd_mirror-test_mock_ImageSync.$(OBJEXT):  \
 	test/rbd_mirror/$(am__dirstamp) \
 	test/rbd_mirror/$(DEPDIR)/$(am__dirstamp)
+test/rbd_mirror/unittest_rbd_mirror-test_mock_ImageSyncThrottler.$(OBJEXT):  \
+	test/rbd_mirror/$(am__dirstamp) \
+	test/rbd_mirror/$(DEPDIR)/$(am__dirstamp)
 test/rbd_mirror/image_replayer/$(am__dirstamp):
 	@$(MKDIR_P) test/rbd_mirror/image_replayer
 	@: > test/rbd_mirror/image_replayer/$(am__dirstamp)
@@ -18356,6 +18348,9 @@ test/rbd_mirror/image_replayer/unittest_rbd_mirror-test_mock_BootstrapRequest.$(
 test/rbd_mirror/image_replayer/unittest_rbd_mirror-test_mock_CreateImageRequest.$(OBJEXT):  \
 	test/rbd_mirror/image_replayer/$(am__dirstamp) \
 	test/rbd_mirror/image_replayer/$(DEPDIR)/$(am__dirstamp)
+test/rbd_mirror/image_replayer/unittest_rbd_mirror-test_mock_EventPreprocessor.$(OBJEXT):  \
+	test/rbd_mirror/image_replayer/$(am__dirstamp) \
+	test/rbd_mirror/image_replayer/$(DEPDIR)/$(am__dirstamp)
 test/rbd_mirror/image_sync/$(am__dirstamp):
 	@$(MKDIR_P) test/rbd_mirror/image_sync
 	@: > test/rbd_mirror/image_sync/$(am__dirstamp)
@@ -20309,10 +20304,10 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote at test/osd/$(DEPDIR)/unittest_osdscrub-TestOSDScrub.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at test/osd/$(DEPDIR)/unittest_pglog-TestPGLog.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at test/osdc/$(DEPDIR)/FakeWriteback.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at test/osdc/$(DEPDIR)/MemWriteback.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at test/osdc/$(DEPDIR)/object_cacher_stress.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at test/rbd_mirror/$(DEPDIR)/ceph_test_rbd_mirror-test_main.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at test/rbd_mirror/$(DEPDIR)/ceph_test_rbd_mirror_random_write-random_write.Po at am__quote@
- at AMDEP_TRUE@@am__include@ @am__quote at test/rbd_mirror/$(DEPDIR)/image_replay.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at test/rbd_mirror/$(DEPDIR)/librbd_mirror_test_la-test_ClusterWatcher.Plo at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at test/rbd_mirror/$(DEPDIR)/librbd_mirror_test_la-test_ImageDeleter.Plo at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at test/rbd_mirror/$(DEPDIR)/librbd_mirror_test_la-test_ImageReplayer.Plo at am__quote@
@@ -20322,9 +20317,11 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote at test/rbd_mirror/$(DEPDIR)/unittest_rbd_mirror-test_main.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at test/rbd_mirror/$(DEPDIR)/unittest_rbd_mirror-test_mock_ImageReplayer.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at test/rbd_mirror/$(DEPDIR)/unittest_rbd_mirror-test_mock_ImageSync.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at test/rbd_mirror/$(DEPDIR)/unittest_rbd_mirror-test_mock_ImageSyncThrottler.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at test/rbd_mirror/$(DEPDIR)/unittest_rbd_mirror-test_mock_fixture.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at test/rbd_mirror/image_replayer/$(DEPDIR)/unittest_rbd_mirror-test_mock_BootstrapRequest.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at test/rbd_mirror/image_replayer/$(DEPDIR)/unittest_rbd_mirror-test_mock_CreateImageRequest.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at test/rbd_mirror/image_replayer/$(DEPDIR)/unittest_rbd_mirror-test_mock_EventPreprocessor.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at test/rbd_mirror/image_sync/$(DEPDIR)/unittest_rbd_mirror-test_mock_ImageCopyRequest.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at test/rbd_mirror/image_sync/$(DEPDIR)/unittest_rbd_mirror-test_mock_ObjectCopyRequest.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at test/rbd_mirror/image_sync/$(DEPDIR)/unittest_rbd_mirror-test_mock_SnapshotCopyRequest.Po at am__quote@
@@ -20420,6 +20417,7 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote at tools/rbd_mirror/$(DEPDIR)/ImageDeleter.Plo at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at tools/rbd_mirror/$(DEPDIR)/ImageReplayer.Plo at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at tools/rbd_mirror/$(DEPDIR)/ImageSync.Plo at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at tools/rbd_mirror/$(DEPDIR)/ImageSyncThrottler.Plo at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at tools/rbd_mirror/$(DEPDIR)/Mirror.Plo at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at tools/rbd_mirror/$(DEPDIR)/PoolWatcher.Plo at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at tools/rbd_mirror/$(DEPDIR)/Replayer.Plo at am__quote@
@@ -20429,6 +20427,7 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote at tools/rbd_mirror/image_replayer/$(DEPDIR)/BootstrapRequest.Plo at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at tools/rbd_mirror/image_replayer/$(DEPDIR)/CloseImageRequest.Plo at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at tools/rbd_mirror/image_replayer/$(DEPDIR)/CreateImageRequest.Plo at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at tools/rbd_mirror/image_replayer/$(DEPDIR)/EventPreprocessor.Plo at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at tools/rbd_mirror/image_replayer/$(DEPDIR)/OpenImageRequest.Plo at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at tools/rbd_mirror/image_replayer/$(DEPDIR)/OpenLocalImageRequest.Plo at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at tools/rbd_mirror/image_replayer/$(DEPDIR)/ReplayStatusFormatter.Plo at am__quote@
@@ -29857,6 +29856,20 @@ test/rbd_mirror/unittest_rbd_mirror-test_mock_ImageSync.obj: test/rbd_mirror/tes
 @AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
 @am__fastdepCXX_FALSE@	$(AM_V_CXX at am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(unittest_rbd_mirror_CXXFLAGS) $(CXXFLAGS) -c -o test/rbd_mirror/unittest_rbd_mirror-test_mock_ImageSync.obj `if test -f 'test/rbd_mirror/test_mock_ImageSync.cc'; then $(CYGPATH_W) 'test/rbd_mirror/test_mock_ImageSync.cc'; else $(CYGPATH_W) '$(srcdir)/test/rbd_mirror/test_mock_ImageSync.cc'; fi`
 
+test/rbd_mirror/unittest_rbd_mirror-test_mock_ImageSyncThrottler.o: test/rbd_mirror/test_mock_ImageSyncThrottler.cc
+ at am__fastdepCXX_TRUE@	$(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(unittest_rbd_mirror_CXXFLAGS) $(CXXFLAGS) -MT test/rbd_mirror/unittest_rbd_mirror-test_mock_ImageSyncThrottler.o -MD -MP -MF test/rbd_mirror/$(DEPDIR)/unittest_rbd_mirror-test_mock_ImageSyncThrottler.Tpo -c -o test/rbd_mirror/unittest_rbd_mirror-test_mock_ImageSyncThrottler.o `test -f 'test/rbd_mirror/test_mock_ImageSyncThrottler.cc' || echo '$(srcdir)/'`test/rbd_mirror/test_mock_ [...]
+ at am__fastdepCXX_TRUE@	$(AM_V_at)$(am__mv) test/rbd_mirror/$(DEPDIR)/unittest_rbd_mirror-test_mock_ImageSyncThrottler.Tpo test/rbd_mirror/$(DEPDIR)/unittest_rbd_mirror-test_mock_ImageSyncThrottler.Po
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@	$(AM_V_CXX)source='test/rbd_mirror/test_mock_ImageSyncThrottler.cc' object='test/rbd_mirror/unittest_rbd_mirror-test_mock_ImageSyncThrottler.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCXX_FALSE@	$(AM_V_CXX at am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(unittest_rbd_mirror_CXXFLAGS) $(CXXFLAGS) -c -o test/rbd_mirror/unittest_rbd_mirror-test_mock_ImageSyncThrottler.o `test -f 'test/rbd_mirror/test_mock_ImageSyncThrottler.cc' || echo '$(srcdir)/'`test/rbd_mirror/test_mock_ImageSyncThrottler.cc
+
+test/rbd_mirror/unittest_rbd_mirror-test_mock_ImageSyncThrottler.obj: test/rbd_mirror/test_mock_ImageSyncThrottler.cc
+ at am__fastdepCXX_TRUE@	$(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(unittest_rbd_mirror_CXXFLAGS) $(CXXFLAGS) -MT test/rbd_mirror/unittest_rbd_mirror-test_mock_ImageSyncThrottler.obj -MD -MP -MF test/rbd_mirror/$(DEPDIR)/unittest_rbd_mirror-test_mock_ImageSyncThrottler.Tpo -c -o test/rbd_mirror/unittest_rbd_mirror-test_mock_ImageSyncThrottler.obj `if test -f 'test/rbd_mirror/test_mock_ImageSyncThrottler.cc'; then $(CYGPATH_W) 'test/rbd_mirror/test [...]
+ at am__fastdepCXX_TRUE@	$(AM_V_at)$(am__mv) test/rbd_mirror/$(DEPDIR)/unittest_rbd_mirror-test_mock_ImageSyncThrottler.Tpo test/rbd_mirror/$(DEPDIR)/unittest_rbd_mirror-test_mock_ImageSyncThrottler.Po
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@	$(AM_V_CXX)source='test/rbd_mirror/test_mock_ImageSyncThrottler.cc' object='test/rbd_mirror/unittest_rbd_mirror-test_mock_ImageSyncThrottler.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCXX_FALSE@	$(AM_V_CXX at am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(unittest_rbd_mirror_CXXFLAGS) $(CXXFLAGS) -c -o test/rbd_mirror/unittest_rbd_mirror-test_mock_ImageSyncThrottler.obj `if test -f 'test/rbd_mirror/test_mock_ImageSyncThrottler.cc'; then $(CYGPATH_W) 'test/rbd_mirror/test_mock_ImageSyncThrottler.cc'; else $(CYGPATH_W) '$(srcdir)/test/rbd_mirror/test_mock_ImageSyncThrottler.cc'; fi`
+
 test/rbd_mirror/image_replayer/unittest_rbd_mirror-test_mock_BootstrapRequest.o: test/rbd_mirror/image_replayer/test_mock_BootstrapRequest.cc
 @am__fastdepCXX_TRUE@	$(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(unittest_rbd_mirror_CXXFLAGS) $(CXXFLAGS) -MT test/rbd_mirror/image_replayer/unittest_rbd_mirror-test_mock_BootstrapRequest.o -MD -MP -MF test/rbd_mirror/image_replayer/$(DEPDIR)/unittest_rbd_mirror-test_mock_BootstrapRequest.Tpo -c -o test/rbd_mirror/image_replayer/unittest_rbd_mirror-test_mock_BootstrapRequest.o `test -f 'test/rbd_mirror/image_replayer/test_mock_BootstrapRequest [...]
 @am__fastdepCXX_TRUE@	$(AM_V_at)$(am__mv) test/rbd_mirror/image_replayer/$(DEPDIR)/unittest_rbd_mirror-test_mock_BootstrapRequest.Tpo test/rbd_mirror/image_replayer/$(DEPDIR)/unittest_rbd_mirror-test_mock_BootstrapRequest.Po
@@ -29885,6 +29898,20 @@ test/rbd_mirror/image_replayer/unittest_rbd_mirror-test_mock_CreateImageRequest.
 @AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
 @am__fastdepCXX_FALSE@	$(AM_V_CXX at am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(unittest_rbd_mirror_CXXFLAGS) $(CXXFLAGS) -c -o test/rbd_mirror/image_replayer/unittest_rbd_mirror-test_mock_CreateImageRequest.obj `if test -f 'test/rbd_mirror/image_replayer/test_mock_CreateImageRequest.cc'; then $(CYGPATH_W) 'test/rbd_mirror/image_replayer/test_mock_CreateImageRequest.cc'; else $(CYGPATH_W) '$(srcdir)/test/rbd_mirror/image_replayer/test_mock_CreateI [...]
 
+test/rbd_mirror/image_replayer/unittest_rbd_mirror-test_mock_EventPreprocessor.o: test/rbd_mirror/image_replayer/test_mock_EventPreprocessor.cc
+ at am__fastdepCXX_TRUE@	$(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(unittest_rbd_mirror_CXXFLAGS) $(CXXFLAGS) -MT test/rbd_mirror/image_replayer/unittest_rbd_mirror-test_mock_EventPreprocessor.o -MD -MP -MF test/rbd_mirror/image_replayer/$(DEPDIR)/unittest_rbd_mirror-test_mock_EventPreprocessor.Tpo -c -o test/rbd_mirror/image_replayer/unittest_rbd_mirror-test_mock_EventPreprocessor.o `test -f 'test/rbd_mirror/image_replayer/test_mock_EventPreproce [...]
+ at am__fastdepCXX_TRUE@	$(AM_V_at)$(am__mv) test/rbd_mirror/image_replayer/$(DEPDIR)/unittest_rbd_mirror-test_mock_EventPreprocessor.Tpo test/rbd_mirror/image_replayer/$(DEPDIR)/unittest_rbd_mirror-test_mock_EventPreprocessor.Po
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@	$(AM_V_CXX)source='test/rbd_mirror/image_replayer/test_mock_EventPreprocessor.cc' object='test/rbd_mirror/image_replayer/unittest_rbd_mirror-test_mock_EventPreprocessor.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCXX_FALSE@	$(AM_V_CXX at am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(unittest_rbd_mirror_CXXFLAGS) $(CXXFLAGS) -c -o test/rbd_mirror/image_replayer/unittest_rbd_mirror-test_mock_EventPreprocessor.o `test -f 'test/rbd_mirror/image_replayer/test_mock_EventPreprocessor.cc' || echo '$(srcdir)/'`test/rbd_mirror/image_replayer/test_mock_EventPreprocessor.cc
+
+test/rbd_mirror/image_replayer/unittest_rbd_mirror-test_mock_EventPreprocessor.obj: test/rbd_mirror/image_replayer/test_mock_EventPreprocessor.cc
+ at am__fastdepCXX_TRUE@	$(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(unittest_rbd_mirror_CXXFLAGS) $(CXXFLAGS) -MT test/rbd_mirror/image_replayer/unittest_rbd_mirror-test_mock_EventPreprocessor.obj -MD -MP -MF test/rbd_mirror/image_replayer/$(DEPDIR)/unittest_rbd_mirror-test_mock_EventPreprocessor.Tpo -c -o test/rbd_mirror/image_replayer/unittest_rbd_mirror-test_mock_EventPreprocessor.obj `if test -f 'test/rbd_mirror/image_replayer/test_mock_EventP [...]
+ at am__fastdepCXX_TRUE@	$(AM_V_at)$(am__mv) test/rbd_mirror/image_replayer/$(DEPDIR)/unittest_rbd_mirror-test_mock_EventPreprocessor.Tpo test/rbd_mirror/image_replayer/$(DEPDIR)/unittest_rbd_mirror-test_mock_EventPreprocessor.Po
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@	$(AM_V_CXX)source='test/rbd_mirror/image_replayer/test_mock_EventPreprocessor.cc' object='test/rbd_mirror/image_replayer/unittest_rbd_mirror-test_mock_EventPreprocessor.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCXX_FALSE@	$(AM_V_CXX at am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(unittest_rbd_mirror_CXXFLAGS) $(CXXFLAGS) -c -o test/rbd_mirror/image_replayer/unittest_rbd_mirror-test_mock_EventPreprocessor.obj `if test -f 'test/rbd_mirror/image_replayer/test_mock_EventPreprocessor.cc'; then $(CYGPATH_W) 'test/rbd_mirror/image_replayer/test_mock_EventPreprocessor.cc'; else $(CYGPATH_W) '$(srcdir)/test/rbd_mirror/image_replayer/test_mock_EventPrepr [...]
+
 test/rbd_mirror/image_sync/unittest_rbd_mirror-test_mock_ImageCopyRequest.o: test/rbd_mirror/image_sync/test_mock_ImageCopyRequest.cc
 @am__fastdepCXX_TRUE@	$(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(unittest_rbd_mirror_CXXFLAGS) $(CXXFLAGS) -MT test/rbd_mirror/image_sync/unittest_rbd_mirror-test_mock_ImageCopyRequest.o -MD -MP -MF test/rbd_mirror/image_sync/$(DEPDIR)/unittest_rbd_mirror-test_mock_ImageCopyRequest.Tpo -c -o test/rbd_mirror/image_sync/unittest_rbd_mirror-test_mock_ImageCopyRequest.o `test -f 'test/rbd_mirror/image_sync/test_mock_ImageCopyRequest.cc' || echo '$( [...]
 @am__fastdepCXX_TRUE@	$(AM_V_at)$(am__mv) test/rbd_mirror/image_sync/$(DEPDIR)/unittest_rbd_mirror-test_mock_ImageCopyRequest.Tpo test/rbd_mirror/image_sync/$(DEPDIR)/unittest_rbd_mirror-test_mock_ImageCopyRequest.Po
@@ -32031,6 +32058,13 @@ test/test_pidfile.sh.log: test/test_pidfile.sh
 	--log-file $$b.log --trs-file $$b.trs \
 	$(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \
 	"$$tst" $(AM_TESTS_FD_REDIRECT)
+test/test_subman.sh.log: test/test_subman.sh
+	@p='test/test_subman.sh'; \
+	b='test/test_subman.sh'; \
+	$(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \
+	--log-file $$b.log --trs-file $$b.trs \
+	$(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \
+	"$$tst" $(AM_TESTS_FD_REDIRECT)
 test/pybind/test_ceph_argparse.py.log: test/pybind/test_ceph_argparse.py
 	@p='test/pybind/test_ceph_argparse.py'; \
 	b='test/pybind/test_ceph_argparse.py'; \
diff --git a/src/ceph-disk/ceph_disk/main.py b/src/ceph-disk/ceph_disk/main.py
index 99558fb..807733c 100755
--- a/src/ceph-disk/ceph_disk/main.py
+++ b/src/ceph-disk/ceph_disk/main.py
@@ -713,6 +713,20 @@ def get_partition_base_mpath(dev):
     return os.path.join('/dev/mapper', name)
 
 
+def is_bcache(dev_name):
+    """
+    Check if dev_name is a bcache
+    """
+    if not dev_name.startswith('bcache'):
+        return False
+    if not os.path.exists(os.path.join('/sys/block', dev_name, 'bcache')):
+        return False
+    if os.path.exists(os.path.join('/sys/block', dev_name,
+                                   'bcache/cache_mode')):
+        return True
+    return False
+
+
 def is_partition(dev):
     """
     Check whether a given device path is a partition or a full disk.
@@ -726,6 +740,8 @@ def is_partition(dev):
         raise Error('not a block device', dev)
 
     name = get_dev_name(dev)
+    if is_bcache(name):
+        return True
     if os.path.exists(os.path.join('/sys/block', name)):
         return False
 
@@ -1395,10 +1411,11 @@ def update_partition(dev, description):
     LOG.debug('Calling partprobe on %s device %s', description, dev)
     partprobe_ok = False
     error = 'unknown error'
+    partprobe = _get_command_executable(['partprobe'])[0]
     for i in (1, 2, 3, 4, 5):
         command_check_call(['udevadm', 'settle', '--timeout=600'])
         try:
-            _check_output(['partprobe', dev])
+            _check_output(['flock', '-s', dev, partprobe, dev])
             partprobe_ok = True
             break
         except subprocess.CalledProcessError as e:
diff --git a/src/ceph-osd-prestart.sh b/src/ceph-osd-prestart.sh
index cefca85..314ea18 100644
--- a/src/ceph-osd-prestart.sh
+++ b/src/ceph-osd-prestart.sh
@@ -45,13 +45,11 @@ fi
 if [ -L "$journal" -a ! -e "$journal" ]; then
     udevadm settle --timeout=5 || :
     if [ -L "$journal" -a ! -e "$journal" ]; then
-        echo "ceph-osd($UPSTART_INSTANCE): journal not present, not starting yet." 1>&2
-        stop
+        echo "ceph-osd(${cluster:-ceph}-$id): journal not present, not starting yet." 1>&2
         exit 0
     fi
 fi
 
-
 # ensure ownership is correct
 owner=`stat -c %U $data/.`
 if [ $owner != 'ceph' -a $owner != 'root' ]; then
diff --git a/src/client/Client.cc b/src/client/Client.cc
index edbe073..b465cad 100644
--- a/src/client/Client.cc
+++ b/src/client/Client.cc
@@ -1797,10 +1797,11 @@ void Client::put_request(MetaRequest *request)
     request->take_other_inode(&other_in);
     delete request;
 
-    if (other_in) {
-      if (other_in->dir &&
-	  (op == CEPH_MDS_OP_RMDIR || op == CEPH_MDS_OP_RENAME))
-	_try_to_trim_inode(other_in.get());
+    if (other_in &&
+	(op == CEPH_MDS_OP_RMDIR ||
+	 op == CEPH_MDS_OP_RENAME ||
+	 op == CEPH_MDS_OP_RMSNAP)) {
+      _try_to_trim_inode(other_in.get(), false);
     }
   }
 }
@@ -1993,6 +1994,7 @@ MetaSession *Client::_open_mds_session(mds_rank_t mds)
   session->inst = mdsmap->get_inst(mds);
   session->con = messenger->get_connection(session->inst);
   session->state = MetaSession::STATE_OPENING;
+  session->mds_state = MDSMap::STATE_NULL;
   mds_sessions[mds] = session;
 
   // Maybe skip sending a request to open if this MDS daemon
@@ -2572,9 +2574,9 @@ void Client::handle_mds_map(MMDSMap* m)
     } else if (oldstate == newstate)
       continue;  // no change
     
-    if (newstate == MDSMap::STATE_RECONNECT &&
-	mds_sessions.count(p->first)) {
-      MetaSession *session = mds_sessions[p->first];
+    MetaSession *session = p->second;
+    session->mds_state = newstate;
+    if (newstate == MDSMap::STATE_RECONNECT) {
       session->inst = mdsmap->get_inst(p->first);
       session->con = messenger->get_connection(session->inst);
       send_reconnect(session);
@@ -2583,11 +2585,11 @@ void Client::handle_mds_map(MMDSMap* m)
     if (newstate >= MDSMap::STATE_ACTIVE) {
       if (oldstate < MDSMap::STATE_ACTIVE) {
 	// kick new requests
-	kick_requests(p->second);
-	kick_flushing_caps(p->second);
-	signal_context_list(p->second->waiting_for_open);
-	kick_maxsize_requests(p->second);
-	wake_inode_waiters(p->second);
+	kick_requests(session);
+	kick_flushing_caps(session);
+	signal_context_list(session->waiting_for_open);
+	kick_maxsize_requests(session);
+	wake_inode_waiters(session);
       }
       connect_mds_targets(p->first);
     }
@@ -2649,19 +2651,25 @@ void Client::send_reconnect(MetaSession *session)
       cap->issue_seq = 0;  // reset seq.
       cap->mseq = 0;  // reset seq.
       cap->issued = cap->implemented;
+
+      snapid_t snap_follows = 0;
+      if (!in->cap_snaps.empty())
+	snap_follows = in->cap_snaps.begin()->first;
+
       m->add_cap(p->first.ino, 
 		 cap->cap_id,
 		 path.get_ino(), path.get_path(),   // ino
 		 in->caps_wanted(), // wanted
 		 cap->issued,     // issued
 		 in->snaprealm->ino,
+		 snap_follows,
 		 flockbl);
 
       if (did_snaprealm.count(in->snaprealm->ino) == 0) {
 	ldout(cct, 10) << " snaprealm " << *in->snaprealm << dendl;
 	m->add_snaprealm(in->snaprealm->ino, in->snaprealm->seq, in->snaprealm->parent);
 	did_snaprealm.insert(in->snaprealm->ino);
-      }	
+      }
     }
   }
 
@@ -2839,7 +2847,10 @@ void Client::put_inode(Inode *in, int n)
 
     in->cap_item.remove_myself();
     in->snaprealm_item.remove_myself();
-    in->snapdir_parent.reset();
+    if (in->snapdir_parent) {
+      in->snapdir_parent->flags &= ~I_SNAPDIR_OPEN;
+      in->snapdir_parent.reset();
+    }
     if (in == root) {
       root = 0;
       root_ancestor = 0;
@@ -3358,6 +3369,19 @@ void Client::check_caps(Inode *in, bool is_delayed)
     }
 
   ack:
+    // re-send old cap/snapcap flushes first.
+    if (session->mds_state >= MDSMap::STATE_RECONNECT &&
+	session->mds_state < MDSMap::STATE_ACTIVE &&
+	session->early_flushing_caps.count(in) == 0) {
+      ldout(cct, 20) << " reflushing caps (check_caps) on " << *in
+		     << " to mds." << session->mds_num << dendl;
+      session->early_flushing_caps.insert(in);
+      if (in->cap_snaps.size())
+	flush_snaps(in, true);
+      if (in->flushing_caps)
+	flush_caps(in, session);
+    }
+
     int flushing;
     ceph_tid_t flush_tid;
     if (in->auth_cap == cap && in->dirty_caps) {
@@ -3443,11 +3467,9 @@ void Client::_flushed_cap_snap(Inode *in, snapid_t seq)
   flush_snaps(in);
 }
 
-void Client::flush_snaps(Inode *in, bool all_again, CapSnap *again)
+void Client::flush_snaps(Inode *in, bool all_again)
 {
-  ldout(cct, 10) << "flush_snaps on " << *in
-		 << " all_again " << all_again
-		 << " again " << again << dendl;
+  ldout(cct, 10) << "flush_snaps on " << *in << " all_again " << all_again << dendl;
   assert(in->cap_snaps.size());
 
   // pick auth mds
@@ -3457,13 +3479,9 @@ void Client::flush_snaps(Inode *in, bool all_again, CapSnap *again)
 
   for (map<snapid_t,CapSnap*>::iterator p = in->cap_snaps.begin(); p != in->cap_snaps.end(); ++p) {
     CapSnap *capsnap = p->second;
-    if (again) {
-      // only one capsnap
-      if (again != capsnap)
-	continue;
-    } else if (!all_again) {
+    if (!all_again) {
       // only flush once per session
-      if (capsnap->flushing_item.is_on_list())
+      if (capsnap->flush_tid > 0)
 	continue;
     }
 
@@ -3477,9 +3495,13 @@ void Client::flush_snaps(Inode *in, bool all_again, CapSnap *again)
     if (capsnap->dirty_data || capsnap->writing)
       continue;
     
-    in->auth_cap->session->flushing_capsnaps.push_back(&capsnap->flushing_item);
+    if (capsnap->flush_tid == 0) {
+      capsnap->flush_tid = ++last_flush_tid;
+      if (!in->flushing_cap_item.is_on_list())
+	session->flushing_caps.push_back(&in->flushing_cap_item);
+      session->flushing_caps_tids.insert(capsnap->flush_tid);
+    }
 
-    capsnap->flush_tid = ++last_flush_tid;
     MClientCaps *m = new MClientCaps(CEPH_CAP_OP_FLUSHSNAP, in->ino, in->snaprealm->ino, 0, mseq,
 				     cap_epoch_barrier);
     if (user_id >= 0)
@@ -3512,6 +3534,9 @@ void Client::flush_snaps(Inode *in, bool all_again, CapSnap *again)
       m->inline_data = in->inline_data;
     }
 
+    assert(!session->flushing_caps_tids.empty());
+    m->set_oldest_flush_tid(*session->flushing_caps_tids.begin());
+
     session->con->send_message(m);
   }
 }
@@ -3565,31 +3590,29 @@ void Client::wake_inode_waiters(MetaSession *s)
 class C_Client_CacheInvalidate : public Context  {
 private:
   Client *client;
-  InodeRef inode;
+  vinodeno_t ino;
   int64_t offset, length;
 public:
   C_Client_CacheInvalidate(Client *c, Inode *in, int64_t off, int64_t len) :
-			   client(c), inode(in), offset(off), length(len) {
+    client(c), offset(off), length(len) {
+    if (client->use_faked_inos())
+      ino = vinodeno_t(in->faked_ino, CEPH_NOSNAP);
+    else
+      ino = in->vino();
   }
   void finish(int r) {
     // _async_invalidate takes the lock when it needs to, call this back from outside of lock.
     assert(!client->client_lock.is_locked_by_me());
-    client->_async_invalidate(inode, offset, length);
+    client->_async_invalidate(ino, offset, length);
   }
 };
 
-void Client::_async_invalidate(InodeRef& in, int64_t off, int64_t len)
+void Client::_async_invalidate(vinodeno_t ino, int64_t off, int64_t len)
 {
-  ldout(cct, 10) << "_async_invalidate " << off << "~" << len << dendl;
-  if (use_faked_inos())
-    ino_invalidate_cb(callback_handle, vinodeno_t(in->faked_ino, CEPH_NOSNAP), off, len);
-  else
-    ino_invalidate_cb(callback_handle, in->vino(), off, len);
-
-  client_lock.Lock();
-  in.reset(); // put inode inside client_lock
-  client_lock.Unlock();
-  ldout(cct, 10) << "_async_invalidate " << off << "~" << len << " done" << dendl;
+  if (unmounting)
+    return;
+  ldout(cct, 10) << "_async_invalidate " << ino << " " << off << "~" << len << dendl;
+  ino_invalidate_cb(callback_handle, ino, off, len);
 }
 
 void Client::_schedule_invalidate_callback(Inode *in, int64_t off, int64_t len) {
@@ -4015,8 +4038,8 @@ int Client::mark_caps_flushing(Inode *in, ceph_tid_t* ptid)
   in->flushing_caps |= flushing;
   in->dirty_caps = 0;
  
-
-  session->flushing_caps.push_back(&in->flushing_cap_item);
+  if (!in->flushing_cap_item.is_on_list())
+    session->flushing_caps.push_back(&in->flushing_cap_item);
   session->flushing_caps_tids.insert(flush_tid);
 
   *ptid = flush_tid;
@@ -4025,6 +4048,13 @@ int Client::mark_caps_flushing(Inode *in, ceph_tid_t* ptid)
 
 void Client::adjust_session_flushing_caps(Inode *in, MetaSession *old_s,  MetaSession *new_s)
 {
+  for (auto p = in->cap_snaps.begin(); p != in->cap_snaps.end(); ++p) {
+    CapSnap *capsnap = p->second;
+    if (capsnap->flush_tid > 0) {
+      old_s->flushing_caps_tids.erase(capsnap->flush_tid);
+      new_s->flushing_caps_tids.insert(capsnap->flush_tid);
+    }
+  }
   for (map<ceph_tid_t, int>::iterator it = in->flushing_cap_tids.begin();
        it != in->flushing_cap_tids.end();
        ++it) {
@@ -4063,8 +4093,6 @@ void Client::flush_caps(Inode *in, MetaSession *session)
   for (map<ceph_tid_t,int>::iterator p = in->flushing_cap_tids.begin();
        p != in->flushing_cap_tids.end();
        ++p) {
-    if (session->kicked_flush_tids.count(p->first))
-	continue;
     send_cap(in, session, cap, (get_caps_used(in) | in->caps_dirty()),
 	     in->caps_wanted(), (cap->issued | cap->implemented),
 	     p->second, p->first);
@@ -4111,33 +4139,27 @@ void Client::kick_flushing_caps(MetaSession *session)
   mds_rank_t mds = session->mds_num;
   ldout(cct, 10) << "kick_flushing_caps mds." << mds << dendl;
 
-  for (xlist<CapSnap*>::iterator p = session->flushing_capsnaps.begin(); !p.end(); ++p) {
-    CapSnap *capsnap = *p;
-    InodeRef& in = capsnap->in;
-    ldout(cct, 20) << " reflushing capsnap " << capsnap
-		   << " on " << *in << " to mds." << mds << dendl;
-    flush_snaps(in.get(), false, capsnap);
-  }
   for (xlist<Inode*>::iterator p = session->flushing_caps.begin(); !p.end(); ++p) {
     Inode *in = *p;
+    if (session->early_flushing_caps.count(in))
+      continue;
     ldout(cct, 20) << " reflushing caps on " << *in << " to mds." << mds << dendl;
+    if (in->cap_snaps.size())
+      flush_snaps(in, true);
     if (in->flushing_caps)
       flush_caps(in, session);
   }
 
-  session->kicked_flush_tids.clear();
+  session->early_flushing_caps.clear();
 }
 
 void Client::early_kick_flushing_caps(MetaSession *session)
 {
-  session->kicked_flush_tids.clear();
+  session->early_flushing_caps.clear();
 
   for (xlist<Inode*>::iterator p = session->flushing_caps.begin(); !p.end(); ++p) {
     Inode *in = *p;
-    if (!in->flushing_caps)
-      continue;
     assert(in->auth_cap);
-    Cap *cap = in->auth_cap;
 
     // if flushing caps were revoked, we re-send the cap flush in client reconnect
     // stage. This guarantees that MDS processes the cap flush message before issuing
@@ -4145,17 +4167,16 @@ void Client::early_kick_flushing_caps(MetaSession *session)
     if ((in->flushing_caps & in->auth_cap->issued) == in->flushing_caps)
       continue;
 
-    ldout(cct, 20) << " reflushing caps (revoked) on " << *in
+    ldout(cct, 20) << " reflushing caps (early_kick) on " << *in
 		   << " to mds." << session->mds_num << dendl;
 
-    for (map<ceph_tid_t,int>::iterator q = in->flushing_cap_tids.begin();
-	 q != in->flushing_cap_tids.end();
-	 ++q) {
-      send_cap(in, session, cap, (get_caps_used(in) | in->caps_dirty()),
-	       in->caps_wanted(), (cap->issued | cap->implemented),
-	       q->second, q->first);
-      session->kicked_flush_tids.insert(q->first);
-    }
+    session->early_flushing_caps.insert(in);
+
+    if (in->cap_snaps.size())
+      flush_snaps(in, true);
+    if (in->flushing_caps)
+      flush_caps(in, session);
+
   }
 }
 
@@ -4692,8 +4713,9 @@ void Client::handle_cap_flush_ack(MetaSession *session, Inode *in, Cap *cap, MCl
       in->flushing_caps &= ~cleaned;
       if (in->flushing_caps == 0) {
 	ldout(cct, 10) << " " << *in << " !flushing" << dendl;
-	in->flushing_cap_item.remove_myself();
 	num_flushing_caps--;
+	if (in->cap_snaps.empty())
+	  in->flushing_cap_item.remove_myself();
       }
       if (!in->caps_dirty())
 	put_inode(in);
@@ -4718,7 +4740,10 @@ void Client::handle_cap_flushsnap_ack(MetaSession *session, Inode *in, MClientCa
       ldout(cct, 5) << "handle_cap_flushedsnap mds." << mds << " flushed snap follows " << follows
 	      << " on " << *in << dendl;
       in->cap_snaps.erase(follows);
-      capsnap->flushing_item.remove_myself();
+      if (in->flushing_caps == 0 && in->cap_snaps.empty())
+	in->flushing_cap_item.remove_myself();
+      session->flushing_caps_tids.erase(capsnap->flush_tid);
+
       delete capsnap;
     }
   } else {
@@ -4760,6 +4785,8 @@ public:
 
 void Client::_async_dentry_invalidate(vinodeno_t dirino, vinodeno_t ino, string& name)
 {
+  if (unmounting)
+    return;
   ldout(cct, 10) << "_async_dentry_invalidate '" << name << "' ino " << ino
 		 << " in dir " << dirino << dendl;
   dentry_invalidate_cb(callback_handle, dirino, ino, name);
@@ -4771,7 +4798,7 @@ void Client::_schedule_invalidate_dentry_callback(Dentry *dn, bool del)
     async_dentry_invalidator.queue(new C_Client_DentryInvalidate(this, dn, del));
 }
 
-void Client::_try_to_trim_inode(Inode *in)
+void Client::_try_to_trim_inode(Inode *in, bool sched_inval)
 {
   int ref = in->get_num_ref();
 
@@ -4780,22 +4807,37 @@ void Client::_try_to_trim_inode(Inode *in)
 	 p != in->dir->dentries.end(); ) {
       Dentry *dn = p->second;
       ++p;
+      /* rmsnap removes whole subtree, need trim inodes recursively.
+       * we don't need to invalidate dentries recursively. because
+       * invalidating a directory dentry effectively invalidate
+       * whole subtree */
+      if (in->snapid != CEPH_NOSNAP && dn->inode && dn->inode->is_dir())
+	_try_to_trim_inode(dn->inode.get(), false);
+
       if (dn->lru_is_expireable())
-	unlink(dn, false, false);  // close dir, drop dentry
+	unlink(dn, true, false);  // keep dir, drop dentry
     }
+    if (in->dir->dentries.empty()) {
+      close_dir(in->dir);
+      --ref;
+    }
+  }
+
+  if (ref > 0 && (in->flags & I_SNAPDIR_OPEN)) {
+    InodeRef snapdir = open_snapdir(in);
+    _try_to_trim_inode(snapdir.get(), false);
     --ref;
   }
-  // make sure inode was not freed when closing dir
-  if (ref == 0)
-    return;
 
-  set<Dentry*>::iterator q = in->dn_set.begin();
-  while (q != in->dn_set.end()) {
-    Dentry *dn = *q++;
-    // FIXME: we play lots of unlink/link tricks when handling MDS replies,
-    //        so in->dn_set doesn't always reflect the state of kernel's dcache.
-    _schedule_invalidate_dentry_callback(dn, true);
-    unlink(dn, true, true);
+  if (ref > 0 && in->ll_ref > 0 && sched_inval) {
+    set<Dentry*>::iterator q = in->dn_set.begin();
+    while (q != in->dn_set.end()) {
+      Dentry *dn = *q++;
+      // FIXME: we play lots of unlink/link tricks when handling MDS replies,
+      //        so in->dn_set doesn't always reflect the state of kernel's dcache.
+      _schedule_invalidate_dentry_callback(dn, true);
+      unlink(dn, true, true);
+    }
   }
 }
 
@@ -4906,7 +4948,7 @@ void Client::handle_cap_grant(MetaSession *session, Inode *in, Cap *cap, MClient
 
   // may drop inode's last ref
   if (deleted_inode)
-    _try_to_trim_inode(in);
+    _try_to_trim_inode(in, true);
 
   m->put();
 }
@@ -8700,16 +8742,16 @@ int Client::_fsync(Fh *f, bool syncdataonly)
   return _fsync(f->inode.get(), syncdataonly);
 }
 
-int Client::fstat(int fd, struct stat *stbuf) 
+int Client::fstat(int fd, struct stat *stbuf, int mask)
 {
   Mutex::Locker lock(client_lock);
-  tout(cct) << "fstat" << std::endl;
+  tout(cct) << "fstat mask " << hex << mask << std::endl;
   tout(cct) << fd << std::endl;
 
   Fh *f = get_filehandle(fd);
   if (!f)
     return -EBADF;
-  int r = _getattr(f->inode, -1);
+  int r = _getattr(f->inode, mask);
   if (r < 0)
     return r;
   fill_stat(f->inode, stbuf, NULL);
@@ -9365,6 +9407,7 @@ Inode *Client::open_snapdir(Inode *diri)
 
     in->dirfragtree.clear();
     in->snapdir_parent = diri;
+    diri->flags |= I_SNAPDIR_OPEN;
     inode_map[vino] = in;
     if (use_faked_inos())
       _assign_faked_ino(in);
@@ -10789,6 +10832,7 @@ int Client::_rmdir(Inode *dir, const char *name, int uid, int gid)
     req->set_other_inode(in.get());
   } else {
     unlink(de, true, true);
+    req->set_other_inode(in.get());
   }
 
   res = make_request(req, uid, gid);
diff --git a/src/client/Client.h b/src/client/Client.h
index 647a122..8417946 100644
--- a/src/client/Client.h
+++ b/src/client/Client.h
@@ -654,7 +654,7 @@ protected:
   void check_caps(Inode *in, bool is_delayed);
   void get_cap_ref(Inode *in, int cap);
   void put_cap_ref(Inode *in, int cap);
-  void flush_snaps(Inode *in, bool all_again=false, CapSnap *again=0);
+  void flush_snaps(Inode *in, bool all_again=false);
   void wait_sync_caps(Inode *in, ceph_tid_t want);
   void wait_sync_caps(ceph_tid_t want);
   void queue_cap_snap(Inode *in, SnapContext &old_snapc);
@@ -663,12 +663,12 @@ protected:
 
   void _schedule_invalidate_dentry_callback(Dentry *dn, bool del);
   void _async_dentry_invalidate(vinodeno_t dirino, vinodeno_t ino, string& name);
-  void _try_to_trim_inode(Inode *in);
+  void _try_to_trim_inode(Inode *in, bool sched_inval);
 
   void _schedule_invalidate_callback(Inode *in, int64_t off, int64_t len);
   void _invalidate_inode_cache(Inode *in);
   void _invalidate_inode_cache(Inode *in, int64_t off, int64_t len);
-  void _async_invalidate(InodeRef& in, int64_t off, int64_t len);
+  void _async_invalidate(vinodeno_t ino, int64_t off, int64_t len);
   bool _release(Inode *in);
   
   /**
@@ -1016,7 +1016,7 @@ public:
   int fake_write_size(int fd, loff_t size);
   int ftruncate(int fd, loff_t size);
   int fsync(int fd, bool syncdataonly);
-  int fstat(int fd, struct stat *stbuf);
+  int fstat(int fd, struct stat *stbuf, int mask=CEPH_STAT_CAP_INODE_ALL);
   int fallocate(int fd, int mode, loff_t offset, loff_t length);
 
   // full path xattr ops
diff --git a/src/client/Inode.h b/src/client/Inode.h
index 969da13..1f2e36d 100644
--- a/src/client/Inode.h
+++ b/src/client/Inode.h
@@ -64,13 +64,11 @@ struct CapSnap {
 
   bool writing, dirty_data;
   uint64_t flush_tid;
-  xlist<CapSnap*>::item flushing_item;
 
   explicit CapSnap(Inode *i)
     : in(i), issued(0), dirty(0),
       size(0), time_warp_seq(0), mode(0), uid(0), gid(0), xattr_version(0),
-      inline_version(0), writing(false), dirty_data(false), flush_tid(0),
-      flushing_item(this)
+      inline_version(0), writing(false), dirty_data(false), flush_tid(0)
   {}
 
   void dump(Formatter *f) const;
@@ -151,8 +149,9 @@ public:
 };
 
 // inode flags
-#define I_COMPLETE 1
-#define I_DIR_ORDERED 2
+#define I_COMPLETE	1
+#define I_DIR_ORDERED	2
+#define I_SNAPDIR_OPEN	8
 
 struct Inode {
   Client *client;
diff --git a/src/client/MetaRequest.h b/src/client/MetaRequest.h
index c180de6..43173b4 100644
--- a/src/client/MetaRequest.h
+++ b/src/client/MetaRequest.h
@@ -140,7 +140,7 @@ public:
     out->swap(_old_inode);
   }
   void set_other_inode(Inode *in) {
-    _old_inode = in;
+    _other_inode = in;
   }
   Inode *other_inode() {
     return _other_inode.get();
diff --git a/src/client/MetaSession.h b/src/client/MetaSession.h
index 2eb8cd6..aeb883c 100644
--- a/src/client/MetaSession.h
+++ b/src/client/MetaSession.h
@@ -37,17 +37,17 @@ struct MetaSession {
     STATE_STALE,
   } state;
 
+  int mds_state;
   bool readonly;
 
   list<Context*> waiting_for_open;
 
   xlist<Cap*> caps;
   xlist<Inode*> flushing_caps;
-  xlist<CapSnap*> flushing_capsnaps;
   xlist<MetaRequest*> requests;
   xlist<MetaRequest*> unsafe_requests;
   std::set<ceph_tid_t> flushing_caps_tids;
-  std::set<ceph_tid_t> kicked_flush_tids;
+  std::set<Inode*> early_flushing_caps;
 
   Cap *s_cap_iterator;
 
@@ -56,8 +56,8 @@ struct MetaSession {
   MetaSession()
     : mds_num(-1), con(NULL),
       seq(0), cap_gen(0), cap_renew_seq(0), num_caps(0),
-      state(STATE_NEW), readonly(false), s_cap_iterator(NULL),
-      release(NULL)
+      state(STATE_NEW), mds_state(0), readonly(false),
+      s_cap_iterator(NULL), release(NULL)
   {}
   ~MetaSession();
 
diff --git a/src/client/fuse_ll.cc b/src/client/fuse_ll.cc
index c6e533b..671e6a4 100644
--- a/src/client/fuse_ll.cc
+++ b/src/client/fuse_ll.cc
@@ -471,7 +471,9 @@ static void fuse_ll_open(fuse_req_t req, fuse_ino_t ino,
   if (r == 0) {
     fi->fh = (long)fh;
 #if FUSE_VERSION >= FUSE_MAKE_VERSION(2, 8)
-    if (cfuse->client->cct->_conf->fuse_use_invalidate_cb)
+    if (cfuse->client->cct->_conf->fuse_disable_pagecache)
+      fi->direct_io = 1;
+    else if (cfuse->client->cct->_conf->fuse_use_invalidate_cb)
       fi->keep_cache = 1;
 #endif
     fuse_reply_open(req, fi);
@@ -673,6 +675,12 @@ static void fuse_ll_create(fuse_req_t req, fuse_ino_t parent, const char *name,
   if (r == 0) {
     fi->fh = (long)fh;
     fe.ino = cfuse->make_fake_ino(fe.attr.st_ino, fe.attr.st_dev);
+#if FUSE_VERSION >= FUSE_MAKE_VERSION(2, 8)
+    if (cfuse->client->cct->_conf->fuse_disable_pagecache)
+      fi->direct_io = 1;
+    else if (cfuse->client->cct->_conf->fuse_use_invalidate_cb)
+      fi->keep_cache = 1;
+#endif
     fuse_reply_create(req, &fe, fi);
   } else
     fuse_reply_err(req, -r);
diff --git a/src/cls/rgw/cls_rgw.cc b/src/cls/rgw/cls_rgw.cc
index a29ad7b..46c2a3f 100644
--- a/src/cls/rgw/cls_rgw.cc
+++ b/src/cls/rgw/cls_rgw.cc
@@ -1912,7 +1912,7 @@ int rgw_dir_suggest_changes(cls_method_context_t hctx, bufferlist *in, bufferlis
                 cur_disk.pending_map.begin();
       while(iter != cur_disk.pending_map.end()) {
         map<string, struct rgw_bucket_pending_info>::iterator cur_iter=iter++;
-        if (cur_time > (cur_iter->second.timestamp + tag_timeout)) {
+        if (cur_time > (cur_iter->second.timestamp + timespan(tag_timeout))) {
           cur_disk.pending_map.erase(cur_iter);
         }
       }
@@ -1933,12 +1933,22 @@ int rgw_dir_suggest_changes(cls_method_context_t hctx, bufferlist *in, bufferlis
       }
       struct rgw_bucket_category_stats& stats =
           header.stats[cur_change.meta.category];
+      bool log_op = (op & CEPH_RGW_DIR_SUGGEST_LOG_OP) != 0;
+      op &= CEPH_RGW_DIR_SUGGEST_OP_MASK;
       switch(op) {
       case CEPH_RGW_REMOVE:
         CLS_LOG(10, "CEPH_RGW_REMOVE name=%s instance=%s\n", cur_change.key.name.c_str(), cur_change.key.instance.c_str());
 	ret = cls_cxx_map_remove_key(hctx, cur_change_key);
 	if (ret < 0)
 	  return ret;
+        if (log_op && cur_disk.exists) {
+          ret = log_index_operation(hctx, cur_disk.key, CLS_RGW_OP_DEL, cur_disk.tag, cur_disk.meta.mtime,
+                                    cur_disk.ver, CLS_RGW_STATE_COMPLETE, header.ver, header.max_marker, 0, NULL, NULL);
+          if (ret < 0) {
+            CLS_LOG(0, "ERROR: %s(): failed to log operation ret=%d", __func__, ret);
+            return ret;
+          }
+        }
         break;
       case CEPH_RGW_UPDATE:
         CLS_LOG(10, "CEPH_RGW_UPDATE name=%s instance=%s total_entries: %" PRId64 " -> %" PRId64 "\n",
@@ -1953,9 +1963,18 @@ int rgw_dir_suggest_changes(cls_method_context_t hctx, bufferlist *in, bufferlis
         ret = cls_cxx_map_set_val(hctx, cur_change_key, &cur_state_bl);
         if (ret < 0)
 	  return ret;
+        if (log_op) {
+          ret = log_index_operation(hctx, cur_change.key, CLS_RGW_OP_ADD, cur_change.tag, cur_change.meta.mtime,
+                                    cur_change.ver, CLS_RGW_STATE_COMPLETE, header.ver, header.max_marker, 0, NULL, NULL);
+          if (ret < 0) {
+            CLS_LOG(0, "ERROR: %s(): failed to log operation ret=%d", __func__, ret);
+            return ret;
+          }
+        }
         break;
       }
     }
+
   }
 
   if (header_changed) {
diff --git a/src/cls/rgw/cls_rgw_types.h b/src/cls/rgw/cls_rgw_types.h
index a7d3b7a..cf143ce 100644
--- a/src/cls/rgw/cls_rgw_types.h
+++ b/src/cls/rgw/cls_rgw_types.h
@@ -11,7 +11,9 @@
 
 #define CEPH_RGW_REMOVE 'r'
 #define CEPH_RGW_UPDATE 'u'
-#define CEPH_RGW_TAG_TIMEOUT 60*60*24
+#define CEPH_RGW_TAG_TIMEOUT 120
+#define CEPH_RGW_DIR_SUGGEST_LOG_OP  0x80
+#define CEPH_RGW_DIR_SUGGEST_OP_MASK 0x7f
 
 class JSONObj;
 
diff --git a/src/common/admin_socket.cc b/src/common/admin_socket.cc
index aedaed5..40fa12f 100644
--- a/src/common/admin_socket.cc
+++ b/src/common/admin_socket.cc
@@ -290,7 +290,7 @@ void* AdminSocket::entry()
 void AdminSocket::chown(uid_t uid, gid_t gid)
 {
   if (m_sock_fd >= 0) {
-    int r = ::fchown(m_sock_fd, uid, gid);
+    int r = ::chown(m_path.c_str(), uid, gid);
     if (r < 0) {
       r = -errno;
       lderr(m_cct) << "AdminSocket: failed to chown socket: "
diff --git a/src/common/ceph_context.cc b/src/common/ceph_context.cc
index e873c9f..c8dab36 100644
--- a/src/common/ceph_context.cc
+++ b/src/common/ceph_context.cc
@@ -641,6 +641,11 @@ uint32_t CephContext::get_module_type() const
   return _module_type;
 }
 
+void CephContext::set_init_flags(int flags)
+{
+  _init_flags = flags;
+}
+
 int CephContext::get_init_flags() const
 {
   return _init_flags;
diff --git a/src/common/ceph_context.h b/src/common/ceph_context.h
index cdb66f5..a455a0c 100644
--- a/src/common/ceph_context.h
+++ b/src/common/ceph_context.h
@@ -84,6 +84,7 @@ public:
   /* Get the module type (client, mon, osd, mds, etc.) */
   uint32_t get_module_type() const;
 
+  void set_init_flags(int flags);
   int get_init_flags() const;
 
   /* Get the PerfCountersCollection of this CephContext */
diff --git a/src/common/common_init.cc b/src/common/common_init.cc
index b1f7165..d4ed4f9 100644
--- a/src/common/common_init.cc
+++ b/src/common/common_init.cc
@@ -12,6 +12,7 @@
  *
  */
 
+#include "common/admin_socket.h"
 #include "common/ceph_argparse.h"
 #include "common/ceph_context.h"
 #include "common/ceph_crypto.h"
@@ -123,6 +124,12 @@ void common_init_finish(CephContext *cct)
 {
   cct->init_crypto();
 
-  if (!(cct->get_init_flags() & CINIT_FLAG_NO_DAEMON_ACTIONS))
+  int flags = cct->get_init_flags();
+  if (!(flags & CINIT_FLAG_NO_DAEMON_ACTIONS))
     cct->start_service_thread();
+
+  if ((flags & CINIT_FLAG_DEFER_DROP_PRIVILEGES) &&
+      (cct->get_set_uid() || cct->get_set_gid())) {
+    cct->get_admin_socket()->chown(cct->get_set_uid(), cct->get_set_gid());
+  }
 }
diff --git a/src/common/config_opts.h b/src/common/config_opts.h
index 398d068..7be4231 100644
--- a/src/common/config_opts.h
+++ b/src/common/config_opts.h
@@ -195,7 +195,8 @@ OPTION(ms_inject_delay_probability, OPT_DOUBLE, 0) // range [0, 1]
 OPTION(ms_inject_internal_delays, OPT_DOUBLE, 0)   // seconds
 OPTION(ms_dump_on_send, OPT_BOOL, false)           // hexdump msg to log on send
 OPTION(ms_dump_corrupt_message_level, OPT_INT, 1)  // debug level to hexdump undecodeable messages at
-OPTION(ms_async_op_threads, OPT_INT, 3)
+OPTION(ms_async_op_threads, OPT_INT, 3)            // number of worker processing threads for async messenger created on init
+OPTION(ms_async_max_op_threads, OPT_INT, 5)        // max number of worker processing threads for async messenger
 OPTION(ms_async_set_affinity, OPT_BOOL, true)
 // example: ms_async_affinity_cores = 0,1
 // The number of coreset is expected to equal to ms_async_op_threads, otherwise
@@ -400,7 +401,8 @@ OPTION(client_permissions, OPT_BOOL, true)
 OPTION(client_dirsize_rbytes, OPT_BOOL, true)
 
 // note: the max amount of "in flight" dirty data is roughly (max - target)
-OPTION(fuse_use_invalidate_cb, OPT_BOOL, false) // use fuse 2.8+ invalidate callback to keep page cache consistent
+OPTION(fuse_use_invalidate_cb, OPT_BOOL, true) // use fuse 2.8+ invalidate callback to keep page cache consistent
+OPTION(fuse_disable_pagecache, OPT_BOOL, false)
 OPTION(fuse_allow_other, OPT_BOOL, true)
 OPTION(fuse_default_permissions, OPT_BOOL, true)
 OPTION(fuse_big_writes, OPT_BOOL, true)
@@ -481,6 +483,7 @@ OPTION(mds_bal_merge_rd, OPT_FLOAT, 1000)
 OPTION(mds_bal_merge_wr, OPT_FLOAT, 1000)
 OPTION(mds_bal_interval, OPT_INT, 10)           // seconds
 OPTION(mds_bal_fragment_interval, OPT_INT, 5)      // seconds
+OPTION(mds_bal_fragment_size_max, OPT_INT, 10000*10) // order of magnitude higher than split size
 OPTION(mds_bal_idle_threshold, OPT_FLOAT, 0)
 OPTION(mds_bal_max, OPT_INT, -1)
 OPTION(mds_bal_max_until, OPT_INT, -1)
@@ -1209,6 +1212,16 @@ OPTION(rbd_journal_object_flush_interval, OPT_INT, 0) // maximum number of pendi
 OPTION(rbd_journal_object_flush_bytes, OPT_INT, 0) // maximum number of pending bytes per journal object
 OPTION(rbd_journal_object_flush_age, OPT_DOUBLE, 0) // maximum age (in seconds) for pending commits
 OPTION(rbd_journal_pool, OPT_STR, "") // pool for journal objects
+OPTION(rbd_journal_max_payload_bytes, OPT_U32, 16384) // maximum journal payload size before splitting
+
+/**
+ * RBD Mirror options
+ */
+OPTION(rbd_mirror_journal_commit_age, OPT_DOUBLE, 5) // commit time interval, seconds
+OPTION(rbd_mirror_journal_poll_age, OPT_DOUBLE, 5) // maximum age (in seconds) between successive journal polls
+OPTION(rbd_mirror_journal_max_fetch_bytes, OPT_U32, 32768) // maximum bytes to read from each journal data object per fetch
+OPTION(rbd_mirror_sync_point_update_age, OPT_DOUBLE, 30) // number of seconds between each update of the image sync point object number
+OPTION(rbd_mirror_concurrent_image_syncs, OPT_U32, 5) // maximum number of image syncs in parallel
 
 OPTION(nss_db_path, OPT_STR, "") // path to nss db
 
@@ -1335,6 +1348,7 @@ OPTION(rgw_enable_usage_log, OPT_BOOL, false) // enable logging bandwidth usage
 OPTION(rgw_ops_log_rados, OPT_BOOL, true) // whether ops log should go to rados
 OPTION(rgw_ops_log_socket_path, OPT_STR, "") // path to unix domain socket where ops log can go
 OPTION(rgw_ops_log_data_backlog, OPT_INT, 5 << 20) // max data backlog for ops log
+OPTION(rgw_fcgi_socket_backlog, OPT_INT, 1024) // socket  backlog for fcgi
 OPTION(rgw_usage_log_flush_threshold, OPT_INT, 1024) // threshold to flush pending log data
 OPTION(rgw_usage_log_tick_interval, OPT_INT, 30) // flush pending log data every X seconds
 OPTION(rgw_intent_log_object_name, OPT_STR, "%Y-%m-%d-%i-%n")  // man date to see codes (a subset are supported)
@@ -1361,6 +1375,8 @@ OPTION(rgw_opstate_ratelimit_sec, OPT_INT, 30) // min time between opstate updat
 OPTION(rgw_curl_wait_timeout_ms, OPT_INT, 1000) // timeout for certain curl calls
 OPTION(rgw_copy_obj_progress, OPT_BOOL, true) // should dump progress during long copy operations?
 OPTION(rgw_copy_obj_progress_every_bytes, OPT_INT, 1024 * 1024) // min bytes between copy progress output
+OPTION(rgw_obj_tombstone_cache_size, OPT_INT, 1000) // how many objects in tombstone cache, which is used in multi-zone sync to keep
+                                                    // track of removed objects' mtime
 
 OPTION(rgw_data_log_window, OPT_INT, 30) // data log entries window (in seconds)
 OPTION(rgw_data_log_changes_size, OPT_INT, 1000) // number of in-memory entries to hold for data changes log
@@ -1391,7 +1407,7 @@ OPTION(rgw_multipart_part_upload_limit, OPT_INT, 10000) // parts limit in multip
 OPTION(rgw_max_slo_entries, OPT_INT, 1000) // default number of max entries in slo
 
 OPTION(rgw_olh_pending_timeout_sec, OPT_INT, 3600) // time until we retire a pending olh change
-OPTION(rgw_user_max_buckets, OPT_U32, 1000) // global option to set max buckets count for all user
+OPTION(rgw_user_max_buckets, OPT_INT, 1000) // global option to set max buckets count for all user
 
 OPTION(rgw_objexp_gc_interval, OPT_U32, 60 * 10) // maximum time between round of expired objects garbage collecting
 OPTION(rgw_objexp_time_step, OPT_U32, 4096) // number of seconds for rounding the timestamps
diff --git a/src/common/event_socket.h b/src/common/event_socket.h
index 5c6b40b..2f3db40 100644
--- a/src/common/event_socket.h
+++ b/src/common/event_socket.h
@@ -18,6 +18,7 @@
 #define CEPH_COMMON_EVENT_SOCKET_H
 
 #include "include/event_type.h"
+#include <unistd.h>
 
 class EventSocket {
   int socket;
diff --git a/src/common/obj_bencher.cc b/src/common/obj_bencher.cc
index 9a2215e..267806c 100644
--- a/src/common/obj_bencher.cc
+++ b/src/common/obj_bencher.cc
@@ -231,6 +231,7 @@ int ObjBencher::aio_bench(
   int num_objects = 0;
   int r = 0;
   int prevPid = 0;
+  utime_t runtime;
 
   // default metadata object is used if user does not specify one
   const std::string run_name_meta = (run_name.empty() ? BENCH_LASTRUN_METADATA : run_name);
@@ -291,9 +292,15 @@ int ObjBencher::aio_bench(
       goto out;
     }
 
+    data.start_time = ceph_clock_now(cct);
+    out(cout) << "Cleaning up (deleting benchmark objects)" << std::endl;
+
     r = clean_up(num_objects, prevPid, concurrentios);
     if (r != 0) goto out;
 
+    runtime = ceph_clock_now(cct) - data.start_time;
+    out(cout) << "Clean up completed and total clean up time :" << runtime << std::endl;
+
     // lastrun file
     r = sync_remove(run_name_meta);
     if (r != 0) goto out;
diff --git a/src/common/scrub_types.cc b/src/common/scrub_types.cc
index 3342a49..336965c 100644
--- a/src/common/scrub_types.cc
+++ b/src/common/scrub_types.cc
@@ -158,12 +158,22 @@ void inconsistent_snapset_wrapper::set_headless()
 
 void inconsistent_snapset_wrapper::set_ss_attr_missing()
 {
-  errors |= inc_snapset_t::ATTR_MISSING;
+  errors |= inc_snapset_t::SNAPSET_MISSING;
+}
+
+void inconsistent_snapset_wrapper::set_oi_attr_missing()
+{
+  errors |= inc_snapset_t::OI_MISSING;
 }
 
 void inconsistent_snapset_wrapper::set_ss_attr_corrupted()
 {
-  errors |= inc_snapset_t::ATTR_CORRUPTED;
+  errors |= inc_snapset_t::SNAPSET_CORRUPTED;
+}
+
+void inconsistent_snapset_wrapper::set_oi_attr_corrupted()
+{
+  errors |= inc_snapset_t::OI_CORRUPTED;
 }
 
 void inconsistent_snapset_wrapper::set_clone_missing(snapid_t snap)
@@ -172,6 +182,12 @@ void inconsistent_snapset_wrapper::set_clone_missing(snapid_t snap)
   missing.push_back(snap);
 }
 
+void inconsistent_snapset_wrapper::set_clone(snapid_t snap)
+{
+  errors |= inc_snapset_t::EXTRA_CLONES;
+  clones.push_back(snap);
+}
+
 void inconsistent_snapset_wrapper::set_snapset_mismatch()
 {
   errors |= inc_snapset_t::SNAP_MISMATCH;
diff --git a/src/common/scrub_types.h b/src/common/scrub_types.h
index c9f4fda..dc93c88 100644
--- a/src/common/scrub_types.h
+++ b/src/common/scrub_types.h
@@ -51,13 +51,13 @@ public:
     errors |= err_t::SIZE_MISMATCH;
   }
   void set_attr_missing() {
-    errors |= err_t::ATTR_MISMATCH;
+    errors |= err_t::ATTR_MISSING;
   }
   void set_attr_mismatch() {
     errors |= err_t::ATTR_MISMATCH;
   }
   void set_attr_unexpected() {
-    errors |= err_t::ATTR_MISMATCH;
+    errors |= err_t::ATTR_UNEXPECTED;
   }
   void encode(bufferlist& bl) const;
   void decode(bufferlist::iterator& bp);
@@ -96,9 +96,13 @@ struct inconsistent_snapset_wrapper : public librados::inconsistent_snapset_t {
   // soid claims that it is a head or a snapdir, but its SS_ATTR
   // is missing.
   void set_ss_attr_missing();
+  void set_oi_attr_missing();
   void set_ss_attr_corrupted();
+  void set_oi_attr_corrupted();
   // snapset with missing clone
   void set_clone_missing(snapid_t);
+  // Clones that are there
+  void set_clone(snapid_t);
   // the snapset is not consistent with itself
   void set_snapset_mismatch();
   // soid.snap inconsistent with snapset
diff --git a/src/common/strtol.cc b/src/common/strtol.cc
index 50598b9..f43d661 100644
--- a/src/common/strtol.cc
+++ b/src/common/strtol.cc
@@ -189,6 +189,8 @@ template int strict_si_cast<int>(const char *str, std::string *err);
 
 template long long strict_si_cast<long long>(const char *str, std::string *err);
 
+template int64_t strict_si_cast<int64_t>(const char *str, std::string *err);
+
 template uint64_t strict_si_cast<uint64_t>(const char *str, std::string *err);
 
 uint64_t strict_sistrtoll(const char *str, std::string *err)
diff --git a/src/global/global_init.cc b/src/global/global_init.cc
index 5460eef..38636df 100644
--- a/src/global/global_init.cc
+++ b/src/global/global_init.cc
@@ -60,6 +60,26 @@ static const char* c_str_or_null(const std::string &str)
   return str.c_str();
 }
 
+static int chown_path(const std::string &pathname, const uid_t owner, const gid_t group,
+		      const std::string &uid_str, const std::string &gid_str)
+{
+  const char *pathname_cstr = c_str_or_null(pathname);
+
+  if (!pathname_cstr) {
+    return 0;
+  }
+
+  int r = ::chown(pathname_cstr, owner, group);
+
+  if (r < 0) {
+    r = -errno;
+    cerr << "warning: unable to chown() " << pathname << " as "
+	 << uid_str << ":" << gid_str << ": " << cpp_strerror(r) << std::endl;
+  }
+
+  return r;
+}
+
 void global_pre_init(std::vector < const char * > *alt_def_args,
 		     std::vector < const char* >& args,
 		     uint32_t module_type, code_environment_t code_env,
@@ -128,6 +148,12 @@ void global_init(std::vector < const char * > *alt_def_args,
   }
   first_run = false;
 
+  // Verify flags have not changed if global_pre_init() has been called
+  // manually. If they have, update them.
+  if (g_ceph_context->get_init_flags() != flags) {
+    g_ceph_context->set_init_flags(flags);
+  }
+
   // signal stuff
   int siglist[] = { SIGPIPE, 0 };
   block_signals(siglist, NULL);
@@ -265,16 +291,21 @@ void global_init(std::vector < const char * > *alt_def_args,
 
   if (priv_ss.str().length()) {
     dout(0) << priv_ss.str() << dendl;
+  }
 
-    if (g_ceph_context->get_set_uid() || g_ceph_context->get_set_gid()) {
-      // fix ownership on log, asok files.  this is sadly a bit of a hack :(
-      g_ceph_context->_log->chown_log_file(
-	g_ceph_context->get_set_uid(),
-	g_ceph_context->get_set_gid());
-      g_ceph_context->get_admin_socket()->chown(
-	g_ceph_context->get_set_uid(),
-	g_ceph_context->get_set_gid());
-    }
+  if ((flags & CINIT_FLAG_DEFER_DROP_PRIVILEGES) &&
+      (g_ceph_context->get_set_uid() || g_ceph_context->get_set_gid())) {
+    // Fix ownership on log files and run directories if needed.
+    // Admin socket files are chown()'d during the common init path _after_
+    // the service thread has been started. This is sadly a bit of a hack :(
+    chown_path(g_conf->run_dir,
+	       g_ceph_context->get_set_uid(),
+	       g_ceph_context->get_set_gid(),
+	       g_ceph_context->get_set_uid_string(),
+	       g_ceph_context->get_set_gid_string());
+    g_ceph_context->_log->chown_log_file(
+      g_ceph_context->get_set_uid(),
+      g_ceph_context->get_set_gid());
   }
 
   // Now we're ready to complain about config file parse errors
@@ -305,15 +336,21 @@ int global_init_prefork(CephContext *cct)
   const md_config_t *conf = cct->_conf;
   if (!conf->daemonize) {
 
-    if (pidfile_write(g_conf) < 0)
+    if (pidfile_write(conf) < 0)
       exit(1);
 
+    if ((cct->get_init_flags() & CINIT_FLAG_DEFER_DROP_PRIVILEGES) &&
+	(cct->get_set_uid() || cct->get_set_gid())) {
+      chown_path(conf->pid_file, cct->get_set_uid(), cct->get_set_gid(),
+		 cct->get_set_uid_string(), cct->get_set_gid_string());
+    }
+
     return -1;
   }
 
   // stop log thread
-  g_ceph_context->_log->flush();
-  g_ceph_context->_log->stop();
+  cct->_log->flush();
+  cct->_log->stop();
   return 0;
 }
 
@@ -341,7 +378,7 @@ void global_init_daemonize(CephContext *cct)
 void global_init_postfork_start(CephContext *cct)
 {
   // restart log thread
-  g_ceph_context->_log->start();
+  cct->_log->start();
 
   /* This is the old trick where we make file descriptors 0, 1, and possibly 2
    * point to /dev/null.
@@ -366,8 +403,15 @@ void global_init_postfork_start(CephContext *cct)
     exit(1);
   }
 
-  if (pidfile_write(g_conf) < 0)
+  const md_config_t *conf = cct->_conf;
+  if (pidfile_write(conf) < 0)
     exit(1);
+
+  if ((cct->get_init_flags() & CINIT_FLAG_DEFER_DROP_PRIVILEGES) &&
+      (cct->get_set_uid() || cct->get_set_gid())) {
+    chown_path(conf->pid_file, cct->get_set_uid(), cct->get_set_gid(),
+	       cct->get_set_uid_string(), cct->get_set_gid_string());
+  }
 }
 
 void global_init_postfork_finish(CephContext *cct)
diff --git a/src/include/buffer.h b/src/include/buffer.h
index c786bf2..363cf63 100644
--- a/src/include/buffer.h
+++ b/src/include/buffer.h
@@ -57,10 +57,8 @@
 
 #if __GNUC__ >= 4
   #define CEPH_BUFFER_API  __attribute__ ((visibility ("default")))
-  #define CEPH_BUFFER_DETAILS __attribute__ ((visibility ("hidden")))
 #else
   #define CEPH_BUFFER_API
-  #define CEPH_BUFFER_DETAILS
 #endif
 
 #if defined(HAVE_XIO)
@@ -270,7 +268,7 @@ namespace buffer CEPH_BUFFER_API {
 
   private:
     template <bool is_const>
-    class CEPH_BUFFER_DETAILS iterator_impl
+    class CEPH_BUFFER_API iterator_impl
       : public std::iterator<std::forward_iterator_tag, char> {
     protected:
       typedef typename std::conditional<is_const,
diff --git a/src/include/rados/buffer.h b/src/include/rados/buffer.h
index c786bf2..363cf63 100644
--- a/src/include/rados/buffer.h
+++ b/src/include/rados/buffer.h
@@ -57,10 +57,8 @@
 
 #if __GNUC__ >= 4
   #define CEPH_BUFFER_API  __attribute__ ((visibility ("default")))
-  #define CEPH_BUFFER_DETAILS __attribute__ ((visibility ("hidden")))
 #else
   #define CEPH_BUFFER_API
-  #define CEPH_BUFFER_DETAILS
 #endif
 
 #if defined(HAVE_XIO)
@@ -270,7 +268,7 @@ namespace buffer CEPH_BUFFER_API {
 
   private:
     template <bool is_const>
-    class CEPH_BUFFER_DETAILS iterator_impl
+    class CEPH_BUFFER_API iterator_impl
       : public std::iterator<std::forward_iterator_tag, char> {
     protected:
       typedef typename std::conditional<is_const,
diff --git a/src/include/rados/librados.h b/src/include/rados/librados.h
index 44373ff..9eff2b2 100644
--- a/src/include/rados/librados.h
+++ b/src/include/rados/librados.h
@@ -1819,6 +1819,22 @@ CEPH_RADOS_API int rados_aio_is_safe_and_cb(rados_completion_t c);
 CEPH_RADOS_API int rados_aio_get_return_value(rados_completion_t c);
 
 /**
+ * Get the internal object version of the target of an asychronous operation
+ *
+ * The return value is set when the operation is complete or safe,
+ * whichever comes first.
+ *
+ * @pre The operation is safe or complete
+ *
+ * @note BUG: complete callback may never be called when the safe
+ * message is received before the complete message
+ *
+ * @param c async operation to inspect
+ * @returns version number of the asychronous operation's target
+ */
+CEPH_RADOS_API uint64_t rados_aio_get_version(rados_completion_t c);
+
+/**
  * Release a completion
  *
  * Call this when you no longer need the completion. It may not be
diff --git a/src/include/rados/rados_types.hpp b/src/include/rados/rados_types.hpp
index d10e5f5..ca7a490 100644
--- a/src/include/rados/rados_types.hpp
+++ b/src/include/rados/rados_types.hpp
@@ -52,6 +52,7 @@ struct object_id_t {
 
 struct err_t {
   enum {
+    ATTR_UNEXPECTED      = 1 << 0,
     SHARD_MISSING        = 1 << 1,
     SHARD_STAT_ERR       = 1 << 2,
     SHARD_READ_ERR       = 1 << 3,
@@ -59,12 +60,15 @@ struct err_t {
     OMAP_DIGEST_MISMATCH = 1 << 5,
     SIZE_MISMATCH        = 1 << 6,
     ATTR_MISMATCH        = 1 << 7,
-    SNAPSET_MISSING      = 1 << 8,
+    ATTR_MISSING         = 1 << 8,
     DATA_DIGEST_MISMATCH_OI = 1 << 9,
     OMAP_DIGEST_MISMATCH_OI = 1 << 10,
     SIZE_MISMATCH_OI        = 1 << 11,
   };
   uint64_t errors = 0;
+  bool has_attr_unexpected() const {
+    return errors & ATTR_UNEXPECTED;
+  }
   bool has_shard_missing() const {
     return errors & SHARD_MISSING;
   }
@@ -97,6 +101,9 @@ struct err_t {
   bool has_attr_mismatch() const {
     return errors & ATTR_MISMATCH;
   }
+  bool has_attr_missing() const {
+    return errors & ATTR_MISSING;
+  }
 };
 
 struct shard_info_t : err_t {
@@ -124,24 +131,28 @@ struct inconsistent_snapset_t {
     : object{head}
   {}
   enum {
-    ATTR_MISSING   = 1 << 0,
-    ATTR_CORRUPTED = 1 << 1,
+    SNAPSET_MISSING = 1 << 0,
+    SNAPSET_CORRUPTED = 1 << 1,
     CLONE_MISSING  = 1 << 2,
     SNAP_MISMATCH  = 1 << 3,
     HEAD_MISMATCH  = 1 << 4,
     HEADLESS_CLONE = 1 << 5,
     SIZE_MISMATCH  = 1 << 6,
+    OI_MISSING   = 1 << 7,
+    OI_CORRUPTED = 1 << 8,
+    EXTRA_CLONES = 1 << 9,
   };
   uint64_t errors = 0;
   object_id_t object;
+  // Extra clones
   std::vector<snap_t> clones;
   std::vector<snap_t> missing;
 
   bool ss_attr_missing() const {
-    return errors & ATTR_MISSING;
+    return errors & SNAPSET_MISSING;
   }
   bool ss_attr_corrupted() const {
-    return errors & ATTR_CORRUPTED;
+    return errors & SNAPSET_CORRUPTED;
   }
   bool clone_missing() const  {
     return errors & CLONE_MISSING;
@@ -158,6 +169,15 @@ struct inconsistent_snapset_t {
   bool size_mismatch() const {
     return errors & SIZE_MISMATCH;
   }
+  bool oi_attr_missing() const {
+    return errors & OI_MISSING;
+  }
+  bool oi_attr_corrupted() const {
+    return errors & OI_CORRUPTED;
+  }
+  bool extra_clones() const {
+    return errors & EXTRA_CLONES;
+  }
 };
 
 /**
diff --git a/src/include/rbd/librbd.h b/src/include/rbd/librbd.h
index fb61b8f..c636494 100644
--- a/src/include/rbd/librbd.h
+++ b/src/include/rbd/librbd.h
@@ -61,6 +61,8 @@ typedef void (*rbd_callback_t)(rbd_completion_t cb, void *arg);
 
 typedef int (*librbd_progress_fn_t)(uint64_t offset, uint64_t total, void *ptr);
 
+typedef void (*rbd_update_callback_t)(void *arg);
+
 typedef struct {
   uint64_t id;
   uint64_t size;
@@ -641,6 +643,27 @@ CEPH_RBD_API int rbd_mirror_image_get_status(rbd_image_t image,
                                              rbd_mirror_image_status_t *mirror_image_status,
                                              size_t status_size);
 
+/**
+ * Register an image metadata change watcher.
+ *
+ * @param image the image to watch
+ * @param handle where to store the internal id assigned to this watch
+ * @param watch_cb what to do when a notify is received on this image
+ * @param arg opaque value to pass to the callback
+ * @returns 0 on success, negative error code on failure
+ */
+CEPH_RBD_API int rbd_update_watch(rbd_image_t image, uint64_t *handle,
+				  rbd_update_callback_t watch_cb, void *arg);
+
+/**
+ * Unregister an image watcher.
+ *
+ * @param image the image to unwatch
+ * @param handle which watch to unregister
+ * @returns 0 on success, negative error code on failure
+ */
+CEPH_RBD_API int rbd_update_unwatch(rbd_image_t image, uint64_t handle);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/include/rbd/librbd.hpp b/src/include/rbd/librbd.hpp
index e4c434a..4dd4e6b 100644
--- a/src/include/rbd/librbd.hpp
+++ b/src/include/rbd/librbd.hpp
@@ -176,6 +176,15 @@ private:
   rbd_image_options_t opts;
 };
 
+class CEPH_RBD_API UpdateWatchCtx {
+public:
+  virtual ~UpdateWatchCtx() {}
+  /**
+   * Callback activated when we receive a notify event.
+   */
+  virtual void handle_notify() = 0;
+};
+
 class CEPH_RBD_API Image
 {
 public:
@@ -356,6 +365,9 @@ public:
   int mirror_image_get_status(mirror_image_status_t *mirror_image_status,
 			      size_t status_size);
 
+  int update_watch(UpdateWatchCtx *ctx, uint64_t *handle);
+  int update_unwatch(uint64_t handle);
+
 private:
   friend class RBD;
 
diff --git a/src/journal/Entry.cc b/src/journal/Entry.cc
index f88dea8..257fa58 100644
--- a/src/journal/Entry.cc
+++ b/src/journal/Entry.cc
@@ -9,7 +9,7 @@
 
 #define dout_subsys ceph_subsys_journaler
 #undef dout_prefix
-#define dout_prefix *_dout << "Entry: "
+#define dout_prefix *_dout << "Entry: " << this << " "
 
 namespace journal {
 
diff --git a/src/journal/FutureImpl.cc b/src/journal/FutureImpl.cc
index aebfe12..1597c73 100644
--- a/src/journal/FutureImpl.cc
+++ b/src/journal/FutureImpl.cc
@@ -10,7 +10,7 @@ FutureImpl::FutureImpl(uint64_t tag_tid, uint64_t entry_tid,
                        uint64_t commit_tid)
   : RefCountedObject(NULL, 0), m_tag_tid(tag_tid), m_entry_tid(entry_tid),
     m_commit_tid(commit_tid),
-    m_lock(utils::unique_lock_name("FutureImpl::m_lock", this)), m_safe(false),
+    m_lock("FutureImpl::m_lock", false, false), m_safe(false),
     m_consistent(false), m_return_value(0), m_flush_state(FLUSH_STATE_NONE),
     m_consistent_ack(this) {
 }
@@ -27,36 +27,51 @@ void FutureImpl::init(const FutureImplPtr &prev_future) {
 }
 
 void FutureImpl::flush(Context *on_safe) {
+
   bool complete;
-  FlushHandlerPtr flush_handler;
+  FlushHandlers flush_handlers;
+  FutureImplPtr prev_future;
   {
     Mutex::Locker locker(m_lock);
     complete = (m_safe && m_consistent);
     if (!complete) {
-      if (on_safe != NULL) {
+      if (on_safe != nullptr) {
         m_contexts.push_back(on_safe);
       }
 
-      if (m_flush_state == FLUSH_STATE_NONE) {
-        m_flush_state = FLUSH_STATE_REQUESTED;
-        flush_handler = m_flush_handler;
-
-        // walk the chain backwards up to <splay width> futures
-        if (m_prev_future) {
-          m_prev_future->flush();
-        }
-      }
+      prev_future = prepare_flush(&flush_handlers);
     }
   }
 
+  // instruct prior futures to flush as well
+  while (prev_future) {
+    Mutex::Locker locker(prev_future->m_lock);
+    prev_future = prev_future->prepare_flush(&flush_handlers);
+  }
+
   if (complete && on_safe != NULL) {
     on_safe->complete(m_return_value);
-  } else if (flush_handler) {
+  } else if (!flush_handlers.empty()) {
     // attached to journal object -- instruct it to flush all entries through
     // this one.  possible to become detached while lock is released, so flush
     // will be re-requested by the object if it doesn't own the future
-    flush_handler->flush(this);
+    for (auto &pair : flush_handlers) {
+      pair.first->flush(pair.second);
+    }
+  }
+}
+
+FutureImplPtr FutureImpl::prepare_flush(FlushHandlers *flush_handlers) {
+  assert(m_lock.is_locked());
+
+  if (m_flush_state == FLUSH_STATE_NONE) {
+    m_flush_state = FLUSH_STATE_REQUESTED;
+
+    if (m_flush_handler && flush_handlers->count(m_flush_handler) == 0) {
+      flush_handlers->insert({m_flush_handler, this});
+    }
   }
+  return m_prev_future;
 }
 
 void FutureImpl::wait(Context *on_safe) {
diff --git a/src/journal/FutureImpl.h b/src/journal/FutureImpl.h
index 0d5e86f..0054272 100644
--- a/src/journal/FutureImpl.h
+++ b/src/journal/FutureImpl.h
@@ -9,6 +9,7 @@
 #include "common/RefCountedObj.h"
 #include "journal/Future.h"
 #include <list>
+#include <map>
 #include <boost/noncopyable.hpp>
 #include <boost/intrusive_ptr.hpp>
 #include "include/assert.h"
@@ -76,6 +77,7 @@ public:
 private:
   friend std::ostream &operator<<(std::ostream &, const FutureImpl &);
 
+  typedef std::map<FlushHandlerPtr, FutureImplPtr> FlushHandlers;
   typedef std::list<Context *> Contexts;
 
   enum FlushState {
@@ -110,6 +112,8 @@ private:
   C_ConsistentAck m_consistent_ack;
   Contexts m_contexts;
 
+  FutureImplPtr prepare_flush(FlushHandlers *flush_handlers);
+
   void consistent(int r);
   void finish_unlock();
 };
diff --git a/src/journal/JournalMetadata.cc b/src/journal/JournalMetadata.cc
index 18bef13..de46bd7 100644
--- a/src/journal/JournalMetadata.cc
+++ b/src/journal/JournalMetadata.cc
@@ -11,7 +11,7 @@
 
 #define dout_subsys ceph_subsys_journaler
 #undef dout_prefix
-#define dout_prefix *_dout << "JournalMetadata: "
+#define dout_prefix *_dout << "JournalMetadata: " << this << " "
 
 namespace journal {
 
@@ -402,9 +402,9 @@ JournalMetadata::JournalMetadata(ContextWQ *work_queue, SafeTimer *timer,
                                  Mutex *timer_lock, librados::IoCtx &ioctx,
                                  const std::string &oid,
                                  const std::string &client_id,
-                                 double commit_interval)
+                                 const Settings &settings)
     : RefCountedObject(NULL, 0), m_cct(NULL), m_oid(oid),
-      m_client_id(client_id), m_commit_interval(commit_interval), m_order(0),
+      m_client_id(client_id), m_settings(settings), m_order(0),
       m_splay_width(0), m_pool_id(-1), m_initialized(false),
       m_work_queue(work_queue), m_timer(timer), m_timer_lock(timer_lock),
       m_lock("JournalMetadata::m_lock"), m_commit_tid(0), m_watch_ctx(this),
@@ -587,7 +587,7 @@ void JournalMetadata::get_tags(const boost::optional<uint64_t> &tag_class,
   ctx->send();
 }
 
-void JournalMetadata::add_listener(Listener *listener) {
+void JournalMetadata::add_listener(JournalMetadataListener *listener) {
   Mutex::Locker locker(m_lock);
   while (m_update_notifications > 0) {
     m_update_cond.Wait(m_lock);
@@ -595,7 +595,7 @@ void JournalMetadata::add_listener(Listener *listener) {
   m_listeners.push_back(listener);
 }
 
-void JournalMetadata::remove_listener(Listener *listener) {
+void JournalMetadata::remove_listener(JournalMetadataListener *listener) {
   Mutex::Locker locker(m_lock);
   while (m_update_notifications > 0) {
     m_update_cond.Wait(m_lock);
@@ -795,7 +795,8 @@ void JournalMetadata::schedule_commit_task() {
   assert(m_commit_position_ctx != nullptr);
   if (m_commit_position_task_ctx == NULL) {
     m_commit_position_task_ctx = new C_CommitPositionTask(this);
-    m_timer->add_event_after(m_commit_interval, m_commit_position_task_ctx);
+    m_timer->add_event_after(m_settings.commit_interval,
+                             m_commit_position_task_ctx);
   }
 }
 
@@ -824,7 +825,7 @@ void JournalMetadata::handle_commit_position_task() {
 
 void JournalMetadata::schedule_watch_reset() {
   assert(m_timer_lock->is_locked());
-  m_timer->add_event_after(0.1, new C_WatchReset(this));
+  m_timer->add_event_after(1, new C_WatchReset(this));
 }
 
 void JournalMetadata::handle_watch_reset() {
@@ -835,8 +836,12 @@ void JournalMetadata::handle_watch_reset() {
 
   int r = m_ioctx.watch2(m_oid, &m_watch_handle, &m_watch_ctx);
   if (r < 0) {
-    lderr(m_cct) << __func__ << ": failed to watch journal"
-                 << cpp_strerror(r) << dendl;
+    if (r == -ENOENT) {
+      ldout(m_cct, 5) << __func__ << ": journal header not found" << dendl;
+    } else {
+      lderr(m_cct) << __func__ << ": failed to watch journal"
+                   << cpp_strerror(r) << dendl;
+    }
     schedule_watch_reset();
   } else {
     ldout(m_cct, 10) << __func__ << ": reset journal watch" << dendl;
@@ -854,7 +859,12 @@ void JournalMetadata::handle_watch_notify(uint64_t notify_id, uint64_t cookie) {
 }
 
 void JournalMetadata::handle_watch_error(int err) {
-  lderr(m_cct) << "journal watch error: " << cpp_strerror(err) << dendl;
+  if (err == -ENOTCONN) {
+    ldout(m_cct, 5) << "journal watch error: header removed" << dendl;
+  } else {
+    lderr(m_cct) << "journal watch error: " << cpp_strerror(err) << dendl;
+  }
+
   Mutex::Locker timer_locker(*m_timer_lock);
   Mutex::Locker locker(m_lock);
 
@@ -1036,7 +1046,7 @@ std::ostream &operator<<(std::ostream &os,
      << "active_set=" << jm.m_active_set << ", "
      << "client_id=" << jm.m_client_id << ", "
      << "commit_tid=" << jm.m_commit_tid << ", "
-     << "commit_interval=" << jm.m_commit_interval << ", "
+     << "commit_interval=" << jm.m_settings.commit_interval << ", "
      << "commit_position=" << jm.m_commit_position << ", "
      << "registered_clients=" << jm.m_registered_clients << "]";
   return os;
diff --git a/src/journal/JournalMetadata.h b/src/journal/JournalMetadata.h
index 1c084a4..01116d7 100644
--- a/src/journal/JournalMetadata.h
+++ b/src/journal/JournalMetadata.h
@@ -13,6 +13,8 @@
 #include "common/WorkQueue.h"
 #include "cls/journal/cls_journal_types.h"
 #include "journal/AsyncOpTracker.h"
+#include "journal/JournalMetadataListener.h"
+#include "journal/Settings.h"
 #include <boost/intrusive_ptr.hpp>
 #include <boost/noncopyable.hpp>
 #include <boost/optional.hpp>
@@ -41,14 +43,9 @@ public:
   typedef std::set<Client> RegisteredClients;
   typedef std::list<Tag> Tags;
 
-  struct Listener {
-    virtual ~Listener() {};
-    virtual void handle_update(JournalMetadata *) = 0;
-  };
-
   JournalMetadata(ContextWQ *work_queue, SafeTimer *timer, Mutex *timer_lock,
                   librados::IoCtx &ioctx, const std::string &oid,
-                  const std::string &client_id, double commit_interval);
+                  const std::string &client_id, const Settings &settings);
   ~JournalMetadata();
 
   void init(Context *on_init);
@@ -62,8 +59,8 @@ public:
   void get_mutable_metadata(uint64_t *minimum_set, uint64_t *active_set,
 			    RegisteredClients *clients, Context *on_finish);
 
-  void add_listener(Listener *listener);
-  void remove_listener(Listener *listener);
+  void add_listener(JournalMetadataListener *listener);
+  void remove_listener(JournalMetadataListener *listener);
 
   void register_client(const bufferlist &data, Context *on_finish);
   void update_client(const bufferlist &data, Context *on_finish);
@@ -77,6 +74,9 @@ public:
   void get_tags(const boost::optional<uint64_t> &tag_class, Tags *tags,
                 Context *on_finish);
 
+  inline const Settings &get_settings() const {
+    return m_settings;
+  }
   inline const std::string &get_client_id() const {
     return m_client_id;
   }
@@ -150,7 +150,7 @@ public:
 
 private:
   typedef std::map<uint64_t, uint64_t> AllocatedEntryTids;
-  typedef std::list<Listener*> Listeners;
+  typedef std::list<JournalMetadataListener*> Listeners;
 
   struct CommitEntry {
     uint64_t object_num;
@@ -291,7 +291,7 @@ private:
   CephContext *m_cct;
   std::string m_oid;
   std::string m_client_id;
-  double m_commit_interval;
+  Settings m_settings;
 
   uint8_t m_order;
   uint8_t m_splay_width;
diff --git a/src/journal/JournalMetadataListener.h b/src/journal/JournalMetadataListener.h
new file mode 100644
index 0000000..121fe68
--- /dev/null
+++ b/src/journal/JournalMetadataListener.h
@@ -0,0 +1,30 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2016 SUSE LINUX GmbH
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation.  See file COPYING.
+ *
+ */
+
+#ifndef CEPH_JOURNAL_JOURNAL_METADATA_LISTENER_H
+#define CEPH_JOURNAL_JOURNAL_METADATA_LISTENER_H
+
+namespace journal {
+
+class JournalMetadata;
+
+struct JournalMetadataListener {
+  virtual ~JournalMetadataListener() {};
+  virtual void handle_update(JournalMetadata *) = 0;
+};
+
+} // namespace journal
+
+#endif // CEPH_JOURNAL_JOURNAL_METADATA_LISTENER_H
+
diff --git a/src/journal/JournalPlayer.cc b/src/journal/JournalPlayer.cc
index 28905a2..09f2d2c 100644
--- a/src/journal/JournalPlayer.cc
+++ b/src/journal/JournalPlayer.cc
@@ -8,7 +8,7 @@
 
 #define dout_subsys ceph_subsys_journaler
 #undef dout_prefix
-#define dout_prefix *_dout << "JournalPlayer: "
+#define dout_prefix *_dout << "JournalPlayer: " << this << " "
 
 namespace journal {
 
@@ -53,8 +53,7 @@ JournalPlayer::JournalPlayer(librados::IoCtx &ioctx,
   : m_cct(NULL), m_object_oid_prefix(object_oid_prefix),
     m_journal_metadata(journal_metadata), m_replay_handler(replay_handler),
     m_lock("JournalPlayer::m_lock"), m_state(STATE_INIT), m_splay_offset(0),
-    m_watch_enabled(false), m_watch_scheduled(false), m_watch_interval(0),
-    m_commit_object(0) {
+    m_watch_enabled(false), m_watch_scheduled(false), m_watch_interval(0) {
   m_replay_handler->get();
   m_ioctx.dup(ioctx);
   m_cct = reinterpret_cast<CephContext *>(m_ioctx.cct());
@@ -69,8 +68,9 @@ JournalPlayer::JournalPlayer(librados::IoCtx &ioctx,
     uint8_t splay_width = m_journal_metadata->get_splay_width();
     auto &active_position = commit_position.object_positions.front();
     m_active_tag_tid = active_position.tag_tid;
-    m_commit_object = active_position.object_number;
-    m_splay_offset = m_commit_object % splay_width;
+    m_commit_position_valid = true;
+    m_commit_position = active_position;
+    m_splay_offset = active_position.object_number % splay_width;
     for (auto &position : commit_position.object_positions) {
       uint8_t splay_offset = position.object_number % splay_width;
       m_commit_positions[splay_offset] = position;
@@ -94,6 +94,7 @@ void JournalPlayer::prefetch() {
   assert(m_state == STATE_INIT);
   m_state = STATE_PREFETCH;
 
+  m_active_set = m_journal_metadata->get_active_set();
   uint8_t splay_width = m_journal_metadata->get_splay_width();
   for (uint8_t splay_offset = 0; splay_offset < splay_width; ++splay_offset) {
     m_prefetch_splay_offsets.insert(splay_offset);
@@ -107,9 +108,7 @@ void JournalPlayer::prefetch() {
     splay_offset_to_objects[position.first] = position.second.object_number;
   }
 
-  // prefetch the active object for each splay offset (and the following object)
-  uint64_t active_set = m_journal_metadata->get_active_set();
-  uint64_t max_object_number = (splay_width * (active_set + 1)) - 1;
+  // prefetch the active object for each splay offset
   std::set<uint64_t> prefetch_object_numbers;
   for (uint8_t splay_offset = 0; splay_offset < splay_width; ++splay_offset) {
     uint64_t object_number = splay_offset;
@@ -118,9 +117,6 @@ void JournalPlayer::prefetch() {
     }
 
     prefetch_object_numbers.insert(object_number);
-    if (object_number + splay_width <= max_object_number) {
-      prefetch_object_numbers.insert(object_number + splay_width);
-    }
   }
 
   ldout(m_cct, 10) << __func__ << ": prefetching "
@@ -156,7 +152,7 @@ void JournalPlayer::shut_down(Context *on_finish) {
     ObjectPlayerPtr object_player = get_object_player();
     switch (m_watch_step) {
     case WATCH_STEP_FETCH_FIRST:
-      object_player = m_object_players.begin()->second.begin()->second;
+      object_player = m_object_players.begin()->second;
       // fallthrough
     case WATCH_STEP_FETCH_CURRENT:
       object_player->unwatch();
@@ -182,12 +178,7 @@ bool JournalPlayer::try_pop_front(Entry *entry, uint64_t *commit_tid) {
     if (!is_object_set_ready()) {
       m_handler_notified = false;
     } else {
-      if (!m_watch_enabled) {
-        notify_complete(0);
-      } else if (!m_watch_scheduled) {
-        m_handler_notified = false;
-        schedule_watch();
-      }
+      refetch(true);
     }
     return false;
   }
@@ -266,14 +257,10 @@ int JournalPlayer::process_prefetch(uint64_t object_number) {
 
   bool prefetch_complete = false;
   assert(m_object_players.count(splay_offset) == 1);
-  ObjectPlayers &object_players = m_object_players[splay_offset];
+  ObjectPlayerPtr object_player = m_object_players[splay_offset];
 
   // prefetch in-order since a newer splay object could prefetch first
-  while (m_fetch_object_numbers.count(
-           object_players.begin()->second->get_object_number()) == 0) {
-    ObjectPlayerPtr object_player = object_players.begin()->second;
-    uint64_t player_object_number = object_player->get_object_number();
-
+  if (m_fetch_object_numbers.count(object_player->get_object_number()) == 0) {
     // skip past known committed records
     if (m_commit_positions.count(splay_offset) != 0 &&
         !object_player->empty()) {
@@ -302,20 +289,6 @@ int JournalPlayer::process_prefetch(uint64_t object_number) {
         object_player->pop_front();
       }
 
-      // if this object contains the commit position, our read should start with
-      // the next consistent journal entry in the sequence
-      if (player_object_number == m_commit_object) {
-        if (object_player->empty()) {
-          advance_splay_object();
-        } else {
-          Entry entry;
-          object_player->front(&entry);
-          if (entry.get_tag_tid() == position.tag_tid) {
-            advance_splay_object();
-          }
-        }
-      }
-
       // do not search for commit position for this object
       // if we've already seen it
       if (found_commit) {
@@ -324,9 +297,14 @@ int JournalPlayer::process_prefetch(uint64_t object_number) {
     }
 
     // if the object is empty, pre-fetch the next splay object
-    if (!remove_empty_object_player(object_player)) {
+    if (object_player->empty() && object_player->refetch_required()) {
+      ldout(m_cct, 10) << "refetching potentially partially decoded object"
+                       << dendl;
+      object_player->set_refetch_state(ObjectPlayer::REFETCH_STATE_NONE);
+      fetch(object_player);
+    } else if (!remove_empty_object_player(object_player)) {
+      ldout(m_cct, 10) << "prefetch of object complete" << dendl;
       prefetch_complete = true;
-      break;
     }
   }
 
@@ -339,17 +317,32 @@ int JournalPlayer::process_prefetch(uint64_t object_number) {
     return 0;
   }
 
+  ldout(m_cct, 10) << "switching to playback mode" << dendl;
   m_state = STATE_PLAYBACK;
+
+  // if we have a valid commit position, our read should start with
+  // the next consistent journal entry in the sequence
+  if (m_commit_position_valid) {
+    splay_offset = m_commit_position.object_number % splay_width;
+    object_player = m_object_players[splay_offset];
+
+    if (object_player->empty()) {
+      if (!object_player->refetch_required()) {
+        advance_splay_object();
+      }
+    } else {
+      Entry entry;
+      object_player->front(&entry);
+      if (entry.get_tag_tid() == m_commit_position.tag_tid) {
+        advance_splay_object();
+      }
+    }
+  }
+
   if (verify_playback_ready()) {
     notify_entries_available();
   } else if (is_object_set_ready()) {
-    if (m_watch_enabled) {
-      schedule_watch();
-    } else {
-      ldout(m_cct, 10) << __func__ << ": no uncommitted entries available"
-                       << dendl;
-      notify_complete(0);
-    }
+    refetch(false);
   }
   return 0;
 }
@@ -361,17 +354,7 @@ int JournalPlayer::process_playback(uint64_t object_number) {
   if (verify_playback_ready()) {
     notify_entries_available();
   } else if (is_object_set_ready()) {
-    if (m_watch_enabled) {
-      schedule_watch();
-    } else {
-      ObjectPlayerPtr object_player = get_object_player();
-      uint8_t splay_width = m_journal_metadata->get_splay_width();
-      uint64_t active_set = m_journal_metadata->get_active_set();
-      uint64_t object_set = object_player->get_object_number() / splay_width;
-      if (object_set == active_set) {
-        notify_complete(0);
-      }
-    }
+    refetch(false);
   }
   return 0;
 }
@@ -379,8 +362,10 @@ int JournalPlayer::process_playback(uint64_t object_number) {
 bool JournalPlayer::is_object_set_ready() const {
   assert(m_lock.is_locked());
   if (m_watch_scheduled || !m_fetch_object_numbers.empty()) {
+    ldout(m_cct, 20) << __func__ << ": waiting for in-flight fetch" << dendl;
     return false;
   }
+
   return true;
 }
 
@@ -455,13 +440,6 @@ bool JournalPlayer::verify_playback_ready() {
       } else if (m_prune_tag_tid && *m_prune_tag_tid == *m_active_tag_tid) {
         ldout(m_cct, 10) << __func__ << ": no more entries" << dendl;
         return false;
-      } else if (!m_watch_enabled) {
-        // current playback position is empty so this tag is done
-        ldout(m_cct, 10) << __func__ << ": no more in-sequence entries: "
-                         << "object_num=" << object_num << ", "
-                         << "active_tag=" << *m_active_tag_tid << dendl;
-        prune_active_tag(boost::none);
-        continue;
       } else if (m_watch_enabled && m_watch_prune_active_tag) {
         // detected current tag is now longer active and we have re-read the
         // current object but it's still empty, so this tag is done
@@ -470,12 +448,24 @@ bool JournalPlayer::verify_playback_ready() {
                          << "active_tag " << *m_active_tag_tid << dendl;
         prune_active_tag(boost::none);
         continue;
-      } else if (m_watch_enabled && object_player->refetch_required()) {
+      } else if (object_player->refetch_required()) {
         // if the active object requires a refetch, don't proceed looking for a
         // new tag before this process completes
         ldout(m_cct, 10) << __func__ << ": refetch required: "
                          << "object_num=" << object_num << dendl;
         return false;
+      } else if (!m_watch_enabled) {
+        // current playback position is empty so this tag is done
+        ldout(m_cct, 10) << __func__ << ": no more in-sequence entries: "
+                         << "object_num=" << object_num << ", "
+                         << "active_tag=" << *m_active_tag_tid << dendl;
+        prune_active_tag(boost::none);
+        continue;
+      } else if (!m_watch_scheduled) {
+        // no more entries and we don't have an active watch in-progress
+        ldout(m_cct, 10) << __func__ << ": no more entries -- watch required"
+                         << dendl;
+        return false;
       }
     }
   }
@@ -492,29 +482,38 @@ void JournalPlayer::prune_tag(uint64_t tag_tid) {
     m_prune_tag_tid = tag_tid;
   }
 
-  for (auto &players : m_object_players) {
-    for (auto player_pair : players.second) {
-      ObjectPlayerPtr object_player = player_pair.second;
-      ldout(m_cct, 15) << __func__ << ": checking " << object_player->get_oid()
-                       << dendl;
-      while (!object_player->empty()) {
-        Entry entry;
-        object_player->front(&entry);
-        if (entry.get_tag_tid() == tag_tid) {
-          ldout(m_cct, 20) << __func__ << ": pruned " << entry << dendl;
-          object_player->pop_front();
-        } else {
-          break;
-        }
+  bool pruned = false;
+  for (auto &player_pair : m_object_players) {
+    ObjectPlayerPtr object_player(player_pair.second);
+    ldout(m_cct, 15) << __func__ << ": checking " << object_player->get_oid()
+                     << dendl;
+    while (!object_player->empty()) {
+      Entry entry;
+      object_player->front(&entry);
+      if (entry.get_tag_tid() == tag_tid) {
+        ldout(m_cct, 20) << __func__ << ": pruned " << entry << dendl;
+        object_player->pop_front();
+        pruned = true;
+      } else {
+        break;
       }
     }
+  }
 
-    // trim any empty players to prefetch the next available object
-    ObjectPlayers object_players(players.second);
-    for (auto player_pair : object_players) {
-      remove_empty_object_player(player_pair.second);
+  // avoid watch delay when pruning stale tags from journal objects
+  if (pruned) {
+    ldout(m_cct, 15) << __func__ << ": reseting refetch state to immediate"
+                     << dendl;
+    for (auto &player_pair : m_object_players) {
+      ObjectPlayerPtr object_player(player_pair.second);
+      object_player->set_refetch_state(ObjectPlayer::REFETCH_STATE_IMMEDIATE);
     }
   }
+
+  // trim empty player to prefetch the next available object
+  for (auto &player_pair : m_object_players) {
+    remove_empty_object_player(player_pair.second);
+  }
 }
 
 void JournalPlayer::prune_active_tag(const boost::optional<uint64_t>& tag_tid) {
@@ -531,23 +530,15 @@ void JournalPlayer::prune_active_tag(const boost::optional<uint64_t>& tag_tid) {
   prune_tag(active_tag_tid);
 }
 
-const JournalPlayer::ObjectPlayers &JournalPlayer::get_object_players() const {
+ObjectPlayerPtr JournalPlayer::get_object_player() const {
   assert(m_lock.is_locked());
 
   SplayedObjectPlayers::const_iterator it = m_object_players.find(
     m_splay_offset);
   assert(it != m_object_players.end());
-
   return it->second;
 }
 
-ObjectPlayerPtr JournalPlayer::get_object_player() const {
-  assert(m_lock.is_locked());
-
-  const ObjectPlayers &object_players = get_object_players();
-  return object_players.begin()->second;
-}
-
 ObjectPlayerPtr JournalPlayer::get_object_player(uint64_t object_number) const {
   assert(m_lock.is_locked());
 
@@ -556,17 +547,9 @@ ObjectPlayerPtr JournalPlayer::get_object_player(uint64_t object_number) const {
   auto splay_it = m_object_players.find(splay_offset);
   assert(splay_it != m_object_players.end());
 
-  const ObjectPlayers &object_players = splay_it->second;
-  auto player_it = object_players.find(object_number);
-  assert(player_it != object_players.end());
-  return player_it->second;
-}
-
-ObjectPlayerPtr JournalPlayer::get_next_set_object_player() const {
-  assert(m_lock.is_locked());
-
-  const ObjectPlayers &object_players = get_object_players();
-  return object_players.rbegin()->second;
+  ObjectPlayerPtr object_player = splay_it->second;
+  assert(object_player->get_object_number() == object_number);
+  return object_player;
 }
 
 void JournalPlayer::advance_splay_object() {
@@ -587,10 +570,18 @@ bool JournalPlayer::remove_empty_object_player(const ObjectPlayerPtr &player) {
   uint64_t active_set = m_journal_metadata->get_active_set();
   if (!player->empty() || object_set == active_set) {
     return false;
-  } else if (m_watch_enabled && player->refetch_required()) {
+  } else if (player->refetch_required()) {
     ldout(m_cct, 20) << __func__ << ": " << player->get_oid() << " requires "
                      << "a refetch" << dendl;
     return false;
+  } else if (m_active_set != active_set) {
+    ldout(m_cct, 20) << __func__ << ": new active set detected, all players "
+                     << "require refetch" << dendl;
+    m_active_set = active_set;
+    for (auto &pair : m_object_players) {
+      pair.second->set_refetch_state(ObjectPlayer::REFETCH_STATE_IMMEDIATE);
+    }
+    return false;
   }
 
   ldout(m_cct, 15) << __func__ << ": " << player->get_oid() << " empty"
@@ -599,35 +590,35 @@ bool JournalPlayer::remove_empty_object_player(const ObjectPlayerPtr &player) {
   m_watch_prune_active_tag = false;
   m_watch_step = WATCH_STEP_FETCH_CURRENT;
 
-  ObjectPlayers &object_players = m_object_players[
-    player->get_object_number() % splay_width];
-  assert(!object_players.empty());
-
-  uint64_t next_object_num = object_players.rbegin()->first + splay_width;
-  uint64_t next_object_set = next_object_num / splay_width;
-  if (next_object_set <= active_set) {
-    fetch(next_object_num);
-  }
-  object_players.erase(player->get_object_number());
+  uint64_t next_object_num = player->get_object_number() + splay_width;
+  fetch(next_object_num);
   return true;
 }
 
 void JournalPlayer::fetch(uint64_t object_num) {
   assert(m_lock.is_locked());
 
-  std::string oid = utils::get_object_name(m_object_oid_prefix, object_num);
+  ObjectPlayerPtr object_player(new ObjectPlayer(
+    m_ioctx, m_object_oid_prefix, object_num, m_journal_metadata->get_timer(),
+    m_journal_metadata->get_timer_lock(), m_journal_metadata->get_order(),
+    m_journal_metadata->get_settings().max_fetch_bytes));
 
+  uint8_t splay_width = m_journal_metadata->get_splay_width();
+  m_object_players[object_num % splay_width] = object_player;
+  fetch(object_player);
+}
+
+void JournalPlayer::fetch(const ObjectPlayerPtr &object_player) {
+  assert(m_lock.is_locked());
+
+  uint64_t object_num = object_player->get_object_number();
+  std::string oid = utils::get_object_name(m_object_oid_prefix, object_num);
   assert(m_fetch_object_numbers.count(object_num) == 0);
   m_fetch_object_numbers.insert(object_num);
 
   ldout(m_cct, 10) << __func__ << ": " << oid << dendl;
   C_Fetch *fetch_ctx = new C_Fetch(this, object_num);
-  ObjectPlayerPtr object_player(new ObjectPlayer(
-    m_ioctx, m_object_oid_prefix, object_num, m_journal_metadata->get_timer(),
-    m_journal_metadata->get_timer_lock(), m_journal_metadata->get_order()));
 
-  uint8_t splay_width = m_journal_metadata->get_splay_width();
-  m_object_players[object_num % splay_width][object_num] = object_player;
   object_player->fetch(fetch_ctx);
 }
 
@@ -644,9 +635,6 @@ void JournalPlayer::handle_fetched(uint64_t object_num, int r) {
     return;
   }
 
-  if (r == -ENOENT) {
-    r = 0;
-  }
   if (r == 0) {
     ObjectPlayerPtr object_player = get_object_player(object_num);
     remove_empty_object_player(object_player);
@@ -654,7 +642,28 @@ void JournalPlayer::handle_fetched(uint64_t object_num, int r) {
   process_state(object_num, r);
 }
 
-void JournalPlayer::schedule_watch() {
+void JournalPlayer::refetch(bool immediate) {
+  ldout(m_cct, 10) << __func__ << dendl;
+  assert(m_lock.is_locked());
+  m_handler_notified = false;
+
+  // if watching the object, handle the periodic re-fetch
+  if (m_watch_enabled) {
+    schedule_watch(immediate);
+    return;
+  }
+
+  ObjectPlayerPtr object_player = get_object_player();
+  if (object_player->refetch_required()) {
+    object_player->set_refetch_state(ObjectPlayer::REFETCH_STATE_NONE);
+    fetch(object_player);
+    return;
+  }
+
+  notify_complete(0);
+}
+
+void JournalPlayer::schedule_watch(bool immediate) {
   ldout(m_cct, 10) << __func__ << dendl;
   assert(m_lock.is_locked());
   if (m_watch_scheduled) {
@@ -688,17 +697,20 @@ void JournalPlayer::schedule_watch() {
       uint8_t splay_width = m_journal_metadata->get_splay_width();
       uint64_t active_set = m_journal_metadata->get_active_set();
       uint64_t object_set = object_player->get_object_number() / splay_width;
-      if (object_set < active_set && object_player->refetch_required()) {
-        ldout(m_cct, 20) << __func__ << ": refetching "
+      if (immediate ||
+          (object_player->get_refetch_state() ==
+             ObjectPlayer::REFETCH_STATE_IMMEDIATE) ||
+          (object_set < active_set && object_player->refetch_required())) {
+        ldout(m_cct, 20) << __func__ << ": immediately refetching "
                          << object_player->get_oid()
                          << dendl;
-        object_player->clear_refetch_required();
+        object_player->set_refetch_state(ObjectPlayer::REFETCH_STATE_NONE);
         watch_interval = 0;
       }
     }
     break;
   case WATCH_STEP_FETCH_FIRST:
-    object_player = m_object_players.begin()->second.begin()->second;
+    object_player = m_object_players.begin()->second;
     watch_interval = 0;
     break;
   default:
@@ -762,7 +774,7 @@ void JournalPlayer::handle_watch_assert_active(int r) {
 
   m_watch_step = WATCH_STEP_FETCH_CURRENT;
   if (!m_shut_down && m_watch_enabled) {
-    schedule_watch();
+    schedule_watch(false);
   }
   m_async_op_tracker.finish_op();
 }
diff --git a/src/journal/JournalPlayer.h b/src/journal/JournalPlayer.h
index f502582..690eccd 100644
--- a/src/journal/JournalPlayer.h
+++ b/src/journal/JournalPlayer.h
@@ -42,8 +42,7 @@ public:
 
 private:
   typedef std::set<uint8_t> PrefetchSplayOffsets;
-  typedef std::map<uint64_t, ObjectPlayerPtr> ObjectPlayers;
-  typedef std::map<uint8_t, ObjectPlayers> SplayedObjectPlayers;
+  typedef std::map<uint8_t, ObjectPlayerPtr> SplayedObjectPlayers;
   typedef std::map<uint8_t, ObjectPosition> SplayedObjectPositions;
   typedef std::set<uint64_t> ObjectNumbers;
 
@@ -116,8 +115,11 @@ private:
 
   PrefetchSplayOffsets m_prefetch_splay_offsets;
   SplayedObjectPlayers m_object_players;
-  uint64_t m_commit_object;
+
+  bool m_commit_position_valid = false;
+  ObjectPosition m_commit_position;
   SplayedObjectPositions m_commit_positions;
+  uint64_t m_active_set;
 
   boost::optional<uint64_t> m_active_tag_tid = boost::none;
   boost::optional<uint64_t> m_prune_tag_tid = boost::none;
@@ -129,10 +131,8 @@ private:
   void prune_tag(uint64_t tag_tid);
   void prune_active_tag(const boost::optional<uint64_t>& tag_tid);
 
-  const ObjectPlayers &get_object_players() const;
   ObjectPlayerPtr get_object_player() const;
   ObjectPlayerPtr get_object_player(uint64_t object_number) const;
-  ObjectPlayerPtr get_next_set_object_player() const;
   bool remove_empty_object_player(const ObjectPlayerPtr &object_player);
 
   void process_state(uint64_t object_number, int r);
@@ -140,9 +140,11 @@ private:
   int process_playback(uint64_t object_number);
 
   void fetch(uint64_t object_num);
+  void fetch(const ObjectPlayerPtr &object_player);
   void handle_fetched(uint64_t object_num, int r);
+  void refetch(bool immediate);
 
-  void schedule_watch();
+  void schedule_watch(bool immediate);
   void handle_watch(uint64_t object_num, int r);
   void handle_watch_assert_active(int r);
 
diff --git a/src/journal/JournalRecorder.cc b/src/journal/JournalRecorder.cc
index b4da4ff..4cbe739 100644
--- a/src/journal/JournalRecorder.cc
+++ b/src/journal/JournalRecorder.cc
@@ -8,7 +8,7 @@
 
 #define dout_subsys ceph_subsys_journaler
 #undef dout_prefix
-#define dout_prefix *_dout << "JournalRecorder: "
+#define dout_prefix *_dout << "JournalRecorder: " << this << " "
 
 namespace journal {
 
diff --git a/src/journal/JournalRecorder.h b/src/journal/JournalRecorder.h
index 68a1d8f..6ed2e63 100644
--- a/src/journal/JournalRecorder.h
+++ b/src/journal/JournalRecorder.h
@@ -36,7 +36,7 @@ public:
 private:
   typedef std::map<uint8_t, ObjectRecorderPtr> ObjectRecorderPtrs;
 
-  struct Listener : public JournalMetadata::Listener {
+  struct Listener : public JournalMetadataListener {
     JournalRecorder *journal_recorder;
 
     Listener(JournalRecorder *_journal_recorder)
diff --git a/src/journal/JournalTrimmer.cc b/src/journal/JournalTrimmer.cc
index a4a47fa..aef16c2 100644
--- a/src/journal/JournalTrimmer.cc
+++ b/src/journal/JournalTrimmer.cc
@@ -9,7 +9,7 @@
 
 #define dout_subsys ceph_subsys_journaler
 #undef dout_prefix
-#define dout_prefix *_dout << "JournalTrimmer: "
+#define dout_prefix *_dout << "JournalTrimmer: " << this << " "
 
 namespace journal {
 
diff --git a/src/journal/JournalTrimmer.h b/src/journal/JournalTrimmer.h
index 26bfca7..ec76d72 100644
--- a/src/journal/JournalTrimmer.h
+++ b/src/journal/JournalTrimmer.h
@@ -33,7 +33,7 @@ public:
 private:
   typedef std::function<Context*()> CreateContext;
 
-  struct MetadataListener : public JournalMetadata::Listener {
+  struct MetadataListener : public JournalMetadataListener {
     JournalTrimmer *journal_trimmmer;
 
     MetadataListener(JournalTrimmer *journal_trimmmer)
diff --git a/src/journal/Journaler.cc b/src/journal/Journaler.cc
index 1db5247..c08a11b 100644
--- a/src/journal/Journaler.cc
+++ b/src/journal/Journaler.cc
@@ -19,7 +19,7 @@
 
 #define dout_subsys ceph_subsys_journaler
 #undef dout_prefix
-#define dout_prefix *_dout << "Journaler: "
+#define dout_prefix *_dout << "Journaler: " << this << " "
 
 namespace journal {
 
@@ -68,25 +68,26 @@ Journaler::Threads::~Threads() {
 
 Journaler::Journaler(librados::IoCtx &header_ioctx,
                      const std::string &journal_id,
-                     const std::string &client_id, double commit_interval)
+                     const std::string &client_id, const Settings &settings)
     : m_threads(new Threads(reinterpret_cast<CephContext*>(header_ioctx.cct()))),
       m_client_id(client_id) {
   set_up(m_threads->work_queue, m_threads->timer, &m_threads->timer_lock,
-         header_ioctx, journal_id, commit_interval);
+         header_ioctx, journal_id, settings);
 }
 
 Journaler::Journaler(ContextWQ *work_queue, SafeTimer *timer,
                      Mutex *timer_lock, librados::IoCtx &header_ioctx,
 		     const std::string &journal_id,
-		     const std::string &client_id, double commit_interval)
+		     const std::string &client_id, const Settings &settings)
     : m_client_id(client_id) {
   set_up(work_queue, timer, timer_lock, header_ioctx, journal_id,
-         commit_interval);
+         settings);
 }
 
 void Journaler::set_up(ContextWQ *work_queue, SafeTimer *timer,
                        Mutex *timer_lock, librados::IoCtx &header_ioctx,
-                       const std::string &journal_id, double commit_interval) {
+                       const std::string &journal_id,
+                       const Settings &settings) {
   m_header_ioctx.dup(header_ioctx);
   m_cct = reinterpret_cast<CephContext *>(m_header_ioctx.cct());
 
@@ -95,7 +96,7 @@ void Journaler::set_up(ContextWQ *work_queue, SafeTimer *timer,
 
   m_metadata = new JournalMetadata(work_queue, timer, timer_lock,
                                    m_header_ioctx, m_header_oid, m_client_id,
-                                   commit_interval);
+                                   settings);
   m_metadata->get();
 }
 
@@ -244,6 +245,14 @@ void Journaler::flush_commit_position(Context *on_safe) {
   m_metadata->flush_commit_position(on_safe);
 }
 
+void Journaler::add_listener(JournalMetadataListener *listener) {
+  m_metadata->add_listener(listener);
+}
+
+void Journaler::remove_listener(JournalMetadataListener *listener) {
+  m_metadata->remove_listener(listener);
+}
+
 int Journaler::register_client(const bufferlist &data) {
   C_SaferCond cond;
   register_client(data, &cond);
@@ -386,7 +395,13 @@ void Journaler::stop_append(Context *on_safe) {
 }
 
 uint64_t Journaler::get_max_append_size() const {
-  return m_metadata->get_object_size() - Entry::get_fixed_size();
+  uint64_t max_payload_size = m_metadata->get_object_size() -
+                              Entry::get_fixed_size();
+  if (m_metadata->get_settings().max_payload_bytes > 0) {
+    max_payload_size = MIN(max_payload_size,
+                           m_metadata->get_settings().max_payload_bytes);
+  }
+  return max_payload_size;
 }
 
 Future Journaler::append(uint64_t tag_tid, const bufferlist &payload_bl) {
diff --git a/src/journal/Journaler.h b/src/journal/Journaler.h
index f30a3a5..93a89bb 100644
--- a/src/journal/Journaler.h
+++ b/src/journal/Journaler.h
@@ -9,6 +9,7 @@
 #include "include/Context.h"
 #include "include/rados/librados.hpp"
 #include "journal/Future.h"
+#include "journal/JournalMetadataListener.h"
 #include "cls/journal/cls_journal_types.h"
 #include <list>
 #include <map>
@@ -27,6 +28,7 @@ class JournalRecorder;
 class JournalTrimmer;
 class ReplayEntry;
 class ReplayHandler;
+class Settings;
 
 class Journaler {
 public:
@@ -50,10 +52,10 @@ public:
 				       const std::string &journal_id);
 
   Journaler(librados::IoCtx &header_ioctx, const std::string &journal_id,
-	    const std::string &client_id, double commit_interval);
+	    const std::string &client_id, const Settings &settings);
   Journaler(ContextWQ *work_queue, SafeTimer *timer, Mutex *timer_lock,
             librados::IoCtx &header_ioctx, const std::string &journal_id,
-	    const std::string &client_id, double commit_interval);
+	    const std::string &client_id, const Settings &settings);
   ~Journaler();
 
   int exists(bool *header_exists) const;
@@ -71,6 +73,9 @@ public:
   void get_mutable_metadata(uint64_t *minimum_set, uint64_t *active_set,
 			    RegisteredClients *clients, Context *on_finish);
 
+  void add_listener(JournalMetadataListener *listener);
+  void remove_listener(JournalMetadataListener *listener);
+
   int register_client(const bufferlist &data);
   void register_client(const bufferlist &data, Context *on_finish);
 
@@ -141,7 +146,7 @@ private:
 
   void set_up(ContextWQ *work_queue, SafeTimer *timer, Mutex *timer_lock,
               librados::IoCtx &header_ioctx, const std::string &journal_id,
-              double commit_interval);
+              const Settings &settings);
 
   int init_complete();
   void create_player(ReplayHandler *replay_handler);
diff --git a/src/journal/Makefile.am b/src/journal/Makefile.am
index 4f0cd18..ad4d54d 100644
--- a/src/journal/Makefile.am
+++ b/src/journal/Makefile.am
@@ -23,6 +23,7 @@ noinst_HEADERS += \
 	journal/FutureImpl.h \
 	journal/Journaler.h \
 	journal/JournalMetadata.h \
+	journal/JournalMetadataListener.h \
 	journal/JournalPlayer.h \
 	journal/JournalRecorder.h \
 	journal/JournalTrimmer.h \
@@ -30,6 +31,7 @@ noinst_HEADERS += \
 	journal/ObjectRecorder.h \
 	journal/ReplayEntry.h \
 	journal/ReplayHandler.h \
+	journal/Settings.h \
 	journal/Utils.h
 DENCODER_DEPS += libjournal.la
 
diff --git a/src/journal/ObjectPlayer.cc b/src/journal/ObjectPlayer.cc
index f86e3ef..92dd702 100644
--- a/src/journal/ObjectPlayer.cc
+++ b/src/journal/ObjectPlayer.cc
@@ -8,20 +8,22 @@
 
 #define dout_subsys ceph_subsys_journaler
 #undef dout_prefix
-#define dout_prefix *_dout << "ObjectPlayer: "
+#define dout_prefix *_dout << "ObjectPlayer: " << this << " "
 
 namespace journal {
 
 ObjectPlayer::ObjectPlayer(librados::IoCtx &ioctx,
                            const std::string &object_oid_prefix,
                            uint64_t object_num, SafeTimer &timer,
-                           Mutex &timer_lock, uint8_t order)
+                           Mutex &timer_lock, uint8_t order,
+                           uint64_t max_fetch_bytes)
   : RefCountedObject(NULL, 0), m_object_num(object_num),
     m_oid(utils::get_object_name(object_oid_prefix, m_object_num)),
     m_cct(NULL), m_timer(timer), m_timer_lock(timer_lock), m_order(order),
+    m_max_fetch_bytes(max_fetch_bytes > 0 ? max_fetch_bytes : 2 << order),
     m_watch_interval(0), m_watch_task(NULL),
     m_lock(utils::unique_lock_name("ObjectPlayer::m_lock", this)),
-    m_fetch_in_progress(false), m_read_off(0) {
+    m_fetch_in_progress(false) {
   m_ioctx.dup(ioctx);
   m_cct = reinterpret_cast<CephContext*>(m_ioctx.cct());
 }
@@ -39,11 +41,12 @@ void ObjectPlayer::fetch(Context *on_finish) {
   ldout(m_cct, 10) << __func__ << ": " << m_oid << dendl;
 
   Mutex::Locker locker(m_lock);
+  assert(!m_fetch_in_progress);
   m_fetch_in_progress = true;
 
   C_Fetch *context = new C_Fetch(this, on_finish);
   librados::ObjectReadOperation op;
-  op.read(m_read_off, 2 << m_order, &context->read_bl, NULL);
+  op.read(m_read_off, m_max_fetch_bytes, &context->read_bl, NULL);
   op.set_op_flags2(CEPH_OSD_OP_FLAG_FADVISE_DONTNEED);
 
   librados::AioCompletion *rados_completion =
@@ -95,41 +98,59 @@ void ObjectPlayer::front(Entry *entry) const {
 void ObjectPlayer::pop_front() {
   Mutex::Locker locker(m_lock);
   assert(!m_entries.empty());
+
+  auto &entry = m_entries.front();
+  m_entry_keys.erase({entry.get_tag_tid(), entry.get_entry_tid()});
   m_entries.pop_front();
 }
 
-int ObjectPlayer::handle_fetch_complete(int r, const bufferlist &bl) {
+int ObjectPlayer::handle_fetch_complete(int r, const bufferlist &bl,
+                                        bool *refetch) {
   ldout(m_cct, 10) << __func__ << ": " << m_oid << ", r=" << r << ", len="
                    << bl.length() << dendl;
 
-  m_fetch_in_progress = false;
-  if (r < 0) {
+  *refetch = false;
+  if (r == -ENOENT) {
+    return 0;
+  } else if (r < 0) {
     return r;
-  }
-  if (bl.length() == 0) {
-    return -ENOENT;
+  } else if (bl.length() == 0) {
+    return 0;
   }
 
   Mutex::Locker locker(m_lock);
+  assert(m_fetch_in_progress);
+  m_read_off += bl.length();
   m_read_bl.append(bl);
+  m_refetch_state = REFETCH_STATE_REQUIRED;
 
+  bool full_fetch = (m_max_fetch_bytes == 2U << m_order);
+  bool partial_entry = false;
   bool invalid = false;
   uint32_t invalid_start_off = 0;
 
-  bufferlist::iterator iter(&m_read_bl, m_read_off);
+  clear_invalid_range(m_read_bl_off, m_read_bl.length());
+  bufferlist::iterator iter(&m_read_bl, 0);
   while (!iter.end()) {
     uint32_t bytes_needed;
+    uint32_t bl_off = iter.get_off();
     if (!Entry::is_readable(iter, &bytes_needed)) {
       if (bytes_needed != 0) {
-        invalid_start_off = iter.get_off();
+        invalid_start_off = m_read_bl_off + bl_off;
         invalid = true;
-        lderr(m_cct) << ": partial record at offset " << iter.get_off()
-                     << dendl;
+        partial_entry = true;
+        if (full_fetch) {
+          lderr(m_cct) << ": partial record at offset " << invalid_start_off
+                       << dendl;
+        } else {
+          ldout(m_cct, 20) << ": partial record detected, will re-fetch"
+                           << dendl;
+        }
         break;
       }
 
       if (!invalid) {
-        invalid_start_off = iter.get_off();
+        invalid_start_off = m_read_bl_off + bl_off;
         invalid = true;
         lderr(m_cct) << ": detected corrupt journal entry at offset "
                      << invalid_start_off << dendl;
@@ -138,18 +159,21 @@ int ObjectPlayer::handle_fetch_complete(int r, const bufferlist &bl) {
       continue;
     }
 
+    Entry entry;
+    ::decode(entry, iter);
+    ldout(m_cct, 20) << ": " << entry << " decoded" << dendl;
+
+    uint32_t entry_len = iter.get_off() - bl_off;
     if (invalid) {
-      uint32_t invalid_end_off = iter.get_off();
+      // new corrupt region detected
+      uint32_t invalid_end_off = m_read_bl_off + bl_off;
       lderr(m_cct) << ": corruption range [" << invalid_start_off
                    << ", " << invalid_end_off << ")" << dendl;
-      m_invalid_ranges.insert(invalid_start_off, invalid_end_off);
+      m_invalid_ranges.insert(invalid_start_off,
+                              invalid_end_off - invalid_start_off);
       invalid = false;
     }
 
-    Entry entry;
-    ::decode(entry, iter);
-    ldout(m_cct, 20) << ": " << entry << " decoded" << dendl;
-
     EntryKey entry_key(std::make_pair(entry.get_tag_tid(),
                                       entry.get_entry_tid()));
     if (m_entry_keys.find(entry_key) == m_entry_keys.end()) {
@@ -158,20 +182,49 @@ int ObjectPlayer::handle_fetch_complete(int r, const bufferlist &bl) {
       ldout(m_cct, 10) << ": " << entry << " is duplicate, replacing" << dendl;
       *m_entry_keys[entry_key] = entry;
     }
+
+    // prune decoded / corrupted journal entries from front of bl
+    bufferlist sub_bl;
+    sub_bl.substr_of(m_read_bl, iter.get_off(),
+                     m_read_bl.length() - iter.get_off());
+    sub_bl.swap(m_read_bl);
+    iter = bufferlist::iterator(&m_read_bl, 0);
+
+    // advance the decoded entry offset
+    m_read_bl_off += entry_len;
   }
 
-  m_read_off = m_read_bl.length();
   if (invalid) {
-    uint32_t invalid_end_off = m_read_bl.length();
-    lderr(m_cct) << ": corruption range [" << invalid_start_off
-                 << ", " << invalid_end_off << ")" << dendl;
-    m_invalid_ranges.insert(invalid_start_off, invalid_end_off);
+    uint32_t invalid_end_off = m_read_bl_off + m_read_bl.length();
+    if (!partial_entry) {
+      lderr(m_cct) << ": corruption range [" << invalid_start_off
+                   << ", " << invalid_end_off << ")" << dendl;
+    }
+    m_invalid_ranges.insert(invalid_start_off,
+                            invalid_end_off - invalid_start_off);
   }
 
-  if (!m_invalid_ranges.empty()) {
-    r = -EBADMSG;
+  if (!m_invalid_ranges.empty() && !partial_entry) {
+    return -EBADMSG;
+  } else if (partial_entry && (full_fetch || m_entries.empty())) {
+    *refetch = true;
+    return -EAGAIN;
+  }
+
+  return 0;
+}
+
+void ObjectPlayer::clear_invalid_range(uint32_t off, uint32_t len) {
+  // possibly remove previously partial record region
+  InvalidRanges decode_range;
+  decode_range.insert(off, len);
+  InvalidRanges intersect_range;
+  intersect_range.intersection_of(m_invalid_ranges, decode_range);
+  if (!intersect_range.empty()) {
+    ldout(m_cct, 20) << ": clearing invalid range: " << intersect_range
+                     << dendl;
+    m_invalid_ranges.subtract(intersect_range);
   }
-  return r;
 }
 
 void ObjectPlayer::schedule_watch() {
@@ -217,11 +270,6 @@ void ObjectPlayer::handle_watch_fetched(int r) {
   Context *watch_ctx = nullptr;
   {
     Mutex::Locker timer_locker(m_timer_lock);
-    if (r == -ENOENT) {
-      r = 0;
-    } else {
-      m_refetch_required = true;
-    }
     std::swap(watch_ctx, m_watch_ctx);
 
     if (m_unwatched) {
@@ -236,9 +284,20 @@ void ObjectPlayer::handle_watch_fetched(int r) {
 }
 
 void ObjectPlayer::C_Fetch::finish(int r) {
-  r = object_player->handle_fetch_complete(r, read_bl);
-  object_player.reset();
+  bool refetch = false;
+  r = object_player->handle_fetch_complete(r, read_bl, &refetch);
 
+  {
+    Mutex::Locker locker(object_player->m_lock);
+    object_player->m_fetch_in_progress = false;
+  }
+
+  if (refetch) {
+    object_player->fetch(on_finish);
+    return;
+  }
+
+  object_player.reset();
   on_finish->complete(r);
 }
 
diff --git a/src/journal/ObjectPlayer.h b/src/journal/ObjectPlayer.h
index d0809ce..cff33dc 100644
--- a/src/journal/ObjectPlayer.h
+++ b/src/journal/ObjectPlayer.h
@@ -30,9 +30,15 @@ public:
   typedef std::list<Entry> Entries;
   typedef interval_set<uint64_t> InvalidRanges;
 
+  enum RefetchState {
+    REFETCH_STATE_NONE,
+    REFETCH_STATE_REQUIRED,
+    REFETCH_STATE_IMMEDIATE
+  };
+
   ObjectPlayer(librados::IoCtx &ioctx, const std::string &object_oid_prefix,
                uint64_t object_num, SafeTimer &timer, Mutex &timer_lock,
-               uint8_t order);
+               uint8_t order, uint64_t max_fetch_bytes);
   ~ObjectPlayer();
 
   inline const std::string &get_oid() const {
@@ -63,10 +69,13 @@ public:
   }
 
   inline bool refetch_required() const {
-    return m_refetch_required;
+    return (get_refetch_state() != REFETCH_STATE_NONE);
   }
-  inline void clear_refetch_required() {
-    m_refetch_required = false;
+  inline RefetchState get_refetch_state() const {
+    return m_refetch_state;
+  }
+  inline void set_refetch_state(RefetchState refetch_state) {
+    m_refetch_state = refetch_state;
   }
 
 private:
@@ -77,8 +86,7 @@ private:
     ObjectPlayerPtr object_player;
     Context *on_finish;
     bufferlist read_bl;
-    C_Fetch(ObjectPlayer *o, Context *ctx)
-      : object_player(o), on_finish(ctx) {
+    C_Fetch(ObjectPlayer *o, Context *ctx) : object_player(o), on_finish(ctx) {
     }
     virtual void finish(int r);
   };
@@ -104,6 +112,7 @@ private:
   Mutex &m_timer_lock;
 
   uint8_t m_order;
+  uint64_t m_max_fetch_bytes;
 
   double m_watch_interval;
   Context *m_watch_task;
@@ -111,7 +120,8 @@ private:
   mutable Mutex m_lock;
   bool m_fetch_in_progress;
   bufferlist m_read_bl;
-  uint32_t m_read_off;
+  uint32_t m_read_off = 0;
+  uint32_t m_read_bl_off = 0;
 
   Entries m_entries;
   EntryKeys m_entry_keys;
@@ -120,9 +130,11 @@ private:
   Context *m_watch_ctx = nullptr;
 
   bool m_unwatched = false;
-  bool m_refetch_required = true;
+  RefetchState m_refetch_state = REFETCH_STATE_IMMEDIATE;
+
+  int handle_fetch_complete(int r, const bufferlist &bl, bool *refetch);
 
-  int handle_fetch_complete(int r, const bufferlist &bl);
+  void clear_invalid_range(uint32_t off, uint32_t len);
 
   void schedule_watch();
   bool cancel_watch();
diff --git a/src/journal/ObjectRecorder.cc b/src/journal/ObjectRecorder.cc
index 5972d89..0cf2fd1 100644
--- a/src/journal/ObjectRecorder.cc
+++ b/src/journal/ObjectRecorder.cc
@@ -10,7 +10,7 @@
 
 #define dout_subsys ceph_subsys_journaler
 #undef dout_prefix
-#define dout_prefix *_dout << "ObjectRecorder: "
+#define dout_prefix *_dout << "ObjectRecorder: " << this << " "
 
 using namespace cls::journal;
 
diff --git a/src/journal/Settings.h b/src/journal/Settings.h
new file mode 100644
index 0000000..603770c
--- /dev/null
+++ b/src/journal/Settings.h
@@ -0,0 +1,19 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_JOURNAL_SETTINGS_H
+#define CEPH_JOURNAL_SETTINGS_H
+
+#include "include/int_types.h"
+
+namespace journal {
+
+struct Settings {
+  double commit_interval = 5;         ///< commit position throttle (in secs)
+  uint64_t max_fetch_bytes = 0;       ///< 0 implies no limit
+  uint64_t max_payload_bytes = 0;     ///< 0 implies object size limit
+};
+
+} // namespace journal
+
+#endif // # CEPH_JOURNAL_SETTINGS_H
diff --git a/src/librbd/AioCompletion.cc b/src/librbd/AioCompletion.cc
index 1e892ac..e4044fa 100644
--- a/src/librbd/AioCompletion.cc
+++ b/src/librbd/AioCompletion.cc
@@ -9,7 +9,6 @@
 #include "common/perf_counters.h"
 #include "common/WorkQueue.h"
 
-#include "librbd/AioObjectRequest.h"
 #include "librbd/ImageCtx.h"
 #include "librbd/internal.h"
 
@@ -28,239 +27,198 @@
 
 namespace librbd {
 
-  int AioCompletion::wait_for_complete() {
-    tracepoint(librbd, aio_wait_for_complete_enter, this);
-    lock.Lock();
-    while (state != STATE_COMPLETE)
-      cond.Wait(lock);
-    lock.Unlock();
-    tracepoint(librbd, aio_wait_for_complete_exit, 0);
-    return 0;
-  }
+int AioCompletion::wait_for_complete() {
+  tracepoint(librbd, aio_wait_for_complete_enter, this);
+  lock.Lock();
+  while (state != STATE_COMPLETE)
+    cond.Wait(lock);
+  lock.Unlock();
+  tracepoint(librbd, aio_wait_for_complete_exit, 0);
+  return 0;
+}
 
-  void AioCompletion::finalize(ssize_t rval)
-  {
-    assert(lock.is_locked());
-    assert(ictx != nullptr);
-    CephContext *cct = ictx->cct;
-
-    ldout(cct, 20) << this << " " << __func__ << ": r=" << rval << ", "
-                   << "read_buf=" << reinterpret_cast<void*>(read_buf) << ", "
-                   << "real_bl=" <<  reinterpret_cast<void*>(read_bl) << dendl;
-    if (rval >= 0 && aio_type == AIO_TYPE_READ) {
-      if (read_buf && !read_bl) {
-	destriper.assemble_result(cct, read_buf, read_buf_len);
-      } else {
-	// FIXME: make the destriper write directly into a buffer so
-	// that we avoid shuffling pointers and copying zeros around.
-	bufferlist bl;
-	destriper.assemble_result(cct, bl, true);
-
-	if (read_buf) {
-	  assert(bl.length() == read_buf_len);
-	  bl.copy(0, read_buf_len, read_buf);
-	  ldout(cct, 20) << "copied resulting " << bl.length()
-	    << " bytes to " << (void*)read_buf << dendl;
-	}
-	if (read_bl) {
-	  ldout(cct, 20) << " moving resulting " << bl.length()
-	    << " bytes to bl " << (void*)read_bl << dendl;
-	  read_bl->claim(bl);
-	}
+void AioCompletion::finalize(ssize_t rval)
+{
+  assert(lock.is_locked());
+  assert(ictx != nullptr);
+  CephContext *cct = ictx->cct;
+
+  ldout(cct, 20) << this << " " << __func__ << ": r=" << rval << ", "
+                 << "read_buf=" << reinterpret_cast<void*>(read_buf) << ", "
+                 << "real_bl=" <<  reinterpret_cast<void*>(read_bl) << dendl;
+  if (rval >= 0 && aio_type == AIO_TYPE_READ) {
+    if (read_buf && !read_bl) {
+      destriper.assemble_result(cct, read_buf, read_buf_len);
+    } else {
+      // FIXME: make the destriper write directly into a buffer so
+      // that we avoid shuffling pointers and copying zeros around.
+      bufferlist bl;
+      destriper.assemble_result(cct, bl, true);
+
+      if (read_buf) {
+        assert(bl.length() == read_buf_len);
+        bl.copy(0, read_buf_len, read_buf);
+        ldout(cct, 20) << "copied resulting " << bl.length()
+                       << " bytes to " << (void*)read_buf << dendl;
+      }
+      if (read_bl) {
+        ldout(cct, 20) << " moving resulting " << bl.length()
+                       << " bytes to bl " << (void*)read_bl << dendl;
+        read_bl->claim(bl);
       }
     }
   }
+}
 
-  void AioCompletion::complete() {
-    assert(lock.is_locked());
-    assert(ictx != nullptr);
-    CephContext *cct = ictx->cct;
-
-    tracepoint(librbd, aio_complete_enter, this, rval);
-    utime_t elapsed;
-    elapsed = ceph_clock_now(cct) - start_time;
-    switch (aio_type) {
-    case AIO_TYPE_OPEN:
-    case AIO_TYPE_CLOSE:
-      break;
-    case AIO_TYPE_READ:
-      ictx->perfcounter->tinc(l_librbd_rd_latency, elapsed); break;
-    case AIO_TYPE_WRITE:
-      ictx->perfcounter->tinc(l_librbd_wr_latency, elapsed); break;
-    case AIO_TYPE_DISCARD:
-      ictx->perfcounter->tinc(l_librbd_discard_latency, elapsed); break;
-    case AIO_TYPE_FLUSH:
-      ictx->perfcounter->tinc(l_librbd_aio_flush_latency, elapsed); break;
-    default:
-      lderr(cct) << "completed invalid aio_type: " << aio_type << dendl;
-      break;
-    }
-
-    // inform the journal that the op has successfully committed
-    if (journal_tid != 0) {
-      assert(ictx->journal != NULL);
-      ictx->journal->commit_io_event(journal_tid, rval);
-    }
-
-    state = STATE_CALLBACK;
-    if (complete_cb) {
-      lock.Unlock();
-      complete_cb(rbd_comp, complete_arg);
-      lock.Lock();
-    }
-
-    if (ictx && event_notify && ictx->event_socket.is_valid()) {
-      ictx->completed_reqs_lock.Lock();
-      ictx->completed_reqs.push_back(&m_xlist_item);
-      ictx->completed_reqs_lock.Unlock();
-      ictx->event_socket.notify();
-    }
-
-    state = STATE_COMPLETE;
-    cond.Signal();
-
-    // note: possible for image to be closed after op marked finished
-    if (async_op.started()) {
-      async_op.finish_op();
-    }
-    tracepoint(librbd, aio_complete_exit);
-  }
-
-  void AioCompletion::init_time(ImageCtx *i, aio_type_t t) {
-    Mutex::Locker locker(lock);
-    if (ictx == nullptr) {
-      ictx = i;
-      aio_type = t;
-      start_time = ceph_clock_now(ictx->cct);
-    }
+void AioCompletion::complete() {
+  assert(lock.is_locked());
+  assert(ictx != nullptr);
+  CephContext *cct = ictx->cct;
+
+  tracepoint(librbd, aio_complete_enter, this, rval);
+  utime_t elapsed;
+  elapsed = ceph_clock_now(cct) - start_time;
+  switch (aio_type) {
+  case AIO_TYPE_OPEN:
+  case AIO_TYPE_CLOSE:
+    break;
+  case AIO_TYPE_READ:
+    ictx->perfcounter->tinc(l_librbd_rd_latency, elapsed); break;
+  case AIO_TYPE_WRITE:
+    ictx->perfcounter->tinc(l_librbd_wr_latency, elapsed); break;
+  case AIO_TYPE_DISCARD:
+    ictx->perfcounter->tinc(l_librbd_discard_latency, elapsed); break;
+  case AIO_TYPE_FLUSH:
+    ictx->perfcounter->tinc(l_librbd_aio_flush_latency, elapsed); break;
+  default:
+    lderr(cct) << "completed invalid aio_type: " << aio_type << dendl;
+    break;
   }
 
-  void AioCompletion::start_op(bool ignore_type) {
-    Mutex::Locker locker(lock);
-    assert(ictx != nullptr);
-    assert(!async_op.started());
-    if (state == STATE_PENDING && (ignore_type || aio_type != AIO_TYPE_FLUSH)) {
-      async_op.start_op(*ictx);
-    }
+  // inform the journal that the op has successfully committed
+  if (journal_tid != 0) {
+    assert(ictx->journal != NULL);
+    ictx->journal->commit_io_event(journal_tid, rval);
   }
 
-  void AioCompletion::fail(int r)
-  {
+  state = STATE_CALLBACK;
+  if (complete_cb) {
+    lock.Unlock();
+    complete_cb(rbd_comp, complete_arg);
     lock.Lock();
-    assert(ictx != nullptr);
-    CephContext *cct = ictx->cct;
-
-    lderr(cct) << this << " " << __func__ << ": " << cpp_strerror(r)
-               << dendl;
-    assert(pending_count == 0);
-    rval = r;
-    complete();
-    put_unlock();
   }
 
-  void AioCompletion::set_request_count(uint32_t count) {
-    lock.Lock();
-    assert(ictx != nullptr);
-    CephContext *cct = ictx->cct;
+  if (ictx && event_notify && ictx->event_socket.is_valid()) {
+    ictx->completed_reqs_lock.Lock();
+    ictx->completed_reqs.push_back(&m_xlist_item);
+    ictx->completed_reqs_lock.Unlock();
+    ictx->event_socket.notify();
+  }
 
-    ldout(cct, 20) << this << " " << __func__ << ": pending=" << count << dendl;
-    assert(pending_count == 0);
-    pending_count = count;
-    lock.Unlock();
+  state = STATE_COMPLETE;
+  cond.Signal();
 
-    // if no pending requests, completion will fire now
-    unblock();
+  // note: possible for image to be closed after op marked finished
+  if (async_op.started()) {
+    async_op.finish_op();
   }
+  tracepoint(librbd, aio_complete_exit);
+}
 
-  void AioCompletion::complete_request(ssize_t r)
-  {
-    lock.Lock();
-    assert(ictx != nullptr);
-    CephContext *cct = ictx->cct;
-
-    if (rval >= 0) {
-      if (r < 0 && r != -EEXIST)
-	rval = r;
-      else if (r > 0)
-	rval += r;
-    }
-    assert(pending_count);
-    int count = --pending_count;
-
-    ldout(cct, 20) << this << " " << __func__ << ": cb=" << complete_cb << ", "
-                   << "pending=" << pending_count << dendl;
-    if (!count && blockers == 0) {
-      finalize(rval);
-      complete();
-    }
-    put_unlock();
+void AioCompletion::init_time(ImageCtx *i, aio_type_t t) {
+  Mutex::Locker locker(lock);
+  if (ictx == nullptr) {
+    ictx = i;
+    aio_type = t;
+    start_time = ceph_clock_now(ictx->cct);
   }
+}
 
-  void AioCompletion::associate_journal_event(uint64_t tid) {
-    Mutex::Locker l(lock);
-    assert(state == STATE_PENDING);
-    journal_tid = tid;
+void AioCompletion::start_op(bool ignore_type) {
+  Mutex::Locker locker(lock);
+  assert(ictx != nullptr);
+  assert(!async_op.started());
+  if (state == STATE_PENDING && (ignore_type || aio_type != AIO_TYPE_FLUSH)) {
+    async_op.start_op(*ictx);
   }
+}
 
-  bool AioCompletion::is_complete() {
-    tracepoint(librbd, aio_is_complete_enter, this);
-    bool done;
-    {
-      Mutex::Locker l(lock);
-      done = this->state == STATE_COMPLETE;
-    }
-    tracepoint(librbd, aio_is_complete_exit, done);
-    return done;
-  }
+void AioCompletion::fail(int r)
+{
+  lock.Lock();
+  assert(ictx != nullptr);
+  CephContext *cct = ictx->cct;
+
+  lderr(cct) << this << " " << __func__ << ": " << cpp_strerror(r)
+             << dendl;
+  assert(pending_count == 0);
+  rval = r;
+  complete();
+  put_unlock();
+}
 
-  ssize_t AioCompletion::get_return_value() {
-    tracepoint(librbd, aio_get_return_value_enter, this);
-    lock.Lock();
-    ssize_t r = rval;
-    lock.Unlock();
-    tracepoint(librbd, aio_get_return_value_exit, r);
-    return r;
-  }
+void AioCompletion::set_request_count(uint32_t count) {
+  lock.Lock();
+  assert(ictx != nullptr);
+  CephContext *cct = ictx->cct;
 
-  void C_AioRead::finish(int r)
-  {
-    m_completion->lock.Lock();
-    CephContext *cct = m_completion->ictx->cct;
-    ldout(cct, 10) << "C_AioRead::finish() " << this << " r = " << r << dendl;
-
-    if (r >= 0 || r == -ENOENT) { // this was a sparse_read operation
-      ldout(cct, 10) << " got " << m_req->m_ext_map
-		     << " for " << m_req->m_buffer_extents
-		     << " bl " << m_req->data().length() << dendl;
-      // reads from the parent don't populate the m_ext_map and the overlap
-      // may not be the full buffer.  compensate here by filling in m_ext_map
-      // with the read extent when it is empty.
-      if (m_req->m_ext_map.empty())
-	m_req->m_ext_map[m_req->m_object_off] = m_req->data().length();
-
-      m_completion->destriper.add_partial_sparse_result(
-	  cct, m_req->data(), m_req->m_ext_map, m_req->m_object_off,
-	  m_req->m_buffer_extents);
-      r = m_req->m_object_len;
-    }
-    m_completion->lock.Unlock();
+  ldout(cct, 20) << this << " " << __func__ << ": pending=" << count << dendl;
+  assert(pending_count == 0);
+  pending_count = count;
+  lock.Unlock();
+
+  // if no pending requests, completion will fire now
+  unblock();
+}
 
-    C_AioRequest::finish(r);
+void AioCompletion::complete_request(ssize_t r)
+{
+  lock.Lock();
+  assert(ictx != nullptr);
+  CephContext *cct = ictx->cct;
+
+  if (rval >= 0) {
+    if (r < 0 && r != -EEXIST)
+      rval = r;
+    else if (r > 0)
+      rval += r;
   }
+  assert(pending_count);
+  int count = --pending_count;
 
-  void C_CacheRead::complete(int r) {
-    if (!m_enqueued) {
-      // cache_lock creates a lock ordering issue -- so re-execute this context
-      // outside the cache_lock
-      m_enqueued = true;
-      m_image_ctx.op_work_queue->queue(this, r);
-      return;
-    }
-    Context::complete(r);
+  ldout(cct, 20) << this << " " << __func__ << ": cb=" << complete_cb << ", "
+                 << "pending=" << pending_count << dendl;
+  if (!count && blockers == 0) {
+    finalize(rval);
+    complete();
   }
+  put_unlock();
+}
 
-  void C_CacheRead::finish(int r)
+void AioCompletion::associate_journal_event(uint64_t tid) {
+  Mutex::Locker l(lock);
+  assert(state == STATE_PENDING);
+  journal_tid = tid;
+}
+
+bool AioCompletion::is_complete() {
+  tracepoint(librbd, aio_is_complete_enter, this);
+  bool done;
   {
-    m_req->complete(r);
+    Mutex::Locker l(lock);
+    done = this->state == STATE_COMPLETE;
   }
+  tracepoint(librbd, aio_is_complete_exit, done);
+  return done;
 }
+
+ssize_t AioCompletion::get_return_value() {
+  tracepoint(librbd, aio_get_return_value_enter, this);
+  lock.Lock();
+  ssize_t r = rval;
+  lock.Unlock();
+  tracepoint(librbd, aio_get_return_value_exit, r);
+  return r;
+}
+
+} // namespace librbd
diff --git a/src/librbd/AioCompletion.h b/src/librbd/AioCompletion.h
index 758b1d8..6a83677 100644
--- a/src/librbd/AioCompletion.h
+++ b/src/librbd/AioCompletion.h
@@ -1,7 +1,8 @@
 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
 // vim: ts=8 sw=2 smarttab
-#ifndef CEPH_LIBRBD_AIOCOMPLETION_H
-#define CEPH_LIBRBD_AIOCOMPLETION_H
+
+#ifndef CEPH_LIBRBD_AIO_COMPLETION_H
+#define CEPH_LIBRBD_AIO_COMPLETION_H
 
 #include "common/Cond.h"
 #include "common/Mutex.h"
@@ -18,230 +19,220 @@ class CephContext;
 
 namespace librbd {
 
-  class AioObjectRead;
-
-  typedef enum {
-    AIO_TYPE_NONE = 0,
-    AIO_TYPE_OPEN,
-    AIO_TYPE_CLOSE,
-    AIO_TYPE_READ,
-    AIO_TYPE_WRITE,
-    AIO_TYPE_DISCARD,
-    AIO_TYPE_FLUSH,
-  } aio_type_t;
-
-  typedef enum {
-    STATE_PENDING = 0,
-    STATE_CALLBACK,
-    STATE_COMPLETE,
-  } aio_state_t;
-
-  /**
-   * AioCompletion is the overall completion for a single
-   * rbd I/O request. It may be composed of many AioObjectRequests,
-   * which each go to a single object.
-   *
-   * The retrying of individual requests is handled at a lower level,
-   * so all AioCompletion cares about is the count of outstanding
-   * requests. The number of expected individual requests should be
-   * set initially using set_request_count() prior to issuing the
-   * requests.  This ensures that the completion will not be completed
-   * within the caller's thread of execution (instead via a librados
-   * context or via a thread pool context for cache read hits).
-   */
-  struct AioCompletion {
-    Mutex lock;
-    Cond cond;
-    aio_state_t state;
-    ssize_t rval;
-    callback_t complete_cb;
-    void *complete_arg;
-    rbd_completion_t rbd_comp;
-    uint32_t pending_count;   ///< number of requests
-    uint32_t blockers;
-    int ref;
-    bool released;
-    ImageCtx *ictx;
-    utime_t start_time;
-    aio_type_t aio_type;
-
-    Striper::StripedReadResult destriper;
-    bufferlist *read_bl;
-    char *read_buf;
-    size_t read_buf_len;
-
-    AsyncOperation async_op;
-
-    uint64_t journal_tid;
-    xlist<AioCompletion*>::item m_xlist_item;
-    bool event_notify;
-
-    template <typename T, void (T::*MF)(int)>
-    static void callback_adapter(completion_t cb, void *arg) {
-      AioCompletion *comp = reinterpret_cast<AioCompletion *>(cb);
-      T *t = reinterpret_cast<T *>(arg);
-      (t->*MF)(comp->get_return_value());
-      comp->release();
-    }
-
-    static AioCompletion *create(void *cb_arg, callback_t cb_complete,
-                                 rbd_completion_t rbd_comp) {
-      AioCompletion *comp = new AioCompletion();
-      comp->set_complete_cb(cb_arg, cb_complete);
-      comp->rbd_comp = (rbd_comp != nullptr ? rbd_comp : comp);
-      return comp;
-    }
-
-    template <typename T, void (T::*MF)(int) = &T::complete>
-    static AioCompletion *create(T *obj) {
-      AioCompletion *comp = new AioCompletion();
-      comp->set_complete_cb(obj, &callback_adapter<T, MF>);
-      comp->rbd_comp = comp;
-      return comp;
-    }
-
-    AioCompletion() : lock("AioCompletion::lock", true, false),
-		      state(STATE_PENDING), rval(0), complete_cb(NULL),
-		      complete_arg(NULL), rbd_comp(NULL),
-		      pending_count(0), blockers(1),
-		      ref(1), released(false), ictx(NULL),
-		      aio_type(AIO_TYPE_NONE),
-		      read_bl(NULL), read_buf(NULL), read_buf_len(0),
-                      journal_tid(0),
-                      m_xlist_item(this), event_notify(false) {
-    }
-    ~AioCompletion() {
-    }
-
-    int wait_for_complete();
-
-    void finalize(ssize_t rval);
-
-    void init_time(ImageCtx *i, aio_type_t t);
-    void start_op(bool ignore_type = false);
-    void fail(int r);
-
-    void complete();
-
-    void set_complete_cb(void *cb_arg, callback_t cb) {
-      complete_cb = cb;
-      complete_arg = cb_arg;
-    }
-
-    void set_request_count(uint32_t num);
-    void add_request() {
-      lock.Lock();
-      assert(pending_count > 0);
-      lock.Unlock();
-      get();
-    }
-    void complete_request(ssize_t r);
-
-    void associate_journal_event(uint64_t tid);
-
-    bool is_complete();
-
-    ssize_t get_return_value();
-
-    void get() {
-      lock.Lock();
-      assert(ref > 0);
-      ref++;
-      lock.Unlock();
-    }
-    void release() {
-      lock.Lock();
-      assert(!released);
-      released = true;
-      put_unlock();
-    }
-    void put() {
-      lock.Lock();
-      put_unlock();
-    }
-    void put_unlock() {
-      assert(ref > 0);
-      int n = --ref;
-      lock.Unlock();
-      if (!n) {
-        if (ictx) {
-	  if (event_notify) {
-	    ictx->completed_reqs_lock.Lock();
-	    m_xlist_item.remove_myself();
-	    ictx->completed_reqs_lock.Unlock();
-	  }
-	  if (aio_type == AIO_TYPE_CLOSE || (aio_type == AIO_TYPE_OPEN &&
-					     rval < 0)) {
-	    delete ictx;
-	  }
-	}
-        delete this;
+typedef enum {
+  AIO_TYPE_NONE = 0,
+  AIO_TYPE_OPEN,
+  AIO_TYPE_CLOSE,
+  AIO_TYPE_READ,
+  AIO_TYPE_WRITE,
+  AIO_TYPE_DISCARD,
+  AIO_TYPE_FLUSH,
+} aio_type_t;
+
+typedef enum {
+  STATE_PENDING = 0,
+  STATE_CALLBACK,
+  STATE_COMPLETE,
+} aio_state_t;
+
+/**
+ * AioCompletion is the overall completion for a single
+ * rbd I/O request. It may be composed of many AioObjectRequests,
+ * which each go to a single object.
+ *
+ * The retrying of individual requests is handled at a lower level,
+ * so all AioCompletion cares about is the count of outstanding
+ * requests. The number of expected individual requests should be
+ * set initially using set_request_count() prior to issuing the
+ * requests.  This ensures that the completion will not be completed
+ * within the caller's thread of execution (instead via a librados
+ * context or via a thread pool context for cache read hits).
+ */
+struct AioCompletion {
+  mutable Mutex lock;
+  Cond cond;
+  aio_state_t state;
+  ssize_t rval;
+  callback_t complete_cb;
+  void *complete_arg;
+  rbd_completion_t rbd_comp;
+  uint32_t pending_count;   ///< number of requests
+  uint32_t blockers;
+  int ref;
+  bool released;
+  ImageCtx *ictx;
+  utime_t start_time;
+  aio_type_t aio_type;
+
+  Striper::StripedReadResult destriper;
+  bufferlist *read_bl;
+  char *read_buf;
+  size_t read_buf_len;
+
+  AsyncOperation async_op;
+
+  uint64_t journal_tid;
+  xlist<AioCompletion*>::item m_xlist_item;
+  bool event_notify;
+
+  template <typename T, void (T::*MF)(int)>
+  static void callback_adapter(completion_t cb, void *arg) {
+    AioCompletion *comp = reinterpret_cast<AioCompletion *>(cb);
+    T *t = reinterpret_cast<T *>(arg);
+    (t->*MF)(comp->get_return_value());
+    comp->release();
+  }
+
+  static AioCompletion *create(void *cb_arg, callback_t cb_complete,
+                               rbd_completion_t rbd_comp) {
+    AioCompletion *comp = new AioCompletion();
+    comp->set_complete_cb(cb_arg, cb_complete);
+    comp->rbd_comp = (rbd_comp != nullptr ? rbd_comp : comp);
+    return comp;
+  }
+
+  template <typename T, void (T::*MF)(int) = &T::complete>
+  static AioCompletion *create(T *obj) {
+    AioCompletion *comp = new AioCompletion();
+    comp->set_complete_cb(obj, &callback_adapter<T, MF>);
+    comp->rbd_comp = comp;
+    return comp;
+  }
+
+  template <typename T, void (T::*MF)(int) = &T::complete>
+  static AioCompletion *create_and_start(T *obj, ImageCtx *image_ctx,
+                                         aio_type_t type) {
+    AioCompletion *comp = create<T, MF>(obj);
+    comp->init_time(image_ctx, type);
+    comp->start_op();
+    return comp;
+  }
+
+  AioCompletion() : lock("AioCompletion::lock", true, false),
+                    state(STATE_PENDING), rval(0), complete_cb(NULL),
+                    complete_arg(NULL), rbd_comp(NULL),
+                    pending_count(0), blockers(1),
+                    ref(1), released(false), ictx(NULL),
+                    aio_type(AIO_TYPE_NONE),
+                    read_bl(NULL), read_buf(NULL), read_buf_len(0),
+                    journal_tid(0), m_xlist_item(this), event_notify(false) {
+  }
+
+  ~AioCompletion() {
+  }
+
+  int wait_for_complete();
+
+  void finalize(ssize_t rval);
+
+  inline bool is_initialized(aio_type_t type) const {
+    Mutex::Locker locker(lock);
+    return ((ictx != nullptr) && (aio_type == type));
+  }
+  inline bool is_started() const {
+    Mutex::Locker locker(lock);
+    return async_op.started();
+  }
+
+  void init_time(ImageCtx *i, aio_type_t t);
+  void start_op(bool ignore_type = false);
+  void fail(int r);
+
+  void complete();
+
+  void set_complete_cb(void *cb_arg, callback_t cb) {
+    complete_cb = cb;
+    complete_arg = cb_arg;
+  }
+
+  void set_request_count(uint32_t num);
+  void add_request() {
+    lock.Lock();
+    assert(pending_count > 0);
+    lock.Unlock();
+    get();
+  }
+  void complete_request(ssize_t r);
+
+  void associate_journal_event(uint64_t tid);
+
+  bool is_complete();
+
+  ssize_t get_return_value();
+
+  void get() {
+    lock.Lock();
+    assert(ref > 0);
+    ref++;
+    lock.Unlock();
+  }
+  void release() {
+    lock.Lock();
+    assert(!released);
+    released = true;
+    put_unlock();
+  }
+  void put() {
+    lock.Lock();
+    put_unlock();
+  }
+  void put_unlock() {
+    assert(ref > 0);
+    int n = --ref;
+    lock.Unlock();
+    if (!n) {
+      if (ictx) {
+        if (event_notify) {
+          ictx->completed_reqs_lock.Lock();
+          m_xlist_item.remove_myself();
+          ictx->completed_reqs_lock.Unlock();
+        }
+        if (aio_type == AIO_TYPE_CLOSE ||
+            (aio_type == AIO_TYPE_OPEN && rval < 0)) {
+          delete ictx;
+        }
       }
-    }
-
-    void block() {
-      Mutex::Locker l(lock);
-      ++blockers;
-    }
-    void unblock() {
-      Mutex::Locker l(lock);
-      assert(blockers > 0);
-      --blockers;
-      if (pending_count == 0 && blockers == 0) {
-        finalize(rval);
-        complete();
-      }
-    }
-
-    void set_event_notify(bool s) {
-      Mutex::Locker l(lock);
-      event_notify = s;
-    }
-
-    void *get_arg() {
-      return complete_arg;
-    }
-  };
-
-  class C_AioRequest : public Context {
-  public:
-    C_AioRequest(AioCompletion *completion) : m_completion(completion) {
-      m_completion->add_request();
-    }
-    virtual ~C_AioRequest() {}
-    virtual void finish(int r) {
-      m_completion->complete_request(r);
-    }
-  protected:
-    AioCompletion *m_completion;
-  };
-
-  class C_AioRead : public C_AioRequest {
-  public:
-    C_AioRead(AioCompletion *completion)
-      : C_AioRequest(completion), m_req(nullptr) {
-    }
-    virtual ~C_AioRead() {}
-    virtual void finish(int r);
-    void set_req(AioObjectRead *req) {
-      m_req = req;
-    }
-  private:
-    AioObjectRead *m_req;
-  };
-
-  class C_CacheRead : public Context {
-  public:
-    explicit C_CacheRead(ImageCtx *ictx, AioObjectRead *req)
-      : m_image_ctx(*ictx), m_req(req), m_enqueued(false) {}
-    virtual void complete(int r);
-  protected:
-    virtual void finish(int r);
-  private:
-    ImageCtx &m_image_ctx;
-    AioObjectRead *m_req;
-    bool m_enqueued;
-  };
-}
-
-#endif
+      delete this;
+    }
+  }
+
+  void block() {
+    Mutex::Locker l(lock);
+    ++blockers;
+  }
+  void unblock() {
+    Mutex::Locker l(lock);
+    assert(blockers > 0);
+    --blockers;
+    if (pending_count == 0 && blockers == 0) {
+      finalize(rval);
+      complete();
+    }
+  }
+
+  void set_event_notify(bool s) {
+    Mutex::Locker l(lock);
+    event_notify = s;
+  }
+
+  void *get_arg() {
+    return complete_arg;
+  }
+};
+
+class C_AioRequest : public Context {
+public:
+  C_AioRequest(AioCompletion *completion) : m_completion(completion) {
+    m_completion->add_request();
+  }
+  virtual ~C_AioRequest() {}
+  virtual void finish(int r) {
+    m_completion->complete_request(r);
+  }
+protected:
+  AioCompletion *m_completion;
+};
+
+} // namespace librbd
+
+#endif // CEPH_LIBRBD_AIO_COMPLETION_H
diff --git a/src/librbd/AioImageRequest.cc b/src/librbd/AioImageRequest.cc
index 1877af1..010283f 100644
--- a/src/librbd/AioImageRequest.cc
+++ b/src/librbd/AioImageRequest.cc
@@ -4,14 +4,13 @@
 #include "librbd/AioImageRequest.h"
 #include "librbd/AioCompletion.h"
 #include "librbd/AioObjectRequest.h"
-#include "librbd/ExclusiveLock.h"
 #include "librbd/ImageCtx.h"
-#include "librbd/ImageState.h"
-#include "librbd/ImageWatcher.h"
 #include "librbd/internal.h"
 #include "librbd/Journal.h"
+#include "librbd/Utils.h"
 #include "librbd/journal/Types.h"
 #include "include/rados/librados.hpp"
+#include "common/WorkQueue.h"
 #include "osdc/Striper.h"
 
 #define dout_subsys ceph_subsys_rbd
@@ -20,16 +19,19 @@
 
 namespace librbd {
 
+using util::get_image_ctx;
+
 namespace {
 
+template <typename ImageCtxT = ImageCtx>
 struct C_DiscardJournalCommit : public Context {
   typedef std::vector<ObjectExtent> ObjectExtents;
 
-  ImageCtx &image_ctx;
+  ImageCtxT &image_ctx;
   AioCompletion *aio_comp;
   ObjectExtents object_extents;
 
-  C_DiscardJournalCommit(ImageCtx &_image_ctx, AioCompletion *_aio_comp,
+  C_DiscardJournalCommit(ImageCtxT &_image_ctx, AioCompletion *_aio_comp,
                          const ObjectExtents &_object_extents, uint64_t tid)
     : image_ctx(_image_ctx), aio_comp(_aio_comp),
       object_extents(_object_extents) {
@@ -52,11 +54,12 @@ struct C_DiscardJournalCommit : public Context {
   }
 };
 
+template <typename ImageCtxT = ImageCtx>
 struct C_FlushJournalCommit : public Context {
-  ImageCtx &image_ctx;
+  ImageCtxT &image_ctx;
   AioCompletion *aio_comp;
 
-  C_FlushJournalCommit(ImageCtx &_image_ctx, AioCompletion *_aio_comp,
+  C_FlushJournalCommit(ImageCtxT &_image_ctx, AioCompletion *_aio_comp,
                        uint64_t tid)
     : image_ctx(_image_ctx), aio_comp(_aio_comp) {
     CephContext *cct = image_ctx.cct;
@@ -75,6 +78,74 @@ struct C_FlushJournalCommit : public Context {
   }
 };
 
+template <typename ImageCtxT>
+class C_AioRead : public C_AioRequest {
+public:
+  C_AioRead(AioCompletion *completion)
+    : C_AioRequest(completion), m_req(nullptr) {
+  }
+
+  virtual void finish(int r) {
+    m_completion->lock.Lock();
+    CephContext *cct = m_completion->ictx->cct;
+    ldout(cct, 10) << "C_AioRead::finish() " << this << " r = " << r << dendl;
+
+    if (r >= 0 || r == -ENOENT) { // this was a sparse_read operation
+      ldout(cct, 10) << " got " << m_req->get_extent_map()
+                     << " for " << m_req->get_buffer_extents()
+                     << " bl " << m_req->data().length() << dendl;
+      // reads from the parent don't populate the m_ext_map and the overlap
+      // may not be the full buffer.  compensate here by filling in m_ext_map
+      // with the read extent when it is empty.
+      if (m_req->get_extent_map().empty()) {
+        m_req->get_extent_map()[m_req->get_offset()] = m_req->data().length();
+      }
+
+      m_completion->destriper.add_partial_sparse_result(
+          cct, m_req->data(), m_req->get_extent_map(), m_req->get_offset(),
+          m_req->get_buffer_extents());
+      r = m_req->get_length();
+    }
+    m_completion->lock.Unlock();
+
+    C_AioRequest::finish(r);
+  }
+
+  void set_req(AioObjectRead<ImageCtxT> *req) {
+    m_req = req;
+  }
+private:
+  AioObjectRead<ImageCtxT> *m_req;
+};
+
+template <typename ImageCtxT>
+class C_CacheRead : public Context {
+public:
+  explicit C_CacheRead(ImageCtxT &ictx, AioObjectRead<ImageCtxT> *req)
+    : m_image_ctx(ictx), m_req(req), m_enqueued(false) {}
+
+  virtual void complete(int r) {
+    if (!m_enqueued) {
+      // cache_lock creates a lock ordering issue -- so re-execute this context
+      // outside the cache_lock
+      m_enqueued = true;
+      m_image_ctx.op_work_queue->queue(this, r);
+      return;
+    }
+    Context::complete(r);
+  }
+
+protected:
+  virtual void finish(int r) {
+    m_req->complete(r);
+  }
+
+private:
+  ImageCtxT &m_image_ctx;
+  AioObjectRead<ImageCtxT> *m_req;
+  bool m_enqueued;
+};
+
 } // anonymous namespace
 
 template <typename I>
@@ -82,10 +153,7 @@ void AioImageRequest<I>::aio_read(
     I *ictx, AioCompletion *c,
     const std::vector<std::pair<uint64_t,uint64_t> > &extents,
     char *buf, bufferlist *pbl, int op_flags) {
-  c->init_time(ictx, librbd::AIO_TYPE_READ);
-
-  AioImageRead req(*ictx, c, extents, buf, pbl, op_flags);
-  req.start_op();
+  AioImageRead<I> req(*ictx, c, extents, buf, pbl, op_flags);
   req.send();
 }
 
@@ -93,10 +161,7 @@ template <typename I>
 void AioImageRequest<I>::aio_read(I *ictx, AioCompletion *c,
                                   uint64_t off, size_t len, char *buf,
                                   bufferlist *pbl, int op_flags) {
-  c->init_time(ictx, librbd::AIO_TYPE_READ);
-
-  AioImageRead req(*ictx, c, off, len, buf, pbl, op_flags);
-  req.start_op();
+  AioImageRead<I> req(*ictx, c, off, len, buf, pbl, op_flags);
   req.send();
 }
 
@@ -104,98 +169,97 @@ template <typename I>
 void AioImageRequest<I>::aio_write(I *ictx, AioCompletion *c,
                                    uint64_t off, size_t len, const char *buf,
                                    int op_flags) {
-  c->init_time(ictx, librbd::AIO_TYPE_WRITE);
-
-  AioImageWrite req(*ictx, c, off, len, buf, op_flags);
-  req.start_op();
+  AioImageWrite<I> req(*ictx, c, off, len, buf, op_flags);
   req.send();
 }
 
 template <typename I>
 void AioImageRequest<I>::aio_discard(I *ictx, AioCompletion *c,
                                      uint64_t off, uint64_t len) {
-  c->init_time(ictx, librbd::AIO_TYPE_DISCARD);
-
-  AioImageDiscard req(*ictx, c, off, len);
-  req.start_op();
+  AioImageDiscard<I> req(*ictx, c, off, len);
   req.send();
 }
 
 template <typename I>
 void AioImageRequest<I>::aio_flush(I *ictx, AioCompletion *c) {
-  c->init_time(ictx, librbd::AIO_TYPE_FLUSH);
-
-  AioImageFlush req(*ictx, c);
-  req.start_op();
+  AioImageFlush<I> req(*ictx, c);
   req.send();
 }
 
 template <typename I>
 void AioImageRequest<I>::send() {
-  assert(m_image_ctx.owner_lock.is_locked());
+  I &image_ctx = this->m_image_ctx;
+  assert(image_ctx.owner_lock.is_locked());
+  assert(m_aio_comp->is_initialized(get_aio_type()));
+  assert(m_aio_comp->is_started() ^ (get_aio_type() == AIO_TYPE_FLUSH));
 
-  CephContext *cct = m_image_ctx.cct;
-  ldout(cct, 20) << get_request_type() << ": ictx=" << &m_image_ctx << ", "
-                 << "completion=" << m_aio_comp <<  dendl;
+  CephContext *cct = image_ctx.cct;
+  AioCompletion *aio_comp = this->m_aio_comp;
+  ldout(cct, 20) << get_request_type() << ": ictx=" << &image_ctx << ", "
+                 << "completion=" << aio_comp <<  dendl;
 
-  m_aio_comp->get();
+  aio_comp->get();
   send_request();
 }
 
 template <typename I>
 void AioImageRequest<I>::fail(int r) {
-  m_aio_comp->get();
-  m_aio_comp->fail(r);
+  AioCompletion *aio_comp = this->m_aio_comp;
+  aio_comp->get();
+  aio_comp->fail(r);
 }
 
-void AioImageRead::send_request() {
-  CephContext *cct = m_image_ctx.cct;
+template <typename I>
+void AioImageRead<I>::send_request() {
+  I &image_ctx = this->m_image_ctx;
+  CephContext *cct = image_ctx.cct;
 
-  if (m_image_ctx.object_cacher && m_image_ctx.readahead_max_bytes > 0 &&
+  if (image_ctx.object_cacher && image_ctx.readahead_max_bytes > 0 &&
       !(m_op_flags & LIBRADOS_OP_FLAG_FADVISE_RANDOM)) {
-    readahead(&m_image_ctx, m_image_extents);
+    readahead(get_image_ctx(&image_ctx), m_image_extents);
   }
 
+  AioCompletion *aio_comp = this->m_aio_comp;
   librados::snap_t snap_id;
   map<object_t,vector<ObjectExtent> > object_extents;
   uint64_t buffer_ofs = 0;
   {
     // prevent image size from changing between computing clip and recording
     // pending async operation
-    RWLock::RLocker snap_locker(m_image_ctx.snap_lock);
-    snap_id = m_image_ctx.snap_id;
+    RWLock::RLocker snap_locker(image_ctx.snap_lock);
+    snap_id = image_ctx.snap_id;
 
     // map
     for (vector<pair<uint64_t,uint64_t> >::const_iterator p =
            m_image_extents.begin();
          p != m_image_extents.end(); ++p) {
       uint64_t len = p->second;
-      int r = clip_io(&m_image_ctx, p->first, &len);
+      int r = clip_io(get_image_ctx(&image_ctx), p->first, &len);
       if (r < 0) {
-        m_aio_comp->fail(r);
+        aio_comp->fail(r);
         return;
       }
       if (len == 0) {
         continue;
       }
 
-      Striper::file_to_extents(cct, m_image_ctx.format_string,
-                               &m_image_ctx.layout, p->first, len, 0,
+      Striper::file_to_extents(cct, image_ctx.format_string,
+                               &image_ctx.layout, p->first, len, 0,
                                object_extents, buffer_ofs);
       buffer_ofs += len;
     }
   }
 
-  m_aio_comp->read_buf = m_buf;
-  m_aio_comp->read_buf_len = buffer_ofs;
-  m_aio_comp->read_bl = m_pbl;
+  aio_comp->read_buf = m_buf;
+  aio_comp->read_buf_len = buffer_ofs;
+  aio_comp->read_bl = m_pbl;
 
   // pre-calculate the expected number of read requests
   uint32_t request_count = 0;
   for (auto &object_extent : object_extents) {
     request_count += object_extent.second.size();
   }
-  m_aio_comp->set_request_count(request_count);
+  aio_comp->set_request_count(request_count);
 
   // issue the requests
   for (auto &object_extent : object_extents) {
@@ -204,72 +268,76 @@ void AioImageRead::send_request() {
                      << extent.length << " from " << extent.buffer_extents
                      << dendl;
 
-      C_AioRead *req_comp = new C_AioRead(m_aio_comp);
-      AioObjectRead *req = new AioObjectRead(&m_image_ctx, extent.oid.name,
-                                             extent.objectno, extent.offset,
-                                             extent.length,
-                                             extent.buffer_extents, snap_id,
-                                             true, req_comp, m_op_flags);
+      C_AioRead<I> *req_comp = new C_AioRead<I>(aio_comp);
+      AioObjectRead<I> *req = AioObjectRead<I>::create(
+        &image_ctx, extent.oid.name, extent.objectno, extent.offset,
+        extent.length, extent.buffer_extents, snap_id, true, req_comp,
+        m_op_flags);
       req_comp->set_req(req);
 
-      if (m_image_ctx.object_cacher) {
-        C_CacheRead *cache_comp = new C_CacheRead(&m_image_ctx, req);
-        m_image_ctx.aio_read_from_cache(extent.oid, extent.objectno,
-                                        &req->data(), extent.length,
-                                        extent.offset, cache_comp, m_op_flags);
+      if (image_ctx.object_cacher) {
+        C_CacheRead<I> *cache_comp = new C_CacheRead<I>(image_ctx, req);
+        image_ctx.aio_read_from_cache(extent.oid, extent.objectno,
+                                      &req->data(), extent.length,
+                                      extent.offset, cache_comp, m_op_flags);
       } else {
         req->send();
       }
     }
   }
 
-  m_aio_comp->put();
+  aio_comp->put();
 
-  m_image_ctx.perfcounter->inc(l_librbd_rd);
-  m_image_ctx.perfcounter->inc(l_librbd_rd_bytes, buffer_ofs);
+  image_ctx.perfcounter->inc(l_librbd_rd);
+  image_ctx.perfcounter->inc(l_librbd_rd_bytes, buffer_ofs);
 }
 
-void AbstractAioImageWrite::send_request() {
-  CephContext *cct = m_image_ctx.cct;
+template <typename I>
+void AbstractAioImageWrite<I>::send_request() {
+  I &image_ctx = this->m_image_ctx;
+  CephContext *cct = image_ctx.cct;
 
-  RWLock::RLocker md_locker(m_image_ctx.md_lock);
+  RWLock::RLocker md_locker(image_ctx.md_lock);
 
   bool journaling = false;
 
+  AioCompletion *aio_comp = this->m_aio_comp;
   uint64_t clip_len = m_len;
   ObjectExtents object_extents;
   ::SnapContext snapc;
   {
     // prevent image size from changing between computing clip and recording
     // pending async operation
-    RWLock::RLocker snap_locker(m_image_ctx.snap_lock);
-    if (m_image_ctx.snap_id != CEPH_NOSNAP || m_image_ctx.read_only) {
-      m_aio_comp->fail(-EROFS);
+    RWLock::RLocker snap_locker(image_ctx.snap_lock);
+    if (image_ctx.snap_id != CEPH_NOSNAP || image_ctx.read_only) {
+      aio_comp->fail(-EROFS);
       return;
     }
 
-    int r = clip_io(&m_image_ctx, m_off, &clip_len);
+    int r = clip_io(get_image_ctx(&image_ctx), m_off, &clip_len);
     if (r < 0) {
-      m_aio_comp->fail(r);
+      aio_comp->fail(r);
       return;
     }
 
-    snapc = m_image_ctx.snapc;
+    snapc = image_ctx.snapc;
 
     // map to object extents
     if (clip_len > 0) {
-      Striper::file_to_extents(cct, m_image_ctx.format_string,
-                               &m_image_ctx.layout, m_off, clip_len, 0,
+      Striper::file_to_extents(cct, image_ctx.format_string,
+                               &image_ctx.layout, m_off, clip_len, 0,
                                object_extents);
     }
 
-    journaling = (m_image_ctx.journal != NULL &&
-                  !m_image_ctx.journal->is_journal_replaying());
+    journaling = (image_ctx.journal != nullptr &&
+                  image_ctx.journal->is_journal_appending());
   }
 
+  prune_object_extents(object_extents);
+
   if (!object_extents.empty()) {
     uint64_t journal_tid = 0;
-    m_aio_comp->set_request_count(
+    aio_comp->set_request_count(
       object_extents.size() + get_cache_request_count(journaling));
 
     AioObjectRequests requests;
@@ -278,33 +346,37 @@ void AbstractAioImageWrite::send_request() {
 
     if (journaling) {
       // in-flight ops are flushed prior to closing the journal
-      assert(m_image_ctx.journal != NULL);
+      assert(image_ctx.journal != NULL);
       journal_tid = append_journal_event(requests, m_synchronous);
     }
 
-    if (m_image_ctx.object_cacher != NULL) {
+    if (image_ctx.object_cacher != NULL) {
       send_cache_requests(object_extents, journal_tid);
     }
   } else {
     // no IO to perform -- fire completion
-    m_aio_comp->unblock();
+    aio_comp->unblock();
   }
 
   update_stats(clip_len);
-  m_aio_comp->put();
+  aio_comp->put();
 }
 
-void AbstractAioImageWrite::send_object_requests(
+template <typename I>
+void AbstractAioImageWrite<I>::send_object_requests(
     const ObjectExtents &object_extents, const ::SnapContext &snapc,
     AioObjectRequests *aio_object_requests) {
-  CephContext *cct = m_image_ctx.cct;
+  I &image_ctx = this->m_image_ctx;
+  CephContext *cct = image_ctx.cct;
 
+  AioCompletion *aio_comp = this->m_aio_comp;
   for (ObjectExtents::const_iterator p = object_extents.begin();
        p != object_extents.end(); ++p) {
     ldout(cct, 20) << " oid " << p->oid << " " << p->offset << "~" << p->length
                    << " from " << p->buffer_extents << dendl;
-    C_AioRequest *req_comp = new C_AioRequest(m_aio_comp);
-    AioObjectRequest *request = create_object_request(*p, snapc, req_comp);
+    C_AioRequest *req_comp = new C_AioRequest(aio_comp);
+    AioObjectRequestHandle *request = create_object_request(*p, snapc,
+                                                            req_comp);
 
     // if journaling, stash the request for later; otherwise send
     if (request != NULL) {
@@ -317,168 +389,223 @@ void AbstractAioImageWrite::send_object_requests(
   }
 }
 
-void AioImageWrite::assemble_extent(const ObjectExtent &object_extent,
+template <typename I>
+void AioImageWrite<I>::assemble_extent(const ObjectExtent &object_extent,
                                     bufferlist *bl) {
-  for (Extents::const_iterator q = object_extent.buffer_extents.begin();
+  for (auto q = object_extent.buffer_extents.begin();
        q != object_extent.buffer_extents.end(); ++q) {
     bl->append(m_buf + q->first, q->second);;
   }
 }
 
-uint64_t AioImageWrite::append_journal_event(
+template <typename I>
+uint64_t AioImageWrite<I>::append_journal_event(
     const AioObjectRequests &requests, bool synchronous) {
   bufferlist bl;
-  bl.append(m_buf, m_len);
-
-  uint64_t tid = m_image_ctx.journal->append_write_event(m_off, m_len, bl,
-                                                         requests, synchronous);
-  if (m_image_ctx.object_cacher == NULL) {
-    m_aio_comp->associate_journal_event(tid);
+  bl.append(m_buf, this->m_len);
+
+  I &image_ctx = this->m_image_ctx;
+  uint64_t tid = image_ctx.journal->append_write_event(this->m_off, this->m_len,
+                                                       bl, requests,
+                                                       synchronous);
+  if (image_ctx.object_cacher == NULL) {
+    AioCompletion *aio_comp = this->m_aio_comp;
+    aio_comp->associate_journal_event(tid);
   }
   return tid;
 }
 
-void AioImageWrite::send_cache_requests(const ObjectExtents &object_extents,
+template <typename I>
+void AioImageWrite<I>::send_cache_requests(const ObjectExtents &object_extents,
                                         uint64_t journal_tid) {
-  for (ObjectExtents::const_iterator p = object_extents.begin();
-       p != object_extents.end(); ++p) {
+  I &image_ctx = this->m_image_ctx;
+  for (auto p = object_extents.begin(); p != object_extents.end(); ++p) {
     const ObjectExtent &object_extent = *p;
 
     bufferlist bl;
     assemble_extent(object_extent, &bl);
 
-    C_AioRequest *req_comp = new C_AioRequest(m_aio_comp);
-    m_image_ctx.write_to_cache(object_extent.oid, bl, object_extent.length,
-                               object_extent.offset, req_comp, m_op_flags,
+    AioCompletion *aio_comp = this->m_aio_comp;
+    C_AioRequest *req_comp = new C_AioRequest(aio_comp);
+    image_ctx.write_to_cache(object_extent.oid, bl, object_extent.length,
+                             object_extent.offset, req_comp, m_op_flags,
                                journal_tid);
   }
 }
 
-void AioImageWrite::send_object_requests(
+template <typename I>
+void AioImageWrite<I>::send_object_requests(
     const ObjectExtents &object_extents, const ::SnapContext &snapc,
     AioObjectRequests *aio_object_requests) {
+  I &image_ctx = this->m_image_ctx;
+
   // cache handles creating object requests during writeback
-  if (m_image_ctx.object_cacher == NULL) {
-    AbstractAioImageWrite::send_object_requests(object_extents, snapc,
+  if (image_ctx.object_cacher == NULL) {
+    AbstractAioImageWrite<I>::send_object_requests(object_extents, snapc,
                                                 aio_object_requests);
   }
 }
 
-AioObjectRequest *AioImageWrite::create_object_request(
+template <typename I>
+AioObjectRequestHandle *AioImageWrite<I>::create_object_request(
     const ObjectExtent &object_extent, const ::SnapContext &snapc,
     Context *on_finish) {
-  assert(m_image_ctx.object_cacher == NULL);
+  I &image_ctx = this->m_image_ctx;
+  assert(image_ctx.object_cacher == NULL);
 
   bufferlist bl;
   assemble_extent(object_extent, &bl);
-  AioObjectWrite *req = new AioObjectWrite(&m_image_ctx,
-                                           object_extent.oid.name,
-                                           object_extent.objectno,
-                                           object_extent.offset, bl,
-                                           snapc, on_finish);
-  req->set_op_flags(m_op_flags);
+  AioObjectRequest<I> *req = AioObjectRequest<I>::create_write(
+    &image_ctx, object_extent.oid.name, object_extent.objectno,
+    object_extent.offset, bl, snapc, on_finish, m_op_flags);
   return req;
 }
 
-void AioImageWrite::update_stats(size_t length) {
-  m_image_ctx.perfcounter->inc(l_librbd_wr);
-  m_image_ctx.perfcounter->inc(l_librbd_wr_bytes, length);
+template <typename I>
+void AioImageWrite<I>::update_stats(size_t length) {
+  I &image_ctx = this->m_image_ctx;
+  image_ctx.perfcounter->inc(l_librbd_wr);
+  image_ctx.perfcounter->inc(l_librbd_wr_bytes, length);
 }
 
-uint64_t AioImageDiscard::append_journal_event(
+template <typename I>
+uint64_t AioImageDiscard<I>::append_journal_event(
     const AioObjectRequests &requests, bool synchronous) {
-  journal::EventEntry event_entry(journal::AioDiscardEvent(m_off, m_len));
-  uint64_t tid = m_image_ctx.journal->append_io_event(std::move(event_entry),
-                                                      requests, m_off, m_len,
-                                                      synchronous);
-  m_aio_comp->associate_journal_event(tid);
+  I &image_ctx = this->m_image_ctx;
+
+  journal::EventEntry event_entry(journal::AioDiscardEvent(this->m_off,
+                                                           this->m_len));
+  uint64_t tid = image_ctx.journal->append_io_event(std::move(event_entry),
+                                                    requests, this->m_off,
+                                                    this->m_len, synchronous);
+
+  AioCompletion *aio_comp = this->m_aio_comp;
+  aio_comp->associate_journal_event(tid);
   return tid;
 }
 
-uint32_t AioImageDiscard::get_cache_request_count(bool journaling) const {
+template <typename I>
+void AioImageDiscard<I>::prune_object_extents(ObjectExtents &object_extents) {
+  I &image_ctx = this->m_image_ctx;
+  CephContext *cct = image_ctx.cct;
+  if (!cct->_conf->rbd_skip_partial_discard) {
+    return;
+  }
+
+  for (auto p = object_extents.begin(); p != object_extents.end(); ) {
+    if (p->offset + p->length < image_ctx.layout.object_size) {
+      ldout(cct, 20) << " oid " << p->oid << " " << p->offset << "~"
+		     << p->length << " from " << p->buffer_extents
+		     << ": skip partial discard" << dendl;
+      p = object_extents.erase(p);
+    } else {
+      ++p;
+    }
+  }
+}
+
+template <typename I>
+uint32_t AioImageDiscard<I>::get_cache_request_count(bool journaling) const {
   // extra completion request is required for tracking journal commit
-  return (m_image_ctx.object_cacher != nullptr && journaling ? 1 : 0);
+  I &image_ctx = this->m_image_ctx;
+  return (image_ctx.object_cacher != nullptr && journaling ? 1 : 0);
 }
 
-void AioImageDiscard::send_cache_requests(const ObjectExtents &object_extents,
+template <typename I>
+void AioImageDiscard<I>::send_cache_requests(const ObjectExtents &object_extents,
                                           uint64_t journal_tid) {
+  I &image_ctx = this->m_image_ctx;
   if (journal_tid == 0) {
-    Mutex::Locker cache_locker(m_image_ctx.cache_lock);
-    m_image_ctx.object_cacher->discard_set(m_image_ctx.object_set,
-                                           object_extents);
+    Mutex::Locker cache_locker(image_ctx.cache_lock);
+    image_ctx.object_cacher->discard_set(image_ctx.object_set,
+                                         object_extents);
   } else {
     // cannot discard from cache until journal has committed
-    assert(m_image_ctx.journal != NULL);
-    m_image_ctx.journal->wait_event(
-      journal_tid, new C_DiscardJournalCommit(m_image_ctx, m_aio_comp,
-                                              object_extents, journal_tid));
+    assert(image_ctx.journal != NULL);
+    AioCompletion *aio_comp = this->m_aio_comp;
+    image_ctx.journal->wait_event(
+      journal_tid, new C_DiscardJournalCommit<I>(image_ctx, aio_comp,
+                                                 object_extents, journal_tid));
   }
 }
 
-AioObjectRequest *AioImageDiscard::create_object_request(
+template <typename I>
+AioObjectRequestHandle *AioImageDiscard<I>::create_object_request(
     const ObjectExtent &object_extent, const ::SnapContext &snapc,
     Context *on_finish) {
-  CephContext *cct = m_image_ctx.cct;
+  I &image_ctx = this->m_image_ctx;
 
-  AioObjectRequest *req;
-  if (object_extent.length == m_image_ctx.layout.object_size) {
-    req = new AioObjectRemove(&m_image_ctx, object_extent.oid.name,
-                              object_extent.objectno, snapc, on_finish);
+  AioObjectRequest<I> *req;
+  if (object_extent.length == image_ctx.layout.object_size) {
+    req = AioObjectRequest<I>::create_remove(
+      &image_ctx, object_extent.oid.name, object_extent.objectno, snapc,
+      on_finish);
   } else if (object_extent.offset + object_extent.length ==
-               m_image_ctx.layout.object_size) {
-    req = new AioObjectTruncate(&m_image_ctx, object_extent.oid.name,
-                                object_extent.objectno, object_extent.offset,
-                                snapc, on_finish);
+               image_ctx.layout.object_size) {
+    req = AioObjectRequest<I>::create_truncate(
+      &image_ctx, object_extent.oid.name, object_extent.objectno,
+      object_extent.offset, snapc, on_finish);
   } else {
-    if(cct->_conf->rbd_skip_partial_discard) {
-      delete on_finish;
-      return NULL;
-    }
-    req = new AioObjectZero(&m_image_ctx, object_extent.oid.name,
-                            object_extent.objectno, object_extent.offset,
-                            object_extent.length, snapc, on_finish);
+    req = AioObjectRequest<I>::create_zero(
+      &image_ctx, object_extent.oid.name, object_extent.objectno,
+      object_extent.offset, object_extent.length, snapc, on_finish);
   }
   return req;
 }
 
-void AioImageDiscard::update_stats(size_t length) {
-  m_image_ctx.perfcounter->inc(l_librbd_discard);
-  m_image_ctx.perfcounter->inc(l_librbd_discard_bytes, length);
+template <typename I>
+void AioImageDiscard<I>::update_stats(size_t length) {
+  I &image_ctx = this->m_image_ctx;
+  image_ctx.perfcounter->inc(l_librbd_discard);
+  image_ctx.perfcounter->inc(l_librbd_discard_bytes, length);
 }
 
-void AioImageFlush::send_request() {
+template <typename I>
+void AioImageFlush<I>::send_request() {
+  I &image_ctx = this->m_image_ctx;
+  image_ctx.user_flushed();
+
   bool journaling = false;
   {
-    RWLock::RLocker snap_locker(m_image_ctx.snap_lock);
-    journaling = (m_image_ctx.journal != NULL &&
-                  !m_image_ctx.journal->is_journal_replaying());
+    RWLock::RLocker snap_locker(image_ctx.snap_lock);
+    journaling = (image_ctx.journal != nullptr &&
+                  image_ctx.journal->is_journal_appending());
   }
 
-  m_aio_comp->set_request_count(journaling ? 2 : 1);
-
+  AioCompletion *aio_comp = this->m_aio_comp;
   if (journaling) {
     // in-flight ops are flushed prior to closing the journal
-    uint64_t journal_tid = m_image_ctx.journal->append_io_event(
+    uint64_t journal_tid = image_ctx.journal->append_io_event(
       journal::EventEntry(journal::AioFlushEvent()),
       AioObjectRequests(), 0, 0, false);
 
-    C_FlushJournalCommit *ctx = new C_FlushJournalCommit(m_image_ctx,
-                                                         m_aio_comp,
-                                                         journal_tid);
-    m_image_ctx.journal->flush_event(journal_tid, ctx);
-    m_aio_comp->associate_journal_event(journal_tid);
-  }
+    aio_comp->set_request_count(2);
 
-  C_AioRequest *req_comp = new C_AioRequest(m_aio_comp);
-  m_image_ctx.flush(req_comp);
+    C_FlushJournalCommit<I> *ctx = new C_FlushJournalCommit<I>(image_ctx,
+                                                               aio_comp,
+                                                               journal_tid);
+    C_AioRequest *req_comp = new C_AioRequest(aio_comp);
+    image_ctx.journal->flush_event(journal_tid, ctx);
+    aio_comp->associate_journal_event(journal_tid);
+    image_ctx.flush_async_operations(req_comp);
+  } else {
+    // flush rbd cache only when journaling is not enabled
+    aio_comp->set_request_count(1);
+    C_AioRequest *req_comp = new C_AioRequest(aio_comp);
+    image_ctx.flush(req_comp);
+  }
 
   // track flush op for block writes
-  m_aio_comp->start_op(true);
-  m_aio_comp->put();
+  aio_comp->start_op(true);
+  aio_comp->put();
 
-  m_image_ctx.perfcounter->inc(l_librbd_aio_flush);
+  image_ctx.perfcounter->inc(l_librbd_aio_flush);
 }
 
 } // namespace librbd
 
 template class librbd::AioImageRequest<librbd::ImageCtx>;
+template class librbd::AbstractAioImageWrite<librbd::ImageCtx>;
+template class librbd::AioImageWrite<librbd::ImageCtx>;
+template class librbd::AioImageDiscard<librbd::ImageCtx>;
+template class librbd::AioImageFlush<librbd::ImageCtx>;
diff --git a/src/librbd/AioImageRequest.h b/src/librbd/AioImageRequest.h
index b30cc30..3d6b385 100644
--- a/src/librbd/AioImageRequest.h
+++ b/src/librbd/AioImageRequest.h
@@ -15,7 +15,8 @@
 
 namespace librbd {
 
-class AioObjectRequest;
+class AioCompletion;
+class AioObjectRequestHandle;
 class ImageCtx;
 
 template <typename ImageCtxT = ImageCtx>
@@ -48,7 +49,7 @@ public:
   void fail(int r);
 
 protected:
-  typedef std::list<AioObjectRequest *> AioObjectRequests;
+  typedef std::list<AioObjectRequestHandle *> AioObjectRequests;
 
   ImageCtxT &m_image_ctx;
   AioCompletion *m_aio_comp;
@@ -57,27 +58,35 @@ protected:
     : m_image_ctx(image_ctx), m_aio_comp(aio_comp) {}
 
   virtual void send_request() = 0;
+  virtual aio_type_t get_aio_type() const = 0;
   virtual const char *get_request_type() const = 0;
 };
 
-class AioImageRead : public AioImageRequest<> {
+template <typename ImageCtxT = ImageCtx>
+class AioImageRead : public AioImageRequest<ImageCtxT> {
 public:
-  AioImageRead(ImageCtx &image_ctx, AioCompletion *aio_comp, uint64_t off,
+  using typename AioImageRequest<ImageCtxT>::Extents;
+
+  AioImageRead(ImageCtxT &image_ctx, AioCompletion *aio_comp, uint64_t off,
                size_t len, char *buf, bufferlist *pbl, int op_flags)
-    : AioImageRequest(image_ctx, aio_comp), m_buf(buf), m_pbl(pbl),
+    : AioImageRequest<ImageCtxT>(image_ctx, aio_comp), m_buf(buf), m_pbl(pbl),
       m_op_flags(op_flags) {
     m_image_extents.push_back(std::make_pair(off, len));
   }
 
-  AioImageRead(ImageCtx &image_ctx, AioCompletion *aio_comp,
+  AioImageRead(ImageCtxT &image_ctx, AioCompletion *aio_comp,
                const Extents &image_extents, char *buf, bufferlist *pbl,
                int op_flags)
-    : AioImageRequest(image_ctx, aio_comp), m_image_extents(image_extents),
-      m_buf(buf), m_pbl(pbl), m_op_flags(op_flags) {
+    : AioImageRequest<ImageCtxT>(image_ctx, aio_comp),
+      m_image_extents(image_extents), m_buf(buf), m_pbl(pbl),
+      m_op_flags(op_flags) {
   }
 
 protected:
   virtual void send_request();
+  virtual aio_type_t get_aio_type() const {
+    return AIO_TYPE_READ;
+  }
   virtual const char *get_request_type() const {
     return "aio_read";
   }
@@ -88,7 +97,8 @@ private:
   int m_op_flags;
 };
 
-class AbstractAioImageWrite : public AioImageRequest<> {
+template <typename ImageCtxT = ImageCtx>
+class AbstractAioImageWrite : public AioImageRequest<ImageCtxT> {
 public:
   virtual bool is_write_op() const {
     return true;
@@ -99,19 +109,23 @@ public:
   }
 
 protected:
+  using typename AioImageRequest<ImageCtxT>::AioObjectRequests;
+
   typedef std::vector<ObjectExtent> ObjectExtents;
 
   const uint64_t m_off;
   const size_t m_len;
 
-  AbstractAioImageWrite(ImageCtx &image_ctx, AioCompletion *aio_comp,
+  AbstractAioImageWrite(ImageCtxT &image_ctx, AioCompletion *aio_comp,
                         uint64_t off, size_t len)
-    : AioImageRequest(image_ctx, aio_comp), m_off(off), m_len(len),
+    : AioImageRequest<ImageCtxT>(image_ctx, aio_comp), m_off(off), m_len(len),
       m_synchronous(false) {
   }
 
   virtual void send_request();
 
+  virtual void prune_object_extents(ObjectExtents &object_extents) {
+  }
   virtual uint32_t get_cache_request_count(bool journaling) const {
     return 0;
   }
@@ -121,7 +135,7 @@ protected:
   virtual void send_object_requests(const ObjectExtents &object_extents,
                                     const ::SnapContext &snapc,
                                     AioObjectRequests *aio_object_requests);
-  virtual AioObjectRequest *create_object_request(
+  virtual AioObjectRequestHandle *create_object_request(
       const ObjectExtent &object_extent, const ::SnapContext &snapc,
       Context *on_finish) = 0;
 
@@ -133,15 +147,22 @@ private:
   bool m_synchronous;
 };
 
-class AioImageWrite : public AbstractAioImageWrite {
+template <typename ImageCtxT = ImageCtx>
+class AioImageWrite : public AbstractAioImageWrite<ImageCtxT> {
 public:
-  AioImageWrite(ImageCtx &image_ctx, AioCompletion *aio_comp, uint64_t off,
+  AioImageWrite(ImageCtxT &image_ctx, AioCompletion *aio_comp, uint64_t off,
                 size_t len, const char *buf, int op_flags)
-    : AbstractAioImageWrite(image_ctx, aio_comp, off, len), m_buf(buf),
-      m_op_flags(op_flags) {
+    : AbstractAioImageWrite<ImageCtxT>(image_ctx, aio_comp, off, len),
+      m_buf(buf), m_op_flags(op_flags) {
   }
 
 protected:
+  using typename AioImageRequest<ImageCtxT>::AioObjectRequests;
+  using typename AbstractAioImageWrite<ImageCtxT>::ObjectExtents;
+
+  virtual aio_type_t get_aio_type() const {
+    return AIO_TYPE_WRITE;
+  }
   virtual const char *get_request_type() const {
     return "aio_write";
   }
@@ -154,7 +175,7 @@ protected:
   virtual void send_object_requests(const ObjectExtents &object_extents,
                                     const ::SnapContext &snapc,
                                     AioObjectRequests *aio_object_requests);
-  virtual AioObjectRequest *create_object_request(
+  virtual AioObjectRequestHandle *create_object_request(
       const ObjectExtent &object_extent, const ::SnapContext &snapc,
       Context *on_finish);
 
@@ -166,23 +187,31 @@ private:
   int m_op_flags;
 };
 
-class AioImageDiscard : public AbstractAioImageWrite {
+template <typename ImageCtxT = ImageCtx>
+class AioImageDiscard : public AbstractAioImageWrite<ImageCtxT> {
 public:
-  AioImageDiscard(ImageCtx &image_ctx, AioCompletion *aio_comp, uint64_t off,
+  AioImageDiscard(ImageCtxT &image_ctx, AioCompletion *aio_comp, uint64_t off,
                   uint64_t len)
-    : AbstractAioImageWrite(image_ctx, aio_comp, off, len) {
+    : AbstractAioImageWrite<ImageCtxT>(image_ctx, aio_comp, off, len) {
   }
 
 protected:
+  using typename AioImageRequest<ImageCtxT>::AioObjectRequests;
+  using typename AbstractAioImageWrite<ImageCtxT>::ObjectExtents;
+
+  virtual aio_type_t get_aio_type() const {
+    return AIO_TYPE_DISCARD;
+  }
   virtual const char *get_request_type() const {
     return "aio_discard";
   }
 
+  virtual void prune_object_extents(ObjectExtents &object_extents) override;
   virtual uint32_t get_cache_request_count(bool journaling) const override;
   virtual void send_cache_requests(const ObjectExtents &object_extents,
                                    uint64_t journal_tid);
 
-  virtual AioObjectRequest *create_object_request(
+  virtual AioObjectRequestHandle *create_object_request(
       const ObjectExtent &object_extent, const ::SnapContext &snapc,
       Context *on_finish);
 
@@ -191,10 +220,11 @@ protected:
   virtual void update_stats(size_t length);
 };
 
-class AioImageFlush : public AioImageRequest<> {
+template <typename ImageCtxT = ImageCtx>
+class AioImageFlush : public AioImageRequest<ImageCtxT> {
 public:
-  AioImageFlush(ImageCtx &image_ctx, AioCompletion *aio_comp)
-    : AioImageRequest(image_ctx, aio_comp) {
+  AioImageFlush(ImageCtxT &image_ctx, AioCompletion *aio_comp)
+    : AioImageRequest<ImageCtxT>(image_ctx, aio_comp) {
   }
 
   virtual bool is_write_op() const {
@@ -202,7 +232,12 @@ public:
   }
 
 protected:
+  using typename AioImageRequest<ImageCtxT>::AioObjectRequests;
+
   virtual void send_request();
+  virtual aio_type_t get_aio_type() const {
+    return AIO_TYPE_FLUSH;
+  }
   virtual const char *get_request_type() const {
     return "aio_flush";
   }
@@ -211,5 +246,9 @@ protected:
 } // namespace librbd
 
 extern template class librbd::AioImageRequest<librbd::ImageCtx>;
+extern template class librbd::AbstractAioImageWrite<librbd::ImageCtx>;
+extern template class librbd::AioImageWrite<librbd::ImageCtx>;
+extern template class librbd::AioImageDiscard<librbd::ImageCtx>;
+extern template class librbd::AioImageFlush<librbd::ImageCtx>;
 
 #endif // CEPH_LIBRBD_AIO_IMAGE_REQUEST_H
diff --git a/src/librbd/AioImageRequestWQ.cc b/src/librbd/AioImageRequestWQ.cc
index fba7933..a3e8aac 100644
--- a/src/librbd/AioImageRequestWQ.cc
+++ b/src/librbd/AioImageRequestWQ.cc
@@ -52,7 +52,7 @@ ssize_t AioImageRequestWQ::write(uint64_t off, uint64_t len, const char *buf,
                  << "len = " << len << dendl;
 
   m_image_ctx.snap_lock.get_read();
-  int r = clip_io(&m_image_ctx, off, &len);
+  int r = clip_io(util::get_image_ctx(&m_image_ctx), off, &len);
   m_image_ctx.snap_lock.put_read();
   if (r < 0) {
     lderr(cct) << "invalid IO request: " << cpp_strerror(r) << dendl;
@@ -76,7 +76,7 @@ int AioImageRequestWQ::discard(uint64_t off, uint64_t len) {
                  << "len = " << len << dendl;
 
   m_image_ctx.snap_lock.get_read();
-  int r = clip_io(&m_image_ctx, off, &len);
+  int r = clip_io(util::get_image_ctx(&m_image_ctx), off, &len);
   m_image_ctx.snap_lock.put_read();
   if (r < 0) {
     lderr(cct) << "invalid IO request: " << cpp_strerror(r) << dendl;
@@ -123,8 +123,9 @@ void AioImageRequestWQ::aio_read(AioCompletion *c, uint64_t off, uint64_t len,
 
   if (m_image_ctx.non_blocking_aio || writes_blocked() || !writes_empty() ||
       lock_required) {
-    queue(new AioImageRead(m_image_ctx, c, off, len, buf, pbl, op_flags));
+    queue(new AioImageRead<>(m_image_ctx, c, off, len, buf, pbl, op_flags));
   } else {
+    c->start_op();
     AioImageRequest<>::aio_read(&m_image_ctx, c, off, len, buf, pbl, op_flags);
     finish_in_flight_op();
   }
@@ -149,8 +150,9 @@ void AioImageRequestWQ::aio_write(AioCompletion *c, uint64_t off, uint64_t len,
 
   RWLock::RLocker owner_locker(m_image_ctx.owner_lock);
   if (m_image_ctx.non_blocking_aio || writes_blocked()) {
-    queue(new AioImageWrite(m_image_ctx, c, off, len, buf, op_flags));
+    queue(new AioImageWrite<>(m_image_ctx, c, off, len, buf, op_flags));
   } else {
+    c->start_op();
     AioImageRequest<>::aio_write(&m_image_ctx, c, off, len, buf, op_flags);
     finish_in_flight_op();
   }
@@ -174,8 +176,9 @@ void AioImageRequestWQ::aio_discard(AioCompletion *c, uint64_t off,
 
   RWLock::RLocker owner_locker(m_image_ctx.owner_lock);
   if (m_image_ctx.non_blocking_aio || writes_blocked()) {
-    queue(new AioImageDiscard(m_image_ctx, c, off, len));
+    queue(new AioImageDiscard<>(m_image_ctx, c, off, len));
   } else {
+    c->start_op();
     AioImageRequest<>::aio_discard(&m_image_ctx, c, off, len);
     finish_in_flight_op();
   }
@@ -197,7 +200,7 @@ void AioImageRequestWQ::aio_flush(AioCompletion *c, bool native_async) {
 
   RWLock::RLocker owner_locker(m_image_ctx.owner_lock);
   if (m_image_ctx.non_blocking_aio || writes_blocked() || !writes_empty()) {
-    queue(new AioImageFlush(m_image_ctx, c));
+    queue(new AioImageFlush<>(m_image_ctx, c));
   } else {
     AioImageRequest<>::aio_flush(&m_image_ctx, c);
     finish_in_flight_op();
@@ -460,9 +463,8 @@ void AioImageRequestWQ::handle_refreshed(int r, AioImageRequest<> *req) {
                  << "req=" << req << dendl;
   if (r < 0) {
     req->fail(r);
-    delete req;
-
     finish_queued_op(req);
+    delete req;
     finish_in_flight_op();
   } else {
     // since IO was stalled for refresh -- original IO order is preserved
diff --git a/src/librbd/AioImageRequestWQ.h b/src/librbd/AioImageRequestWQ.h
index 74b8438..42817c0 100644
--- a/src/librbd/AioImageRequestWQ.h
+++ b/src/librbd/AioImageRequestWQ.h
@@ -6,7 +6,6 @@
 
 #include "include/Context.h"
 #include "include/atomic.h"
-#include "common/Cond.h"
 #include "common/RWLock.h"
 #include "common/WorkQueue.h"
 #include <list>
diff --git a/src/librbd/AioObjectRequest.cc b/src/librbd/AioObjectRequest.cc
index faee3d2..cf76176 100644
--- a/src/librbd/AioObjectRequest.cc
+++ b/src/librbd/AioObjectRequest.cc
@@ -7,6 +7,7 @@
 #include "common/Mutex.h"
 #include "common/RWLock.h"
 #include "common/WorkQueue.h"
+#include "include/Context.h"
 
 #include "librbd/AioObjectRequest.h"
 #include "librbd/AioCompletion.h"
@@ -14,8 +15,6 @@
 #include "librbd/CopyupRequest.h"
 #include "librbd/ExclusiveLock.h"
 #include "librbd/ImageCtx.h"
-#include "librbd/ImageWatcher.h"
-#include "librbd/internal.h"
 #include "librbd/ObjectMap.h"
 #include "librbd/Utils.h"
 
@@ -28,562 +27,623 @@
 
 namespace librbd {
 
-  AioObjectRequest::AioObjectRequest(ImageCtx *ictx, const std::string &oid,
-			             uint64_t objectno, uint64_t off,
-                                     uint64_t len, librados::snap_t snap_id,
-                                     Context *completion, bool hide_enoent)
-    : m_ictx(ictx), m_oid(oid), m_object_no(objectno), m_object_off(off),
-      m_object_len(len), m_snap_id(snap_id), m_completion(completion),
-      m_hide_enoent(hide_enoent) {
+template <typename I>
+AioObjectRequest<I>*
+AioObjectRequest<I>::create_remove(I *ictx, const std::string &oid,
+                                   uint64_t object_no,
+                                   const ::SnapContext &snapc,
+                                   Context *completion) {
+  return new AioObjectRemove(util::get_image_ctx(ictx), oid, object_no, snapc,
+                             completion);
+}
 
-    Striper::extent_to_file(m_ictx->cct, &m_ictx->layout, m_object_no,
-                            0, m_ictx->layout.object_size, m_parent_extents);
+template <typename I>
+AioObjectRequest<I>*
+AioObjectRequest<I>::create_truncate(I *ictx, const std::string &oid,
+                                     uint64_t object_no, uint64_t object_off,
+                                     const ::SnapContext &snapc,
+                                     Context *completion) {
+  return new AioObjectTruncate(util::get_image_ctx(ictx), oid, object_no,
+                               object_off, snapc, completion);
+}
 
-    RWLock::RLocker snap_locker(m_ictx->snap_lock);
-    RWLock::RLocker parent_locker(m_ictx->parent_lock);
-    compute_parent_extents();
-  }
+template <typename I>
+AioObjectRequest<I>*
+AioObjectRequest<I>::create_write(I *ictx, const std::string &oid,
+                                  uint64_t object_no, uint64_t object_off,
+                                  const ceph::bufferlist &data,
+                                  const ::SnapContext &snapc,
+                                  Context *completion, int op_flags) {
+  return new AioObjectWrite(util::get_image_ctx(ictx), oid, object_no,
+                            object_off, data, snapc, completion, op_flags);
+}
 
-  void AioObjectRequest::complete(int r)
-  {
-    if (should_complete(r)) {
-      ldout(m_ictx->cct, 20) << "complete " << this << dendl;
-      if (m_hide_enoent && r == -ENOENT) {
-	r = 0;
-      }
-      m_completion->complete(r);
-      delete this;
-    }
-  }
+template <typename I>
+AioObjectRequest<I>*
+AioObjectRequest<I>::create_zero(I *ictx, const std::string &oid,
+                                 uint64_t object_no, uint64_t object_off,
+                                 uint64_t object_len,
+                                 const ::SnapContext &snapc,
+                                 Context *completion) {
+  return new AioObjectZero(util::get_image_ctx(ictx), oid, object_no,
+                           object_off, object_len, snapc, completion);
+}
 
-  bool AioObjectRequest::compute_parent_extents() {
-    assert(m_ictx->snap_lock.is_locked());
-    assert(m_ictx->parent_lock.is_locked());
+template <typename I>
+AioObjectRequest<I>::AioObjectRequest(ImageCtx *ictx, const std::string &oid,
+                                      uint64_t objectno, uint64_t off,
+                                      uint64_t len, librados::snap_t snap_id,
+                                      Context *completion, bool hide_enoent)
+  : m_ictx(ictx), m_oid(oid), m_object_no(objectno), m_object_off(off),
+    m_object_len(len), m_snap_id(snap_id), m_completion(completion),
+    m_hide_enoent(hide_enoent) {
+
+  Striper::extent_to_file(m_ictx->cct, &m_ictx->layout, m_object_no,
+                          0, m_ictx->layout.object_size, m_parent_extents);
+
+  RWLock::RLocker snap_locker(m_ictx->snap_lock);
+  RWLock::RLocker parent_locker(m_ictx->parent_lock);
+  compute_parent_extents();
+}
 
-    uint64_t parent_overlap;
-    int r = m_ictx->get_parent_overlap(m_snap_id, &parent_overlap);
-    if (r < 0) {
-      // NOTE: it's possible for a snapshot to be deleted while we are
-      // still reading from it
-      lderr(m_ictx->cct) << this << " compute_parent_extents: failed to "
-                         << "retrieve parent overlap: " << cpp_strerror(r)
-                         << dendl;
-      m_parent_extents.clear();
-      return false;
+template <typename I>
+void AioObjectRequest<I>::complete(int r)
+{
+  if (should_complete(r)) {
+    ldout(m_ictx->cct, 20) << "complete " << this << dendl;
+    if (m_hide_enoent && r == -ENOENT) {
+      r = 0;
     }
+    m_completion->complete(r);
+    delete this;
+  }
+}
 
-    uint64_t object_overlap =
-      m_ictx->prune_parent_extents(m_parent_extents, parent_overlap);
-    if (object_overlap > 0) {
-      ldout(m_ictx->cct, 20) << this << " compute_parent_extents: "
-                             << "overlap " << parent_overlap << " "
-                             << "extents " << m_parent_extents << dendl;
-      return true;
-    }
+template <typename I>
+bool AioObjectRequest<I>::compute_parent_extents() {
+  assert(m_ictx->snap_lock.is_locked());
+  assert(m_ictx->parent_lock.is_locked());
+
+  uint64_t parent_overlap;
+  int r = m_ictx->get_parent_overlap(m_snap_id, &parent_overlap);
+  if (r < 0) {
+    // NOTE: it's possible for a snapshot to be deleted while we are
+    // still reading from it
+    lderr(m_ictx->cct) << this << " compute_parent_extents: failed to "
+                       << "retrieve parent overlap: " << cpp_strerror(r)
+                       << dendl;
+    m_parent_extents.clear();
     return false;
   }
 
-  static inline bool is_copy_on_read(ImageCtx *ictx, librados::snap_t snap_id) {
-    assert(ictx->owner_lock.is_locked());
-    assert(ictx->snap_lock.is_locked());
-    return (ictx->clone_copy_on_read &&
-            !ictx->read_only && snap_id == CEPH_NOSNAP &&
-            (ictx->exclusive_lock == nullptr ||
-             ictx->exclusive_lock->is_lock_owner()));
+  uint64_t object_overlap =
+    m_ictx->prune_parent_extents(m_parent_extents, parent_overlap);
+  if (object_overlap > 0) {
+    ldout(m_ictx->cct, 20) << this << " compute_parent_extents: "
+                           << "overlap " << parent_overlap << " "
+                           << "extents " << m_parent_extents << dendl;
+    return true;
   }
+  return false;
+}
 
-  /** read **/
-
-  AioObjectRead::AioObjectRead(ImageCtx *ictx, const std::string &oid,
-                               uint64_t objectno, uint64_t offset, uint64_t len,
-                               vector<pair<uint64_t,uint64_t> >& be,
-                               librados::snap_t snap_id, bool sparse,
-                               Context *completion, int op_flags)
-    : AioObjectRequest(ictx, oid, objectno, offset, len, snap_id, completion,
-                       false),
-      m_buffer_extents(be), m_tried_parent(false), m_sparse(sparse),
-      m_op_flags(op_flags), m_parent_completion(NULL),
-      m_state(LIBRBD_AIO_READ_FLAT) {
+static inline bool is_copy_on_read(ImageCtx *ictx, librados::snap_t snap_id) {
+  assert(ictx->owner_lock.is_locked());
+  assert(ictx->snap_lock.is_locked());
+  return (ictx->clone_copy_on_read &&
+          !ictx->read_only && snap_id == CEPH_NOSNAP &&
+          (ictx->exclusive_lock == nullptr ||
+           ictx->exclusive_lock->is_lock_owner()));
+}
 
-    guard_read();
-  }
+/** read **/
+
+template <typename I>
+AioObjectRead<I>::AioObjectRead(I *ictx, const std::string &oid,
+                                uint64_t objectno, uint64_t offset,
+                                uint64_t len,
+                                vector<pair<uint64_t,uint64_t> >& be,
+                                librados::snap_t snap_id, bool sparse,
+                                Context *completion, int op_flags)
+  : AioObjectRequest<I>(util::get_image_ctx(ictx), oid, objectno, offset, len,
+                        snap_id, completion, false),
+    m_buffer_extents(be), m_tried_parent(false), m_sparse(sparse),
+    m_op_flags(op_flags), m_parent_completion(NULL),
+    m_state(LIBRBD_AIO_READ_FLAT) {
+
+  guard_read();
+}
 
-  void AioObjectRead::guard_read()
-  {
-    RWLock::RLocker snap_locker(m_ictx->snap_lock);
-    RWLock::RLocker parent_locker(m_ictx->parent_lock);
+template <typename I>
+void AioObjectRead<I>::guard_read()
+{
+  ImageCtx *image_ctx = this->m_ictx;
+  RWLock::RLocker snap_locker(image_ctx->snap_lock);
+  RWLock::RLocker parent_locker(image_ctx->parent_lock);
 
-    if (has_parent()) {
-      ldout(m_ictx->cct, 20) << __func__ << " guarding read" << dendl;
-      m_state = LIBRBD_AIO_READ_GUARD;
-    }
+  if (this->has_parent()) {
+    ldout(image_ctx->cct, 20) << __func__ << " guarding read" << dendl;
+    m_state = LIBRBD_AIO_READ_GUARD;
   }
+}
 
-  bool AioObjectRead::should_complete(int r)
-  {
-    ldout(m_ictx->cct, 20) << "should_complete " << this << " " << m_oid << " "
-                           << m_object_off << "~" << m_object_len
-                           << " r = " << r << dendl;
-
-    bool finished = true;
-
-    switch (m_state) {
-    case LIBRBD_AIO_READ_GUARD:
-      ldout(m_ictx->cct, 20) << "should_complete " << this
-                             << " READ_CHECK_GUARD" << dendl;
-
-      // This is the step to read from parent
-      if (!m_tried_parent && r == -ENOENT) {
-        {
-          RWLock::RLocker owner_locker(m_ictx->owner_lock);
-          RWLock::RLocker snap_locker(m_ictx->snap_lock);
-          RWLock::RLocker parent_locker(m_ictx->parent_lock);
-          if (m_ictx->parent == NULL) {
-	    ldout(m_ictx->cct, 20) << "parent is gone; do nothing" << dendl;
-	    m_state = LIBRBD_AIO_READ_FLAT;
-	    finished = false;
-	    break;
-	  }
-
-          // calculate reverse mapping onto the image
-          vector<pair<uint64_t,uint64_t> > parent_extents;
-          Striper::extent_to_file(m_ictx->cct, &m_ictx->layout, m_object_no,
-                                  m_object_off, m_object_len, parent_extents);
-
-          uint64_t parent_overlap = 0;
-          uint64_t object_overlap = 0;
-          r = m_ictx->get_parent_overlap(m_snap_id, &parent_overlap);
-          if (r == 0) {
-            object_overlap = m_ictx->prune_parent_extents(parent_extents,
-                                                          parent_overlap);
-          }
+template <typename I>
+bool AioObjectRead<I>::should_complete(int r)
+{
+  ImageCtx *image_ctx = this->m_ictx;
+  ldout(image_ctx->cct, 20) << "should_complete " << this << " "
+                            << this->m_oid << " "
+                            << this->m_object_off << "~" << this->m_object_len
+                            << " r = " << r << dendl;
+
+  bool finished = true;
+
+  switch (m_state) {
+  case LIBRBD_AIO_READ_GUARD:
+    ldout(image_ctx->cct, 20) << "should_complete " << this
+                              << " READ_CHECK_GUARD" << dendl;
+
+    // This is the step to read from parent
+    if (!m_tried_parent && r == -ENOENT) {
+      {
+        RWLock::RLocker owner_locker(image_ctx->owner_lock);
+        RWLock::RLocker snap_locker(image_ctx->snap_lock);
+        RWLock::RLocker parent_locker(image_ctx->parent_lock);
+        if (image_ctx->parent == NULL) {
+          ldout(image_ctx->cct, 20) << "parent is gone; do nothing" << dendl;
+          m_state = LIBRBD_AIO_READ_FLAT;
+          finished = false;
+          break;
+        }
 
-          if (object_overlap > 0) {
-            m_tried_parent = true;
-            if (is_copy_on_read(m_ictx, m_snap_id)) {
-              m_state = LIBRBD_AIO_READ_COPYUP;
-	    }
+        // calculate reverse mapping onto the image
+        vector<pair<uint64_t,uint64_t> > parent_extents;
+        Striper::extent_to_file(image_ctx->cct, &image_ctx->layout,
+                                this->m_object_no, this->m_object_off,
+                                this->m_object_len, parent_extents);
+
+        uint64_t parent_overlap = 0;
+        uint64_t object_overlap = 0;
+        r = image_ctx->get_parent_overlap(this->m_snap_id, &parent_overlap);
+        if (r == 0) {
+          object_overlap = image_ctx->prune_parent_extents(parent_extents,
+                                                           parent_overlap);
+        }
 
-            read_from_parent(parent_extents);
-            finished = false;
+        if (object_overlap > 0) {
+          m_tried_parent = true;
+          if (is_copy_on_read(image_ctx, this->m_snap_id)) {
+            m_state = LIBRBD_AIO_READ_COPYUP;
           }
-        }
 
-        if (m_tried_parent) {
-          // release reference to the parent read completion.  this request
-          // might be completed after unblock is invoked.
-          AioCompletion *parent_completion = m_parent_completion;
-          parent_completion->unblock();
-          parent_completion->put();
+          read_from_parent(parent_extents);
+          finished = false;
         }
       }
-      break;
-    case LIBRBD_AIO_READ_COPYUP:
-      ldout(m_ictx->cct, 20) << "should_complete " << this << " READ_COPYUP"
-                             << dendl;
-      // This is the extra step for copy-on-read: kick off an asynchronous copyup.
-      // It is different from copy-on-write as asynchronous copyup will finish
-      // by itself so state won't go back to LIBRBD_AIO_READ_GUARD.
-
-      assert(m_tried_parent);
-      if (r > 0) {
-        // If read entire object from parent success and CoR is possible, kick
-        // off a asynchronous copyup. This approach minimizes the latency
-        // impact.
-        send_copyup();
+
+      if (m_tried_parent) {
+        // release reference to the parent read completion.  this request
+        // might be completed after unblock is invoked.
+        AioCompletion *parent_completion = m_parent_completion;
+        parent_completion->unblock();
+        parent_completion->put();
       }
-      break;
-    case LIBRBD_AIO_READ_FLAT:
-      ldout(m_ictx->cct, 20) << "should_complete " << this << " READ_FLAT"
-                             << dendl;
-      // The read content should be deposit in m_read_data
-      break;
-    default:
-      lderr(m_ictx->cct) << "invalid request state: " << m_state << dendl;
-      assert(0);
     }
-
-    return finished;
+    break;
+  case LIBRBD_AIO_READ_COPYUP:
+    ldout(image_ctx->cct, 20) << "should_complete " << this << " READ_COPYUP"
+                              << dendl;
+    // This is the extra step for copy-on-read: kick off an asynchronous copyup.
+    // It is different from copy-on-write as asynchronous copyup will finish
+    // by itself so state won't go back to LIBRBD_AIO_READ_GUARD.
+
+    assert(m_tried_parent);
+    if (r > 0) {
+      // If read entire object from parent success and CoR is possible, kick
+      // off a asynchronous copyup. This approach minimizes the latency
+      // impact.
+      send_copyup();
+    }
+    break;
+  case LIBRBD_AIO_READ_FLAT:
+    ldout(image_ctx->cct, 20) << "should_complete " << this << " READ_FLAT"
+                              << dendl;
+    // The read content should be deposit in m_read_data
+    break;
+  default:
+    lderr(image_ctx->cct) << "invalid request state: " << m_state << dendl;
+    assert(0);
   }
 
-  void AioObjectRead::send() {
-    ldout(m_ictx->cct, 20) << "send " << this << " " << m_oid << " "
-                           << m_object_off << "~" << m_object_len << dendl;
+  return finished;
+}
 
-    {
-      RWLock::RLocker snap_locker(m_ictx->snap_lock);
+template <typename I>
+void AioObjectRead<I>::send() {
+  ImageCtx *image_ctx = this->m_ictx;
+  ldout(image_ctx->cct, 20) << "send " << this << " " << this->m_oid << " "
+                            << this->m_object_off << "~" << this->m_object_len
+                            << dendl;
 
-      // send read request to parent if the object doesn't exist locally
-      if (m_ictx->object_map != nullptr &&
-          !m_ictx->object_map->object_may_exist(m_object_no)) {
-        m_ictx->op_work_queue->queue(util::create_context_callback<
-          AioObjectRequest>(this), -ENOENT);
-        return;
-      }
+  {
+    RWLock::RLocker snap_locker(image_ctx->snap_lock);
+
+    // send read request to parent if the object doesn't exist locally
+    if (image_ctx->object_map != nullptr &&
+        !image_ctx->object_map->object_may_exist(this->m_object_no)) {
+      image_ctx->op_work_queue->queue(util::create_context_callback<
+        AioObjectRequest<I> >(this), -ENOENT);
+      return;
     }
+  }
 
-    librados::ObjectReadOperation op;
-    int flags = m_ictx->get_read_flags(m_snap_id);
-    if (m_sparse) {
-      op.sparse_read(m_object_off, m_object_len, &m_ext_map, &m_read_data,
-		     NULL);
-    } else {
-      op.read(m_object_off, m_object_len, &m_read_data, NULL);
-    }
-    op.set_op_flags2(m_op_flags);
+  librados::ObjectReadOperation op;
+  int flags = image_ctx->get_read_flags(this->m_snap_id);
+  if (m_sparse) {
+    op.sparse_read(this->m_object_off, this->m_object_len, &m_ext_map,
+                   &m_read_data, nullptr);
+  } else {
+    op.read(this->m_object_off, this->m_object_len, &m_read_data, nullptr);
+  }
+  op.set_op_flags2(m_op_flags);
 
-    librados::AioCompletion *rados_completion =
-      util::create_rados_ack_callback(this);
-    int r = m_ictx->data_ctx.aio_operate(m_oid, rados_completion, &op, flags,
-                                         NULL);
-    assert(r == 0);
+  librados::AioCompletion *rados_completion =
+    util::create_rados_ack_callback(this);
+  int r = image_ctx->data_ctx.aio_operate(this->m_oid, rados_completion, &op,
+                                          flags, nullptr);
+  assert(r == 0);
 
-    rados_completion->release();
-  }
+  rados_completion->release();
+}
 
-  void AioObjectRead::send_copyup()
+template <typename I>
+void AioObjectRead<I>::send_copyup()
+{
+  ImageCtx *image_ctx = this->m_ictx;
   {
-    {
-      RWLock::RLocker owner_locker(m_ictx->owner_lock);
-      RWLock::RLocker snap_locker(m_ictx->snap_lock);
-      RWLock::RLocker parent_locker(m_ictx->parent_lock);
-      if (!compute_parent_extents() ||
-          (m_ictx->exclusive_lock != nullptr &&
-           !m_ictx->exclusive_lock->is_lock_owner())) {
-        return;
-      }
-    }
-
-    Mutex::Locker copyup_locker(m_ictx->copyup_list_lock);
-    map<uint64_t, CopyupRequest*>::iterator it =
-      m_ictx->copyup_list.find(m_object_no);
-    if (it == m_ictx->copyup_list.end()) {
-      // create and kick off a CopyupRequest
-      CopyupRequest *new_req = new CopyupRequest(m_ictx, m_oid, m_object_no,
-    					         m_parent_extents);
-      m_ictx->copyup_list[m_object_no] = new_req;
-      new_req->send();
+    RWLock::RLocker owner_locker(image_ctx->owner_lock);
+    RWLock::RLocker snap_locker(image_ctx->snap_lock);
+    RWLock::RLocker parent_locker(image_ctx->parent_lock);
+    if (!this->compute_parent_extents() ||
+        (image_ctx->exclusive_lock != nullptr &&
+         !image_ctx->exclusive_lock->is_lock_owner())) {
+      return;
     }
   }
 
-  void AioObjectRead::read_from_parent(const vector<pair<uint64_t,uint64_t> >& parent_extents)
-  {
-    assert(!m_parent_completion);
-    m_parent_completion = AioCompletion::create<AioObjectRequest>(this);
-
-    // prevent the parent image from being deleted while this
-    // request is still in-progress
-    m_parent_completion->get();
-    m_parent_completion->block();
-
-    ldout(m_ictx->cct, 20) << "read_from_parent this = " << this
-			   << " parent completion " << m_parent_completion
-			   << " extents " << parent_extents
-			   << dendl;
-    RWLock::RLocker owner_locker(m_ictx->parent->owner_lock);
-    AioImageRequest<>::aio_read(m_ictx->parent, m_parent_completion,
-                                parent_extents, NULL, &m_read_data, 0);
+  Mutex::Locker copyup_locker(image_ctx->copyup_list_lock);
+  map<uint64_t, CopyupRequest*>::iterator it =
+    image_ctx->copyup_list.find(this->m_object_no);
+  if (it == image_ctx->copyup_list.end()) {
+    // create and kick off a CopyupRequest
+    CopyupRequest *new_req = new CopyupRequest(image_ctx, this->m_oid,
+                                               this->m_object_no,
+                                               this->m_parent_extents);
+    image_ctx->copyup_list[this->m_object_no] = new_req;
+    new_req->send();
   }
+}
 
-  /** write **/
-
-  AbstractAioObjectWrite::AbstractAioObjectWrite(ImageCtx *ictx,
-                                                 const std::string &oid,
-                                                 uint64_t object_no,
-                                                 uint64_t object_off,
-                                                 uint64_t len,
-                                                 const ::SnapContext &snapc,
-                                                 Context *completion,
-                                                 bool hide_enoent)
-    : AioObjectRequest(ictx, oid, object_no, object_off, len, CEPH_NOSNAP,
-                       completion, hide_enoent),
-      m_state(LIBRBD_AIO_WRITE_FLAT), m_snap_seq(snapc.seq.val)
-  {
-    m_snaps.insert(m_snaps.end(), snapc.snaps.begin(), snapc.snaps.end());
-  }
+template <typename I>
+void AioObjectRead<I>::read_from_parent(const Extents& parent_extents)
+{
+  ImageCtx *image_ctx = this->m_ictx;
+  assert(!m_parent_completion);
+  m_parent_completion = AioCompletion::create_and_start<AioObjectRequest<I> >(
+    this, image_ctx, AIO_TYPE_READ);
+
+  // prevent the parent image from being deleted while this
+  // request is still in-progress
+  m_parent_completion->get();
+  m_parent_completion->block();
+
+  ldout(image_ctx->cct, 20) << "read_from_parent this = " << this
+                            << " parent completion " << m_parent_completion
+                            << " extents " << parent_extents
+                            << dendl;
+  RWLock::RLocker owner_locker(image_ctx->parent->owner_lock);
+  AioImageRequest<>::aio_read(image_ctx->parent, m_parent_completion,
+                              parent_extents, NULL, &m_read_data, 0);
+}
 
-  void AbstractAioObjectWrite::guard_write()
-  {
-    if (has_parent()) {
-      m_state = LIBRBD_AIO_WRITE_GUARD;
-      m_write.assert_exists();
-      ldout(m_ictx->cct, 20) << __func__ << " guarding write" << dendl;
-    }
+/** write **/
+
+AbstractAioObjectWrite::AbstractAioObjectWrite(ImageCtx *ictx,
+                                               const std::string &oid,
+                                               uint64_t object_no,
+                                               uint64_t object_off,
+                                               uint64_t len,
+                                               const ::SnapContext &snapc,
+                                               Context *completion,
+                                               bool hide_enoent)
+  : AioObjectRequest(ictx, oid, object_no, object_off, len, CEPH_NOSNAP,
+                     completion, hide_enoent),
+    m_state(LIBRBD_AIO_WRITE_FLAT), m_snap_seq(snapc.seq.val)
+{
+  m_snaps.insert(m_snaps.end(), snapc.snaps.begin(), snapc.snaps.end());
+}
+
+void AbstractAioObjectWrite::guard_write()
+{
+  if (has_parent()) {
+    m_state = LIBRBD_AIO_WRITE_GUARD;
+    m_write.assert_exists();
+    ldout(m_ictx->cct, 20) << __func__ << " guarding write" << dendl;
   }
+}
 
-  bool AbstractAioObjectWrite::should_complete(int r)
-  {
-    ldout(m_ictx->cct, 20) << get_write_type() << " " << this << " " << m_oid
-                           << " " << m_object_off << "~" << m_object_len
-			   << " should_complete: r = " << r << dendl;
-
-    bool finished = true;
-    switch (m_state) {
-    case LIBRBD_AIO_WRITE_PRE:
-      ldout(m_ictx->cct, 20) << "WRITE_PRE" << dendl;
-      if (r < 0) {
-	return true;
-      }
+bool AbstractAioObjectWrite::should_complete(int r)
+{
+  ldout(m_ictx->cct, 20) << get_write_type() << " " << this << " " << m_oid
+                         << " " << m_object_off << "~" << m_object_len
+                         << " should_complete: r = " << r << dendl;
 
-      send_write();
-      finished = false;
-      break;
+  bool finished = true;
+  switch (m_state) {
+  case LIBRBD_AIO_WRITE_PRE:
+    ldout(m_ictx->cct, 20) << "WRITE_PRE" << dendl;
+    if (r < 0) {
+      return true;
+    }
 
-    case LIBRBD_AIO_WRITE_POST:
-      ldout(m_ictx->cct, 20) << "WRITE_POST" << dendl;
-      finished = true;
-      break;
+    send_write();
+    finished = false;
+    break;
 
-    case LIBRBD_AIO_WRITE_GUARD:
-      ldout(m_ictx->cct, 20) << "WRITE_CHECK_GUARD" << dendl;
-
-      if (r == -ENOENT) {
-        handle_write_guard();
-	finished = false;
-	break;
-      } else if (r < 0) {
-        // pass the error code to the finish context
-        m_state = LIBRBD_AIO_WRITE_ERROR;
-        complete(r);
-	finished = false;
-	break;
-      }
+  case LIBRBD_AIO_WRITE_POST:
+    ldout(m_ictx->cct, 20) << "WRITE_POST" << dendl;
+    finished = true;
+    break;
 
-      finished = send_post();
-      break;
+  case LIBRBD_AIO_WRITE_GUARD:
+    ldout(m_ictx->cct, 20) << "WRITE_CHECK_GUARD" << dendl;
 
-    case LIBRBD_AIO_WRITE_COPYUP:
-      ldout(m_ictx->cct, 20) << "WRITE_COPYUP" << dendl;
-      if (r < 0) {
-        m_state = LIBRBD_AIO_WRITE_ERROR;
-        complete(r);
-        finished = false;
-      } else {
-        finished = send_post();
-      }
+    if (r == -ENOENT) {
+      handle_write_guard();
+      finished = false;
       break;
+    } else if (r < 0) {
+      // pass the error code to the finish context
+      m_state = LIBRBD_AIO_WRITE_ERROR;
+      complete(r);
+      finished = false;
+      break;
+    }
 
-    case LIBRBD_AIO_WRITE_FLAT:
-      ldout(m_ictx->cct, 20) << "WRITE_FLAT" << dendl;
+    finished = send_post();
+    break;
 
+  case LIBRBD_AIO_WRITE_COPYUP:
+    ldout(m_ictx->cct, 20) << "WRITE_COPYUP" << dendl;
+    if (r < 0) {
+      m_state = LIBRBD_AIO_WRITE_ERROR;
+      complete(r);
+      finished = false;
+    } else {
       finished = send_post();
-      break;
+    }
+    break;
 
-    case LIBRBD_AIO_WRITE_ERROR:
-      assert(r < 0);
-      lderr(m_ictx->cct) << "WRITE_ERROR: " << cpp_strerror(r)
-			 << dendl;
-      break;
+  case LIBRBD_AIO_WRITE_FLAT:
+    ldout(m_ictx->cct, 20) << "WRITE_FLAT" << dendl;
 
-    default:
-      lderr(m_ictx->cct) << "invalid request state: " << m_state << dendl;
-      assert(0);
-    }
+    finished = send_post();
+    break;
 
-    return finished;
-  }
+  case LIBRBD_AIO_WRITE_ERROR:
+    assert(r < 0);
+    lderr(m_ictx->cct) << "WRITE_ERROR: " << cpp_strerror(r) << dendl;
+    break;
 
-  void AbstractAioObjectWrite::send() {
-    assert(m_ictx->owner_lock.is_locked());
-    ldout(m_ictx->cct, 20) << "send " << get_write_type() << " " << this <<" "
-                           << m_oid << " " << m_object_off << "~"
-                           << m_object_len << dendl;
-    send_pre();
+  default:
+    lderr(m_ictx->cct) << "invalid request state: " << m_state << dendl;
+    assert(0);
   }
 
-  void AbstractAioObjectWrite::send_pre() {
-    assert(m_ictx->owner_lock.is_locked());
+  return finished;
+}
 
-    bool write = false;
-    {
-      RWLock::RLocker snap_lock(m_ictx->snap_lock);
-      if (m_ictx->object_map == nullptr) {
-        m_object_exist = true;
-        write = true;
-      } else {
-        // should have been flushed prior to releasing lock
-        assert(m_ictx->exclusive_lock->is_lock_owner());
+void AbstractAioObjectWrite::send() {
+  assert(m_ictx->owner_lock.is_locked());
+  ldout(m_ictx->cct, 20) << "send " << get_write_type() << " " << this <<" "
+                         << m_oid << " " << m_object_off << "~"
+                         << m_object_len << dendl;
+  send_pre();
+}
+
+void AbstractAioObjectWrite::send_pre() {
+  assert(m_ictx->owner_lock.is_locked());
+
+  bool write = false;
+  {
+    RWLock::RLocker snap_lock(m_ictx->snap_lock);
+    if (m_ictx->object_map == nullptr) {
+      m_object_exist = true;
+      write = true;
+    } else {
+      // should have been flushed prior to releasing lock
+      assert(m_ictx->exclusive_lock->is_lock_owner());
+      m_object_exist = m_ictx->object_map->object_may_exist(m_object_no);
 
-        m_object_exist = m_ictx->object_map->object_may_exist(m_object_no);
+      uint8_t new_state;
+      pre_object_map_update(&new_state);
 
+      RWLock::WLocker object_map_locker(m_ictx->object_map_lock);
+      if (m_ictx->object_map->update_required(m_object_no, new_state)) {
         ldout(m_ictx->cct, 20) << "send_pre " << this << " " << m_oid << " "
-          		       << m_object_off << "~" << m_object_len << dendl;
+                               << m_object_off << "~" << m_object_len
+                               << dendl;
         m_state = LIBRBD_AIO_WRITE_PRE;
 
-        uint8_t new_state;
-        boost::optional<uint8_t> current_state;
-        pre_object_map_update(&new_state);
-
-        RWLock::WLocker object_map_locker(m_ictx->object_map_lock);
-        if ((*m_ictx->object_map)[m_object_no] != new_state) {
-          Context *ctx = util::create_context_callback<AioObjectRequest>(this);
-          bool updated = m_ictx->object_map->aio_update(m_object_no, new_state,
-                                                        current_state, ctx);
-          assert(updated);
-        } else {
-          write = true;
-        }
+        Context *ctx = util::create_context_callback<AioObjectRequest>(this);
+        bool updated = m_ictx->object_map->aio_update(m_object_no, new_state,
+                                                      {}, ctx);
+        assert(updated);
+      } else {
+        write = true;
       }
     }
+  }
 
-    // avoid possible recursive lock attempts
-    if (write) {
-      // no object map update required
-      send_write();
-    }
+  // avoid possible recursive lock attempts
+  if (write) {
+    // no object map update required
+    send_write();
   }
+}
 
-  bool AbstractAioObjectWrite::send_post() {
-    RWLock::RLocker owner_locker(m_ictx->owner_lock);
-    RWLock::RLocker snap_locker(m_ictx->snap_lock);
-    if (m_ictx->object_map == nullptr || !post_object_map_update()) {
-      return true;
-    }
+bool AbstractAioObjectWrite::send_post() {
+  RWLock::RLocker owner_locker(m_ictx->owner_lock);
+  RWLock::RLocker snap_locker(m_ictx->snap_lock);
+  if (m_ictx->object_map == nullptr || !post_object_map_update()) {
+    return true;
+  }
 
-    // should have been flushed prior to releasing lock
-    assert(m_ictx->exclusive_lock->is_lock_owner());
+  // should have been flushed prior to releasing lock
+  assert(m_ictx->exclusive_lock->is_lock_owner());
 
-    ldout(m_ictx->cct, 20) << "send_post " << this << " " << m_oid << " "
-			   << m_object_off << "~" << m_object_len << dendl;
-    m_state = LIBRBD_AIO_WRITE_POST;
+  RWLock::WLocker object_map_locker(m_ictx->object_map_lock);
+  if (!m_ictx->object_map->update_required(m_object_no, OBJECT_NONEXISTENT)) {
+    return true;
+  }
 
-    RWLock::WLocker object_map_locker(m_ictx->object_map_lock);
-    uint8_t current_state = (*m_ictx->object_map)[m_object_no];
-    if (current_state != OBJECT_PENDING ||
-        current_state == OBJECT_NONEXISTENT) {
-      return true;
-    }
+  ldout(m_ictx->cct, 20) << "send_post " << this << " " << m_oid << " "
+                         << m_object_off << "~" << m_object_len << dendl;
+  m_state = LIBRBD_AIO_WRITE_POST;
 
-    Context *ctx = util::create_context_callback<AioObjectRequest>(this);
-    bool updated = m_ictx->object_map->aio_update(m_object_no,
-                                                  OBJECT_NONEXISTENT,
-				                  OBJECT_PENDING, ctx);
-    assert(updated);
-    return false;
-  }
+  Context *ctx = util::create_context_callback<AioObjectRequest>(this);
+  bool updated = m_ictx->object_map->aio_update(m_object_no,
+                                                OBJECT_NONEXISTENT,
+      			                  OBJECT_PENDING, ctx);
+  assert(updated);
+  return false;
+}
 
-  void AbstractAioObjectWrite::send_write() {
-    ldout(m_ictx->cct, 20) << "send_write " << this << " " << m_oid << " "
-			   << m_object_off << "~" << m_object_len 
-                           << " object exist " << m_object_exist << dendl;
+void AbstractAioObjectWrite::send_write() {
+  ldout(m_ictx->cct, 20) << "send_write " << this << " " << m_oid << " "
+      		         << m_object_off << "~" << m_object_len
+                         << " object exist " << m_object_exist << dendl;
 
-    if (!m_object_exist && has_parent()) {
-      m_state = LIBRBD_AIO_WRITE_GUARD;
-      handle_write_guard();
-    } else {
-      send_write_op(true);
-    }
+  if (!m_object_exist && has_parent()) {
+    m_state = LIBRBD_AIO_WRITE_GUARD;
+    handle_write_guard();
+  } else {
+    send_write_op(true);
   }
+}
 
-  void AbstractAioObjectWrite::send_copyup()
-  {
-    ldout(m_ictx->cct, 20) << "send_copyup " << this << " " << m_oid << " "
-                           << m_object_off << "~" << m_object_len << dendl;
-    m_state = LIBRBD_AIO_WRITE_COPYUP;
-
-    m_ictx->copyup_list_lock.Lock();
-    map<uint64_t, CopyupRequest*>::iterator it =
-      m_ictx->copyup_list.find(m_object_no);
-    if (it == m_ictx->copyup_list.end()) {
-      CopyupRequest *new_req = new CopyupRequest(m_ictx, m_oid,
-                                                 m_object_no,
-                                                 m_parent_extents);
-
-      // make sure to wait on this CopyupRequest
-      new_req->append_request(this);
-      m_ictx->copyup_list[m_object_no] = new_req;
-
-      m_ictx->copyup_list_lock.Unlock();
-      new_req->send();
-    } else {
-      it->second->append_request(this);
-      m_ictx->copyup_list_lock.Unlock();
-    }
+void AbstractAioObjectWrite::send_copyup()
+{
+  ldout(m_ictx->cct, 20) << "send_copyup " << this << " " << m_oid << " "
+                         << m_object_off << "~" << m_object_len << dendl;
+  m_state = LIBRBD_AIO_WRITE_COPYUP;
+
+  m_ictx->copyup_list_lock.Lock();
+  map<uint64_t, CopyupRequest*>::iterator it =
+    m_ictx->copyup_list.find(m_object_no);
+  if (it == m_ictx->copyup_list.end()) {
+    CopyupRequest *new_req = new CopyupRequest(m_ictx, m_oid,
+                                               m_object_no,
+                                               m_parent_extents);
+
+    // make sure to wait on this CopyupRequest
+    new_req->append_request(this);
+    m_ictx->copyup_list[m_object_no] = new_req;
+
+    m_ictx->copyup_list_lock.Unlock();
+    new_req->send();
+  } else {
+    it->second->append_request(this);
+    m_ictx->copyup_list_lock.Unlock();
   }
-  void AbstractAioObjectWrite::send_write_op(bool write_guard)
+}
+void AbstractAioObjectWrite::send_write_op(bool write_guard)
+{
+  m_state = LIBRBD_AIO_WRITE_FLAT;
+  if (write_guard)
+    guard_write();
+  add_write_ops(&m_write);
+  assert(m_write.size() != 0);
+
+  librados::AioCompletion *rados_completion =
+    util::create_rados_safe_callback(this);
+  int r = m_ictx->data_ctx.aio_operate(m_oid, rados_completion, &m_write,
+                                       m_snap_seq, m_snaps);
+  assert(r == 0);
+  rados_completion->release();
+}
+void AbstractAioObjectWrite::handle_write_guard()
+{
+  bool has_parent;
   {
-    m_state = LIBRBD_AIO_WRITE_FLAT;
-    if (write_guard)
-      guard_write();
-    add_write_ops(&m_write);
-    assert(m_write.size() != 0);
-
-    librados::AioCompletion *rados_completion =
-      util::create_rados_safe_callback(this);
-    int r = m_ictx->data_ctx.aio_operate(m_oid, rados_completion, &m_write,
-					 m_snap_seq, m_snaps);
-    assert(r == 0);
-    rados_completion->release();
+    RWLock::RLocker snap_locker(m_ictx->snap_lock);
+    RWLock::RLocker parent_locker(m_ictx->parent_lock);
+    has_parent = compute_parent_extents();
   }
-  void AbstractAioObjectWrite::handle_write_guard()
-  {
-    bool has_parent;
-    {
-      RWLock::RLocker snap_locker(m_ictx->snap_lock);
-      RWLock::RLocker parent_locker(m_ictx->parent_lock);
-      has_parent = compute_parent_extents();
-    }
-    // If parent still exists, overlap might also have changed.
-    if (has_parent) {
-      send_copyup();
-    } else {
-      // parent may have disappeared -- send original write again
-      ldout(m_ictx->cct, 20) << "should_complete(" << this
-        << "): parent overlap now 0" << dendl;
-      send_write();
-    }
+  // If parent still exists, overlap might also have changed.
+  if (has_parent) {
+    send_copyup();
+  } else {
+    // parent may have disappeared -- send original write again
+    ldout(m_ictx->cct, 20) << "should_complete(" << this
+                           << "): parent overlap now 0" << dendl;
+    send_write();
   }
+}
 
-  void AioObjectWrite::add_write_ops(librados::ObjectWriteOperation *wr) {
-    RWLock::RLocker snap_locker(m_ictx->snap_lock);
-    if (m_ictx->enable_alloc_hint &&
-        (m_ictx->object_map == nullptr ||
-         !m_ictx->object_map->object_may_exist(m_object_no))) {
-      wr->set_alloc_hint(m_ictx->get_object_size(), m_ictx->get_object_size());
-    }
-
-    if (m_object_off == 0 && m_object_len == m_ictx->get_object_size()) {
-      wr->write_full(m_write_data);
-    } else {
-      wr->write(m_object_off, m_write_data);
-    }
-    wr->set_op_flags2(m_op_flags);
+void AioObjectWrite::add_write_ops(librados::ObjectWriteOperation *wr) {
+  RWLock::RLocker snap_locker(m_ictx->snap_lock);
+  if (m_ictx->enable_alloc_hint &&
+      (m_ictx->object_map == nullptr || !m_object_exist)) {
+    wr->set_alloc_hint(m_ictx->get_object_size(), m_ictx->get_object_size());
   }
 
-  void AioObjectWrite::send_write() {
-    bool write_full = (m_object_off == 0 && m_object_len == m_ictx->get_object_size());
-    ldout(m_ictx->cct, 20) << "send_write " << this << " " << m_oid << " "
-			   << m_object_off << "~" << m_object_len
-                           << " object exist " << m_object_exist
-			   << " write_full " << write_full << dendl;
-    if (write_full && !has_parent()) {
-      send_write_op(false);
-    } else {
-      AbstractAioObjectWrite::send_write();
-    }
+  if (m_object_off == 0 && m_object_len == m_ictx->get_object_size()) {
+    wr->write_full(m_write_data);
+  } else {
+    wr->write(m_object_off, m_write_data);
   }
+  wr->set_op_flags2(m_op_flags);
+}
 
-  void AioObjectRemove::guard_write() {
-    // do nothing to disable write guard only if deep-copyup not required
-    RWLock::RLocker snap_locker(m_ictx->snap_lock);
-    if (!m_ictx->snaps.empty()) {
-      AbstractAioObjectWrite::guard_write();
-    }
+void AioObjectWrite::send_write() {
+  bool write_full = (m_object_off == 0 && m_object_len == m_ictx->get_object_size());
+  ldout(m_ictx->cct, 20) << "send_write " << this << " " << m_oid << " "
+                         << m_object_off << "~" << m_object_len
+                         << " object exist " << m_object_exist
+                         << " write_full " << write_full << dendl;
+  if (write_full && !has_parent()) {
+    send_write_op(false);
+  } else {
+    AbstractAioObjectWrite::send_write();
   }
-  void AioObjectRemove::send_write() {
-    ldout(m_ictx->cct, 20) << "send_write " << this << " " << m_oid << " "
-			   << m_object_off << "~" << m_object_len << dendl;
-    send_write_op(true);
+}
+
+void AioObjectRemove::guard_write() {
+  // do nothing to disable write guard only if deep-copyup not required
+  RWLock::RLocker snap_locker(m_ictx->snap_lock);
+  if (!m_ictx->snaps.empty()) {
+    AbstractAioObjectWrite::guard_write();
   }
-  void AioObjectTruncate::send_write() {
-    ldout(m_ictx->cct, 20) << "send_write " << this << " " << m_oid
-			   << " truncate " << m_object_off << dendl;
-    if (!m_object_exist && ! has_parent()) {
-      m_state = LIBRBD_AIO_WRITE_FLAT;
-      Context *ctx = util::create_context_callback<AioObjectRequest>(this);
-      m_ictx->op_work_queue->queue(ctx, 0);
-    } else {
-      AbstractAioObjectWrite::send_write();
-    }
+}
+void AioObjectRemove::send_write() {
+  ldout(m_ictx->cct, 20) << "send_write " << this << " " << m_oid << " "
+                         << m_object_off << "~" << m_object_len << dendl;
+  send_write_op(true);
+}
+void AioObjectTruncate::send_write() {
+  ldout(m_ictx->cct, 20) << "send_write " << this << " " << m_oid
+                         << " truncate " << m_object_off << dendl;
+  if (!m_object_exist && ! has_parent()) {
+    m_state = LIBRBD_AIO_WRITE_FLAT;
+    Context *ctx = util::create_context_callback<AioObjectRequest>(this);
+    m_ictx->op_work_queue->queue(ctx, 0);
+  } else {
+    AbstractAioObjectWrite::send_write();
   }
 }
+
+} // namespace librbd
+
+template class librbd::AioObjectRequest<librbd::ImageCtx>;
+template class librbd::AioObjectRead<librbd::ImageCtx>;
diff --git a/src/librbd/AioObjectRequest.h b/src/librbd/AioObjectRequest.h
index 2e9226e..068ce1a 100644
--- a/src/librbd/AioObjectRequest.h
+++ b/src/librbd/AioObjectRequest.h
@@ -1,7 +1,8 @@
 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
 // vim: ts=8 sw=2 smarttab
-#ifndef CEPH_LIBRBD_AIOREQUEST_H
-#define CEPH_LIBRBD_AIOREQUEST_H
+
+#ifndef CEPH_LIBRBD_AIO_OBJECT_REQUEST_H
+#define CEPH_LIBRBD_AIO_OBJECT_REQUEST_H
 
 #include "include/int_types.h"
 
@@ -9,346 +10,404 @@
 
 #include "common/snap_types.h"
 #include "include/buffer.h"
-#include "include/Context.h"
 #include "include/rados/librados.hpp"
-#include "include/rbd/librbd.hpp"
 #include "librbd/ObjectMap.h"
 
+class Context;
+
 namespace librbd {
 
-  struct AioCompletion;
-  struct ImageCtx;
-  class CopyupRequest;
+struct AioCompletion;
+class AioObjectRemove;
+class AioObjectTruncate;
+class AioObjectWrite;
+class AioObjectZero;
+struct ImageCtx;
+class CopyupRequest;
+
+struct AioObjectRequestHandle {
+  virtual ~AioObjectRequestHandle() {
+  }
+
+  virtual void complete(int r) = 0;
+  virtual void send() = 0;
+};
+
+/**
+ * This class represents an I/O operation to a single RBD data object.
+ * Its subclasses encapsulate logic for dealing with special cases
+ * for I/O due to layering.
+ */
+template <typename ImageCtxT = ImageCtx>
+class AioObjectRequest : public AioObjectRequestHandle {
+public:
+  typedef std::vector<std::pair<uint64_t, uint64_t> > Extents;
+
+  static AioObjectRequest* create_remove(ImageCtxT *ictx,
+                                         const std::string &oid,
+                                         uint64_t object_no,
+                                         const ::SnapContext &snapc,
+                                         Context *completion);
+  static AioObjectRequest* create_truncate(ImageCtxT *ictx,
+                                           const std::string &oid,
+                                           uint64_t object_no,
+                                           uint64_t object_off,
+                                           const ::SnapContext &snapc,
+                                           Context *completion);
+  static AioObjectRequest* create_write(ImageCtxT *ictx, const std::string &oid,
+                                        uint64_t object_no,
+                                        uint64_t object_off,
+                                        const ceph::bufferlist &data,
+                                        const ::SnapContext &snapc,
+                                        Context *completion, int op_flags);
+  static AioObjectRequest* create_zero(ImageCtxT *ictx, const std::string &oid,
+                                       uint64_t object_no, uint64_t object_off,
+                                       uint64_t object_len,
+                                       const ::SnapContext &snapc,
+                                       Context *completion);
+
+  AioObjectRequest(ImageCtx *ictx, const std::string &oid,
+                   uint64_t objectno, uint64_t off, uint64_t len,
+                   librados::snap_t snap_id,
+                   Context *completion, bool hide_enoent);
+  virtual ~AioObjectRequest() {}
+
+  virtual void add_copyup_ops(librados::ObjectWriteOperation *wr) {};
+
+  void complete(int r);
+
+  virtual bool should_complete(int r) = 0;
+  virtual void send() = 0;
+
+  bool has_parent() const {
+    return !m_parent_extents.empty();
+  }
+
+protected:
+  bool compute_parent_extents();
+
+  ImageCtx *m_ictx;
+  std::string m_oid;
+  uint64_t m_object_no, m_object_off, m_object_len;
+  librados::snap_t m_snap_id;
+  Context *m_completion;
+  Extents m_parent_extents;
+  bool m_hide_enoent;
+};
+
+template <typename ImageCtxT = ImageCtx>
+class AioObjectRead : public AioObjectRequest<ImageCtxT> {
+public:
+  typedef std::vector<std::pair<uint64_t, uint64_t> > Extents;
+  typedef std::map<uint64_t, uint64_t> ExtentMap;
+
+  static AioObjectRead* create(ImageCtxT *ictx, const std::string &oid,
+                               uint64_t objectno, uint64_t offset,
+                               uint64_t len, Extents &buffer_extents,
+                               librados::snap_t snap_id, bool sparse,
+                               Context *completion, int op_flags) {
+    return new AioObjectRead(ictx, oid, objectno, offset, len, buffer_extents,
+                             snap_id, sparse, completion, op_flags);
+  }
+
+  AioObjectRead(ImageCtxT *ictx, const std::string &oid,
+                uint64_t objectno, uint64_t offset, uint64_t len,
+                Extents& buffer_extents, librados::snap_t snap_id, bool sparse,
+                Context *completion, int op_flags);
+
+  virtual bool should_complete(int r);
+  virtual void send();
+  void guard_read();
+
+  inline uint64_t get_offset() const {
+    return this->m_object_off;
+  }
+  inline uint64_t get_length() const {
+    return this->m_object_len;
+  }
+  ceph::bufferlist &data() {
+    return m_read_data;
+  }
+  const Extents &get_buffer_extents() const {
+    return m_buffer_extents;
+  }
+  ExtentMap &get_extent_map() {
+    return m_ext_map;
+  }
+private:
+  Extents m_buffer_extents;
+  bool m_tried_parent;
+  bool m_sparse;
+  int m_op_flags;
+  ceph::bufferlist m_read_data;
+  AioCompletion *m_parent_completion;
+  ExtentMap m_ext_map;
 
   /**
-   * This class represents an I/O operation to a single RBD data object.
-   * Its subclasses encapsulate logic for dealing with special cases
-   * for I/O due to layering.
+   * Reads go through the following state machine to deal with
+   * layering:
+   *
+   *                          need copyup
+   * LIBRBD_AIO_READ_GUARD ---------------> LIBRBD_AIO_READ_COPYUP
+   *           |                                       |
+   *           v                                       |
+   *         done <------------------------------------/
+   *           ^
+   *           |
+   * LIBRBD_AIO_READ_FLAT
+   *
+   * Reads start in LIBRBD_AIO_READ_GUARD or _FLAT, depending on
+   * whether there is a parent or not.
    */
-  class AioObjectRequest
-  {
-  public:
-    AioObjectRequest(ImageCtx *ictx, const std::string &oid,
-                     uint64_t objectno, uint64_t off, uint64_t len,
-                     librados::snap_t snap_id,
-                     Context *completion, bool hide_enoent);
-    virtual ~AioObjectRequest() {}
-
-    virtual void add_copyup_ops(librados::ObjectWriteOperation *wr) {};
-
-    void complete(int r);
-
-    virtual bool should_complete(int r) = 0;
-    virtual void send() = 0;
-
-    bool has_parent() const {
-      return !m_parent_extents.empty();
-    }
-
-  protected:
-    bool compute_parent_extents();
-
-    ImageCtx *m_ictx;
-    std::string m_oid;
-    uint64_t m_object_no, m_object_off, m_object_len;
-    librados::snap_t m_snap_id;
-    Context *m_completion;
-    std::vector<std::pair<uint64_t,uint64_t> > m_parent_extents;
-    bool m_hide_enoent;
-  };
-
-  class AioObjectRead : public AioObjectRequest {
-  public:
-    AioObjectRead(ImageCtx *ictx, const std::string &oid,
-	          uint64_t objectno, uint64_t offset, uint64_t len,
-	          vector<pair<uint64_t,uint64_t> >& be,
-	          librados::snap_t snap_id, bool sparse,
-	          Context *completion, int op_flags);
-
-    virtual bool should_complete(int r);
-    virtual void send();
-    void guard_read();
-
-    ceph::bufferlist &data() {
-      return m_read_data;
-    }
-
-    std::map<uint64_t, uint64_t> m_ext_map;
-
-    friend class C_AioRead;
-
-  private:
-    vector<pair<uint64_t,uint64_t> > m_buffer_extents;
-    bool m_tried_parent;
-    bool m_sparse;
-    int m_op_flags;
-    ceph::bufferlist m_read_data;
-    AioCompletion *m_parent_completion;
-
-    /**
-     * Reads go through the following state machine to deal with
-     * layering:
-     *
-     *                          need copyup
-     * LIBRBD_AIO_READ_GUARD ---------------> LIBRBD_AIO_READ_COPYUP
-     *           |                                       |
-     *           v                                       |
-     *         done <------------------------------------/
-     *           ^
-     *           |
-     * LIBRBD_AIO_READ_FLAT
-     *
-     * Reads start in LIBRBD_AIO_READ_GUARD or _FLAT, depending on
-     * whether there is a parent or not.
-     */
-    enum read_state_d {
-      LIBRBD_AIO_READ_GUARD,
-      LIBRBD_AIO_READ_COPYUP,
-      LIBRBD_AIO_READ_FLAT
-    };
-
-    read_state_d m_state;
-
-    void send_copyup();
-
-    void read_from_parent(const vector<pair<uint64_t,uint64_t> >& image_extents);
-  };
-
-  class AbstractAioObjectWrite : public AioObjectRequest {
-  public:
-    AbstractAioObjectWrite(ImageCtx *ictx, const std::string &oid,
-                           uint64_t object_no, uint64_t object_off,
-                           uint64_t len, const ::SnapContext &snapc,
-                           Context *completion, bool hide_enoent);
-
-    virtual void add_copyup_ops(librados::ObjectWriteOperation *wr)
-    {
-      add_write_ops(wr);
-    }
-
-    virtual bool should_complete(int r);
-    virtual void send();
-
-    /**
-     * Writes go through the following state machine to deal with
-     * layering and the object map:
-     *
-     * <start>
-     *  .  |
-     *  .  |
-     *  .  \---> LIBRBD_AIO_WRITE_PRE
-     *  .           |         |
-     *  . . . . . . | . . . . | . . . . . . . . . . .
-     *      .       |   -or-  |                     .
-     *      .       |         |                     v
-     *      .       |         \----------------> LIBRBD_AIO_WRITE_FLAT . . .
-     *      .       |                                               |      .
-     *      v       v         need copyup                           |      .
-     * LIBRBD_AIO_WRITE_GUARD -----------> LIBRBD_AIO_WRITE_COPYUP  |      .
-     *  .       |                               |        .          |      .
-     *  .       |                               |        .          |      .
-     *  .       |                         /-----/        .          |      .
-     *  .       |                         |              .          |      .
-     *  .       \-------------------\     |     /-------------------/      .
-     *  .                           |     |     |        .                 .
-     *  .                           v     v     v        .                 .
-     *  .                       LIBRBD_AIO_WRITE_POST    .                 .
-     *  .                               |                .                 .
-     *  .                               |  . . . . . . . .                 .
-     *  .                               |  .                               .
-     *  .                               v  v                               .
-     *  . . . . . . . . . . . . . . > <finish> < . . . . . . . . . . . . . .
-     *
-     * The _PRE/_POST states are skipped if the object map is disabled.
-     * The write starts in _WRITE_GUARD or _FLAT depending on whether or not
-     * there is a parent overlap.
-     */
-  protected:
-    enum write_state_d {
-      LIBRBD_AIO_WRITE_GUARD,
-      LIBRBD_AIO_WRITE_COPYUP,
-      LIBRBD_AIO_WRITE_FLAT,
-      LIBRBD_AIO_WRITE_PRE,
-      LIBRBD_AIO_WRITE_POST,
-      LIBRBD_AIO_WRITE_ERROR
-    };
-
-    write_state_d m_state;
-    librados::ObjectWriteOperation m_write;
-    uint64_t m_snap_seq;
-    std::vector<librados::snap_t> m_snaps;
-    bool m_object_exist;
-
-    virtual void add_write_ops(librados::ObjectWriteOperation *wr) = 0;
-    virtual const char* get_write_type() const = 0;
-    virtual void guard_write();
-    virtual void pre_object_map_update(uint8_t *new_state) = 0;
-    virtual bool post_object_map_update() {
-      return false;
-    }
-    virtual void send_write();
-    virtual void send_write_op(bool write_guard);
-    virtual void handle_write_guard();
-
-  private:
-    void send_pre();
-    bool send_post();
-    void send_copyup();
+  enum read_state_d {
+    LIBRBD_AIO_READ_GUARD,
+    LIBRBD_AIO_READ_COPYUP,
+    LIBRBD_AIO_READ_FLAT
   };
 
-  class AioObjectWrite : public AbstractAioObjectWrite {
-  public:
-    AioObjectWrite(ImageCtx *ictx, const std::string &oid, uint64_t object_no,
-                   uint64_t object_off, const ceph::bufferlist &data,
-                   const ::SnapContext &snapc, Context *completion)
-      : AbstractAioObjectWrite(ictx, oid, object_no, object_off, data.length(),
-                               snapc, completion, false),
-	m_write_data(data), m_op_flags(0) {
-    }
+  read_state_d m_state;
 
-    void set_op_flags(int op_flags) {
-      m_op_flags = op_flags;
-    }
-  protected:
-    virtual void add_write_ops(librados::ObjectWriteOperation *wr);
+  void send_copyup();
 
-    virtual const char* get_write_type() const {
-      return "write";
-    }
+  void read_from_parent(const Extents& image_extents);
+};
 
-    virtual void pre_object_map_update(uint8_t *new_state) {
-      *new_state = OBJECT_EXISTS;
-    }
-    virtual void send_write();
-
-  private:
-    ceph::bufferlist m_write_data;
-    int m_op_flags;
-  };
-
-  class AioObjectRemove : public AbstractAioObjectWrite {
-  public:
-    AioObjectRemove(ImageCtx *ictx, const std::string &oid, uint64_t object_no,
-	            const ::SnapContext &snapc, Context *completion)
-      : AbstractAioObjectWrite(ictx, oid, object_no, 0, 0, snapc, completion,
-                               true),
-        m_object_state(OBJECT_NONEXISTENT) {
-    }
-
-  protected:
-    virtual void add_write_ops(librados::ObjectWriteOperation *wr) {
-      if (has_parent()) {
-	wr->truncate(0);
-      } else {
-	wr->remove();
-      }
-    }
+class AbstractAioObjectWrite : public AioObjectRequest<> {
+public:
+  AbstractAioObjectWrite(ImageCtx *ictx, const std::string &oid,
+                         uint64_t object_no, uint64_t object_off,
+                         uint64_t len, const ::SnapContext &snapc,
+                         Context *completion, bool hide_enoent);
 
-    virtual const char* get_write_type() const {
-      if (has_parent()) {
-        return "remove (trunc)";
-      }
-      return "remove";
-    }
-    virtual void pre_object_map_update(uint8_t *new_state) {
-      if (has_parent()) {
-	m_object_state = OBJECT_EXISTS;
-      } else {
-	m_object_state = OBJECT_PENDING;
-      }
-      *new_state = m_object_state;
-    }
-
-    virtual bool post_object_map_update() {
-      if (m_object_state == OBJECT_EXISTS) {
-	return false;
-      }
-      return true;
-    }
+  virtual void add_copyup_ops(librados::ObjectWriteOperation *wr)
+  {
+    add_write_ops(wr);
+  }
 
-    virtual void guard_write();
-    virtual void send_write();
+  virtual bool should_complete(int r);
+  virtual void send();
 
-  private:
-    uint8_t m_object_state;
+  /**
+   * Writes go through the following state machine to deal with
+   * layering and the object map:
+   *
+   * <start>
+   *  .  |
+   *  .  |
+   *  .  \---> LIBRBD_AIO_WRITE_PRE
+   *  .           |         |
+   *  . . . . . . | . . . . | . . . . . . . . . . .
+   *      .       |   -or-  |                     .
+   *      .       |         |                     v
+   *      .       |         \----------------> LIBRBD_AIO_WRITE_FLAT . . .
+   *      .       |                                               |      .
+   *      v       v         need copyup                           |      .
+   * LIBRBD_AIO_WRITE_GUARD -----------> LIBRBD_AIO_WRITE_COPYUP  |      .
+   *  .       |                               |        .          |      .
+   *  .       |                               |        .          |      .
+   *  .       |                         /-----/        .          |      .
+   *  .       |                         |              .          |      .
+   *  .       \-------------------\     |     /-------------------/      .
+   *  .                           |     |     |        .                 .
+   *  .                           v     v     v        .                 .
+   *  .                       LIBRBD_AIO_WRITE_POST    .                 .
+   *  .                               |                .                 .
+   *  .                               |  . . . . . . . .                 .
+   *  .                               |  .                               .
+   *  .                               v  v                               .
+   *  . . . . . . . . . . . . . . > <finish> < . . . . . . . . . . . . . .
+   *
+   * The _PRE/_POST states are skipped if the object map is disabled.
+   * The write starts in _WRITE_GUARD or _FLAT depending on whether or not
+   * there is a parent overlap.
+   */
+protected:
+  enum write_state_d {
+    LIBRBD_AIO_WRITE_GUARD,
+    LIBRBD_AIO_WRITE_COPYUP,
+    LIBRBD_AIO_WRITE_FLAT,
+    LIBRBD_AIO_WRITE_PRE,
+    LIBRBD_AIO_WRITE_POST,
+    LIBRBD_AIO_WRITE_ERROR
   };
 
-  class AioObjectTrim : public AbstractAioObjectWrite {
-  public:
-    AioObjectTrim(ImageCtx *ictx, const std::string &oid, uint64_t object_no,
+  write_state_d m_state;
+  librados::ObjectWriteOperation m_write;
+  uint64_t m_snap_seq;
+  std::vector<librados::snap_t> m_snaps;
+  bool m_object_exist;
+
+  virtual void add_write_ops(librados::ObjectWriteOperation *wr) = 0;
+  virtual const char* get_write_type() const = 0;
+  virtual void guard_write();
+  virtual void pre_object_map_update(uint8_t *new_state) = 0;
+  virtual bool post_object_map_update() {
+    return false;
+  }
+  virtual void send_write();
+  virtual void send_write_op(bool write_guard);
+  virtual void handle_write_guard();
+
+private:
+  void send_pre();
+  bool send_post();
+  void send_copyup();
+};
+
+class AioObjectWrite : public AbstractAioObjectWrite {
+public:
+  AioObjectWrite(ImageCtx *ictx, const std::string &oid, uint64_t object_no,
+                 uint64_t object_off, const ceph::bufferlist &data,
+                 const ::SnapContext &snapc, Context *completion,
+                 int op_flags)
+    : AbstractAioObjectWrite(ictx, oid, object_no, object_off, data.length(),
+                             snapc, completion, false),
+      m_write_data(data), m_op_flags(op_flags) {
+  }
+
+protected:
+  virtual void add_write_ops(librados::ObjectWriteOperation *wr);
+
+  virtual const char* get_write_type() const {
+    return "write";
+  }
+
+  virtual void pre_object_map_update(uint8_t *new_state) {
+    *new_state = OBJECT_EXISTS;
+  }
+  virtual void send_write();
+
+private:
+  ceph::bufferlist m_write_data;
+  int m_op_flags;
+};
+
+class AioObjectRemove : public AbstractAioObjectWrite {
+public:
+  AioObjectRemove(ImageCtx *ictx, const std::string &oid, uint64_t object_no,
                   const ::SnapContext &snapc, Context *completion)
-      : AbstractAioObjectWrite(ictx, oid, object_no, 0, 0, snapc, completion,
-                               true) {
-    }
-
-  protected:
-    virtual void add_write_ops(librados::ObjectWriteOperation *wr) {
+    : AbstractAioObjectWrite(ictx, oid, object_no, 0, 0, snapc, completion,
+                             true),
+      m_object_state(OBJECT_NONEXISTENT) {
+  }
+
+protected:
+  virtual void add_write_ops(librados::ObjectWriteOperation *wr) {
+    if (has_parent()) {
+      wr->truncate(0);
+    } else {
       wr->remove();
     }
+  }
 
-    virtual const char* get_write_type() const {
-      return "remove (trim)";
+  virtual const char* get_write_type() const {
+    if (has_parent()) {
+      return "remove (trunc)";
     }
-
-    virtual void pre_object_map_update(uint8_t *new_state) {
-      *new_state = OBJECT_PENDING;
-    }
-
-    virtual bool post_object_map_update() {
-      return true;
-    }
-  };
-
-  class AioObjectTruncate : public AbstractAioObjectWrite {
-  public:
-    AioObjectTruncate(ImageCtx *ictx, const std::string &oid,
-                      uint64_t object_no, uint64_t object_off,
-                      const ::SnapContext &snapc, Context *completion)
-      : AbstractAioObjectWrite(ictx, oid, object_no, object_off, 0, snapc,
-                               completion, true) {
-    }
-
-  protected:
-    virtual void add_write_ops(librados::ObjectWriteOperation *wr) {
-      wr->truncate(m_object_off);
+    return "remove";
+  }
+  virtual void pre_object_map_update(uint8_t *new_state) {
+    if (has_parent()) {
+      m_object_state = OBJECT_EXISTS;
+    } else {
+      m_object_state = OBJECT_PENDING;
     }
+    *new_state = m_object_state;
+  }
 
-    virtual const char* get_write_type() const {
-      return "truncate";
-    }
-
-    virtual void pre_object_map_update(uint8_t *new_state) {
-      if (!m_object_exist && !has_parent())
-        *new_state = OBJECT_NONEXISTENT;
-      else
-	*new_state = OBJECT_EXISTS;
-    }
-    virtual void send_write();
-  };
-
-  class AioObjectZero : public AbstractAioObjectWrite {
-  public:
-    AioObjectZero(ImageCtx *ictx, const std::string &oid, uint64_t object_no,
-                  uint64_t object_off, uint64_t object_len,
-                  const ::SnapContext &snapc, Context *completion)
-      : AbstractAioObjectWrite(ictx, oid, object_no, object_off, object_len,
-                               snapc, completion, true) {
-    }
-
-  protected:
-    virtual void add_write_ops(librados::ObjectWriteOperation *wr) {
-      wr->zero(m_object_off, m_object_len);
-    }
-
-    virtual const char* get_write_type() const {
-      return "zero";
+  virtual bool post_object_map_update() {
+    if (m_object_state == OBJECT_EXISTS) {
+      return false;
     }
-
-    virtual void pre_object_map_update(uint8_t *new_state) {
+    return true;
+  }
+
+  virtual void guard_write();
+  virtual void send_write();
+
+private:
+  uint8_t m_object_state;
+};
+
+class AioObjectTrim : public AbstractAioObjectWrite {
+public:
+  AioObjectTrim(ImageCtx *ictx, const std::string &oid, uint64_t object_no,
+                const ::SnapContext &snapc, Context *completion)
+    : AbstractAioObjectWrite(ictx, oid, object_no, 0, 0, snapc, completion,
+                             true) {
+  }
+
+protected:
+  virtual void add_write_ops(librados::ObjectWriteOperation *wr) {
+    wr->remove();
+  }
+
+  virtual const char* get_write_type() const {
+    return "remove (trim)";
+  }
+
+  virtual void pre_object_map_update(uint8_t *new_state) {
+    *new_state = OBJECT_PENDING;
+  }
+
+  virtual bool post_object_map_update() {
+    return true;
+  }
+};
+
+class AioObjectTruncate : public AbstractAioObjectWrite {
+public:
+  AioObjectTruncate(ImageCtx *ictx, const std::string &oid,
+                    uint64_t object_no, uint64_t object_off,
+                    const ::SnapContext &snapc, Context *completion)
+    : AbstractAioObjectWrite(ictx, oid, object_no, object_off, 0, snapc,
+                             completion, true) {
+  }
+
+protected:
+  virtual void add_write_ops(librados::ObjectWriteOperation *wr) {
+    wr->truncate(m_object_off);
+  }
+
+  virtual const char* get_write_type() const {
+    return "truncate";
+  }
+
+  virtual void pre_object_map_update(uint8_t *new_state) {
+    if (!m_object_exist && !has_parent())
+      *new_state = OBJECT_NONEXISTENT;
+    else
       *new_state = OBJECT_EXISTS;
-    }
-  };
-
-}
-
-#endif
+  }
+  virtual void send_write();
+};
+
+class AioObjectZero : public AbstractAioObjectWrite {
+public:
+  AioObjectZero(ImageCtx *ictx, const std::string &oid, uint64_t object_no,
+                uint64_t object_off, uint64_t object_len,
+                const ::SnapContext &snapc, Context *completion)
+    : AbstractAioObjectWrite(ictx, oid, object_no, object_off, object_len,
+                             snapc, completion, true) {
+  }
+
+protected:
+  virtual void add_write_ops(librados::ObjectWriteOperation *wr) {
+    wr->zero(m_object_off, m_object_len);
+  }
+
+  virtual const char* get_write_type() const {
+    return "zero";
+  }
+
+  virtual void pre_object_map_update(uint8_t *new_state) {
+    *new_state = OBJECT_EXISTS;
+  }
+};
+
+} // namespace librbd
+
+extern template class librbd::AioObjectRequest<librbd::ImageCtx>;
+extern template class librbd::AioObjectRead<librbd::ImageCtx>;
+
+#endif // CEPH_LIBRBD_AIO_OBJECT_REQUEST_H
diff --git a/src/librbd/AsyncObjectThrottle.cc b/src/librbd/AsyncObjectThrottle.cc
index 3bf195c..99f0bfd 100644
--- a/src/librbd/AsyncObjectThrottle.cc
+++ b/src/librbd/AsyncObjectThrottle.cc
@@ -1,12 +1,10 @@
 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
 // vim: ts=8 sw=2 smarttab
 #include "librbd/AsyncObjectThrottle.h"
-#include "include/rbd/librbd.hpp"
 #include "common/RWLock.h"
 #include "common/WorkQueue.h"
 #include "librbd/AsyncRequest.h"
 #include "librbd/ImageCtx.h"
-#include "librbd/internal.h"
 #include "librbd/Utils.h"
 
 namespace librbd
diff --git a/src/librbd/AsyncObjectThrottle.h b/src/librbd/AsyncObjectThrottle.h
index 08f0199..845e487 100644
--- a/src/librbd/AsyncObjectThrottle.h
+++ b/src/librbd/AsyncObjectThrottle.h
@@ -5,10 +5,8 @@
 
 #include "include/int_types.h"
 #include "include/Context.h"
-#include "common/RWLock.h"
 
 #include <boost/function.hpp>
-#include "include/assert.h"
 
 namespace librbd
 {
diff --git a/src/librbd/AsyncRequest.cc b/src/librbd/AsyncRequest.cc
index a93eb50..c251143 100644
--- a/src/librbd/AsyncRequest.cc
+++ b/src/librbd/AsyncRequest.cc
@@ -2,10 +2,8 @@
 // vim: ts=8 sw=2 smarttab
 #include "librbd/AsyncRequest.h"
 #include "librbd/ImageCtx.h"
-#include "librbd/internal.h"
 #include "librbd/Utils.h"
 #include "common/WorkQueue.h"
-#include <boost/bind.hpp>
 
 namespace librbd
 {
diff --git a/src/librbd/AsyncRequest.h b/src/librbd/AsyncRequest.h
index 8ca84f5..f74368d 100644
--- a/src/librbd/AsyncRequest.h
+++ b/src/librbd/AsyncRequest.h
@@ -3,7 +3,6 @@
 #ifndef CEPH_LIBRBD_ASYNC_REQUEST_H
 #define CEPH_LIBRBD_ASYNC_REQUEST_H
 
-#include "include/int_types.h"
 #include "include/Context.h"
 #include "include/rados/librados.hpp"
 #include "include/xlist.h"
@@ -23,8 +22,7 @@ public:
   void complete(int r) {
     if (should_complete(r)) {
       r = filter_return_code(r);
-      finish(r);
-      delete this;
+      finish_and_destroy(r);
     }
   }
 
@@ -51,6 +49,12 @@ protected:
     return r;
   }
 
+  // NOTE: temporary until converted to new state machine format
+  virtual void finish_and_destroy(int r) {
+    finish(r);
+    delete this;
+  }
+
   virtual void finish(int r) {
     finish_request();
     m_on_finish->complete(r);
diff --git a/src/librbd/CopyupRequest.cc b/src/librbd/CopyupRequest.cc
index 29d8a4a..b95544b 100644
--- a/src/librbd/CopyupRequest.cc
+++ b/src/librbd/CopyupRequest.cc
@@ -13,8 +13,6 @@
 #include "librbd/CopyupRequest.h"
 #include "librbd/ExclusiveLock.h"
 #include "librbd/ImageCtx.h"
-#include "librbd/ImageWatcher.h"
-#include "librbd/internal.h"
 #include "librbd/ObjectMap.h"
 #include "librbd/Utils.h"
 
@@ -80,227 +78,229 @@ private:
 } // anonymous namespace
 
 
-  CopyupRequest::CopyupRequest(ImageCtx *ictx, const std::string &oid,
-                               uint64_t objectno,
-			       vector<pair<uint64_t,uint64_t> >& image_extents)
-    : m_ictx(ictx), m_oid(oid), m_object_no(objectno),
-      m_image_extents(image_extents), m_state(STATE_READ_FROM_PARENT)
-  {
-    m_async_op.start_op(*m_ictx);
-  }
+CopyupRequest::CopyupRequest(ImageCtx *ictx, const std::string &oid,
+                             uint64_t objectno,
+      		       vector<pair<uint64_t,uint64_t> >& image_extents)
+  : m_ictx(ictx), m_oid(oid), m_object_no(objectno),
+    m_image_extents(image_extents), m_state(STATE_READ_FROM_PARENT)
+{
+  m_async_op.start_op(*m_ictx);
+}
 
-  CopyupRequest::~CopyupRequest() {
-    assert(m_pending_requests.empty());
-    m_async_op.finish_op();
-  }
+CopyupRequest::~CopyupRequest() {
+  assert(m_pending_requests.empty());
+  m_async_op.finish_op();
+}
+
+void CopyupRequest::append_request(AioObjectRequest<> *req) {
+  ldout(m_ictx->cct, 20) << __func__ << " " << this << ": " << req << dendl;
+  m_pending_requests.push_back(req);
+}
 
-  void CopyupRequest::append_request(AioObjectRequest *req) {
-    ldout(m_ictx->cct, 20) << __func__ << " " << this << ": " << req << dendl;
-    m_pending_requests.push_back(req);
+void CopyupRequest::complete_requests(int r) {
+  while (!m_pending_requests.empty()) {
+    vector<AioObjectRequest<> *>::iterator it = m_pending_requests.begin();
+    AioObjectRequest<> *req = *it;
+    ldout(m_ictx->cct, 20) << __func__ << " completing request " << req
+                           << dendl;
+    req->complete(r);
+    m_pending_requests.erase(it);
   }
+}
 
-  void CopyupRequest::complete_requests(int r) {
-    while (!m_pending_requests.empty()) {
-      vector<AioObjectRequest *>::iterator it = m_pending_requests.begin();
-      AioObjectRequest *req = *it;
-      ldout(m_ictx->cct, 20) << __func__ << " completing request " << req
-			     << dendl;
-      req->complete(r);
-      m_pending_requests.erase(it);
-    }
+bool CopyupRequest::send_copyup() {
+  bool add_copyup_op = !m_copyup_data.is_zero();
+  bool copy_on_read = m_pending_requests.empty();
+  if (!add_copyup_op && copy_on_read) {
+    // copyup empty object to prevent future CoR attempts
+    m_copyup_data.clear();
+    add_copyup_op = true;
   }
 
-  bool CopyupRequest::send_copyup() {
-    bool add_copyup_op = !m_copyup_data.is_zero();
-    bool copy_on_read = m_pending_requests.empty();
-    if (!add_copyup_op && copy_on_read) {
-      // copyup empty object to prevent future CoR attempts
-      m_copyup_data.clear();
-      add_copyup_op = true;
-    }
+  ldout(m_ictx->cct, 20) << __func__ << " " << this
+                         << ": oid " << m_oid << dendl;
+  m_state = STATE_COPYUP;
 
-    ldout(m_ictx->cct, 20) << __func__ << " " << this
-			   << ": oid " << m_oid << dendl;
-    m_state = STATE_COPYUP;
+  m_ictx->snap_lock.get_read();
+  ::SnapContext snapc = m_ictx->snapc;
+  m_ictx->snap_lock.put_read();
 
-    m_ictx->snap_lock.get_read();
-    ::SnapContext snapc = m_ictx->snapc;
-    m_ictx->snap_lock.put_read();
+  std::vector<librados::snap_t> snaps;
 
-    std::vector<librados::snap_t> snaps;
+  if (!copy_on_read) {
+    m_pending_copyups.inc();
+  }
 
-    if (!copy_on_read) {
-      m_pending_copyups.inc();
-    }
+  int r;
+  if (copy_on_read || (!snapc.snaps.empty() && add_copyup_op)) {
+    assert(add_copyup_op);
+    add_copyup_op = false;
+
+    librados::ObjectWriteOperation copyup_op;
+    copyup_op.exec("rbd", "copyup", m_copyup_data);
+
+    // send only the copyup request with a blank snapshot context so that
+    // all snapshots are detected from the parent for this object.  If
+    // this is a CoW request, a second request will be created for the
+    // actual modification.
+    m_pending_copyups.inc();
+
+    ldout(m_ictx->cct, 20) << __func__ << " " << this << " copyup with "
+                           << "empty snapshot context" << dendl;
+    librados::AioCompletion *comp = util::create_rados_safe_callback(this);
+    r = m_ictx->md_ctx.aio_operate(m_oid, comp, &copyup_op, 0, snaps);
+    assert(r == 0);
+    comp->release();
+  }
 
-    int r;
-    if (copy_on_read || (!snapc.snaps.empty() && add_copyup_op)) {
-      assert(add_copyup_op);
-      add_copyup_op = false;
-
-      librados::ObjectWriteOperation copyup_op;
-      copyup_op.exec("rbd", "copyup", m_copyup_data);
-
-      // send only the copyup request with a blank snapshot context so that
-      // all snapshots are detected from the parent for this object.  If
-      // this is a CoW request, a second request will be created for the
-      // actual modification.
-      m_pending_copyups.inc();
-
-      ldout(m_ictx->cct, 20) << __func__ << " " << this << " copyup with "
-                             << "empty snapshot context" << dendl;
-      librados::AioCompletion *comp = util::create_rados_safe_callback(this);
-      r = m_ictx->md_ctx.aio_operate(m_oid, comp, &copyup_op, 0, snaps);
-      assert(r == 0);
-      comp->release();
+  if (!copy_on_read) {
+    librados::ObjectWriteOperation write_op;
+    if (add_copyup_op) {
+      // CoW did not need to handle existing snapshots
+      write_op.exec("rbd", "copyup", m_copyup_data);
     }
 
-    if (!copy_on_read) {
-      librados::ObjectWriteOperation write_op;
-      if (add_copyup_op) {
-        // CoW did not need to handle existing snapshots
-        write_op.exec("rbd", "copyup", m_copyup_data);
-      }
-
-      // merge all pending write ops into this single RADOS op
-      for (size_t i=0; i<m_pending_requests.size(); ++i) {
-        AioObjectRequest *req = m_pending_requests[i];
-        ldout(m_ictx->cct, 20) << __func__ << " add_copyup_ops " << req
-                               << dendl;
-        req->add_copyup_ops(&write_op);
-      }
-      assert(write_op.size() != 0);
-
-      snaps.insert(snaps.end(), snapc.snaps.begin(), snapc.snaps.end());
-      librados::AioCompletion *comp = util::create_rados_safe_callback(this);
-      r = m_ictx->data_ctx.aio_operate(m_oid, comp, &write_op);
-      assert(r == 0);
-      comp->release();
+    // merge all pending write ops into this single RADOS op
+    for (size_t i=0; i<m_pending_requests.size(); ++i) {
+      AioObjectRequest<> *req = m_pending_requests[i];
+      ldout(m_ictx->cct, 20) << __func__ << " add_copyup_ops " << req
+                             << dendl;
+      req->add_copyup_ops(&write_op);
     }
-    return false;
+    assert(write_op.size() != 0);
+
+    snaps.insert(snaps.end(), snapc.snaps.begin(), snapc.snaps.end());
+    librados::AioCompletion *comp = util::create_rados_safe_callback(this);
+    r = m_ictx->data_ctx.aio_operate(m_oid, comp, &write_op);
+    assert(r == 0);
+    comp->release();
   }
+  return false;
+}
 
-  void CopyupRequest::send()
-  {
-    m_state = STATE_READ_FROM_PARENT;
-    AioCompletion *comp = AioCompletion::create(this);
+void CopyupRequest::send()
+{
+  m_state = STATE_READ_FROM_PARENT;
+  AioCompletion *comp = AioCompletion::create_and_start(
+    this, m_ictx, AIO_TYPE_READ);
+
+  ldout(m_ictx->cct, 20) << __func__ << " " << this
+                         << ": completion " << comp
+                         << ", oid " << m_oid
+                         << ", extents " << m_image_extents
+                         << dendl;
+  RWLock::RLocker owner_locker(m_ictx->parent->owner_lock);
+  AioImageRequest<>::aio_read(m_ictx->parent, comp, m_image_extents, NULL,
+                              &m_copyup_data, 0);
+}
 
-    ldout(m_ictx->cct, 20) << __func__ << " " << this
-                           << ": completion " << comp
-			   << ", oid " << m_oid
-                           << ", extents " << m_image_extents
-                           << dendl;
-    RWLock::RLocker owner_locker(m_ictx->parent->owner_lock);
-    AioImageRequest<>::aio_read(m_ictx->parent, comp, m_image_extents, NULL,
-                                &m_copyup_data, 0);
+void CopyupRequest::complete(int r)
+{
+  if (should_complete(r)) {
+    complete_requests(r);
+    delete this;
   }
+}
 
-  void CopyupRequest::complete(int r)
-  {
-    if (should_complete(r)) {
-      complete_requests(r);
-      delete this;
+bool CopyupRequest::should_complete(int r)
+{
+  CephContext *cct = m_ictx->cct;
+  ldout(cct, 20) << __func__ << " " << this
+                 << ": oid " << m_oid
+                 << ", extents " << m_image_extents
+                 << ", r " << r << dendl;
+
+  uint64_t pending_copyups;
+  switch (m_state) {
+  case STATE_READ_FROM_PARENT:
+    ldout(cct, 20) << "READ_FROM_PARENT" << dendl;
+    remove_from_list();
+    if (r >= 0 || r == -ENOENT) {
+      return send_object_map();
     }
-  }
-
-  bool CopyupRequest::should_complete(int r)
-  {
-    CephContext *cct = m_ictx->cct;
-    ldout(cct, 20) << __func__ << " " << this
-		   << ": oid " << m_oid
-		   << ", extents " << m_image_extents
-		   << ", r " << r << dendl;
-
-    uint64_t pending_copyups;
-    switch (m_state) {
-    case STATE_READ_FROM_PARENT:
-      ldout(cct, 20) << "READ_FROM_PARENT" << dendl;
-      remove_from_list();
-      if (r >= 0 || r == -ENOENT) {
-        return send_object_map();
-      }
-      break;
-
-    case STATE_OBJECT_MAP:
-      ldout(cct, 20) << "OBJECT_MAP" << dendl;
-      assert(r == 0);
-      return send_copyup();
-
-    case STATE_COPYUP:
-      // invoked via a finisher in librados, so thread safe
-      pending_copyups = m_pending_copyups.dec();
-      ldout(cct, 20) << "COPYUP (" << pending_copyups << " pending)"
-                     << dendl;
-      if (r == -ENOENT) {
-        // hide the -ENOENT error if this is the last op
-        if (pending_copyups == 0) {
-          complete_requests(0);
-        }
-      } else if (r < 0) {
-        complete_requests(r);
+    break;
+
+  case STATE_OBJECT_MAP:
+    ldout(cct, 20) << "OBJECT_MAP" << dendl;
+    assert(r == 0);
+    return send_copyup();
+
+  case STATE_COPYUP:
+    // invoked via a finisher in librados, so thread safe
+    pending_copyups = m_pending_copyups.dec();
+    ldout(cct, 20) << "COPYUP (" << pending_copyups << " pending)"
+                   << dendl;
+    if (r == -ENOENT) {
+      // hide the -ENOENT error if this is the last op
+      if (pending_copyups == 0) {
+        complete_requests(0);
       }
-      return (pending_copyups == 0);
-
-    default:
-      lderr(cct) << "invalid state: " << m_state << dendl;
-      assert(false);
-      break;
+    } else if (r < 0) {
+      complete_requests(r);
     }
-    return (r < 0);
+    return (pending_copyups == 0);
+
+  default:
+    lderr(cct) << "invalid state: " << m_state << dendl;
+    assert(false);
+    break;
   }
+  return (r < 0);
+}
 
-  void CopyupRequest::remove_from_list()
-  {
-    Mutex::Locker l(m_ictx->copyup_list_lock);
+void CopyupRequest::remove_from_list()
+{
+  Mutex::Locker l(m_ictx->copyup_list_lock);
 
-    map<uint64_t, CopyupRequest*>::iterator it =
-      m_ictx->copyup_list.find(m_object_no);
-    assert(it != m_ictx->copyup_list.end());
-    m_ictx->copyup_list.erase(it);
-  }
+  map<uint64_t, CopyupRequest*>::iterator it =
+    m_ictx->copyup_list.find(m_object_no);
+  assert(it != m_ictx->copyup_list.end());
+  m_ictx->copyup_list.erase(it);
+}
 
-  bool CopyupRequest::send_object_map() {
-    {
-      RWLock::RLocker owner_locker(m_ictx->owner_lock);
-      RWLock::RLocker snap_locker(m_ictx->snap_lock);
-      if (m_ictx->object_map != nullptr) {
-        bool copy_on_read = m_pending_requests.empty();
-        assert(m_ictx->exclusive_lock->is_lock_owner());
-
-        RWLock::WLocker object_map_locker(m_ictx->object_map_lock);
-        if (copy_on_read &&
-            (*m_ictx->object_map)[m_object_no] != OBJECT_EXISTS) {
-          // CoW already updates the HEAD object map
-          m_snap_ids.push_back(CEPH_NOSNAP);
-        }
-        if (!m_ictx->snaps.empty()) {
-          m_snap_ids.insert(m_snap_ids.end(), m_ictx->snaps.begin(),
-                            m_ictx->snaps.end());
-        }
+bool CopyupRequest::send_object_map() {
+  {
+    RWLock::RLocker owner_locker(m_ictx->owner_lock);
+    RWLock::RLocker snap_locker(m_ictx->snap_lock);
+    if (m_ictx->object_map != nullptr) {
+      bool copy_on_read = m_pending_requests.empty();
+      assert(m_ictx->exclusive_lock->is_lock_owner());
+
+      RWLock::WLocker object_map_locker(m_ictx->object_map_lock);
+      if (copy_on_read &&
+          (*m_ictx->object_map)[m_object_no] != OBJECT_EXISTS) {
+        // CoW already updates the HEAD object map
+        m_snap_ids.push_back(CEPH_NOSNAP);
+      }
+      if (!m_ictx->snaps.empty()) {
+        m_snap_ids.insert(m_snap_ids.end(), m_ictx->snaps.begin(),
+                          m_ictx->snaps.end());
       }
     }
+  }
 
-    // avoid possible recursive lock attempts
-    if (m_snap_ids.empty()) {
-      // no object map update required
-      return send_copyup();
-    } else {
-      // update object maps for HEAD and all existing snapshots
-      ldout(m_ictx->cct, 20) << __func__ << " " << this
-      	                     << ": oid " << m_oid
-                             << dendl;
-      m_state = STATE_OBJECT_MAP;
-
-      RWLock::RLocker owner_locker(m_ictx->owner_lock);
-      AsyncObjectThrottle<>::ContextFactory context_factory(
-        boost::lambda::bind(boost::lambda::new_ptr<UpdateObjectMap>(),
-        boost::lambda::_1, m_ictx, m_object_no, &m_snap_ids,
-        boost::lambda::_2));
-      AsyncObjectThrottle<> *throttle = new AsyncObjectThrottle<>(
-        NULL, *m_ictx, context_factory, util::create_context_callback(this),
-        NULL, 0, m_snap_ids.size());
-      throttle->start_ops(m_ictx->concurrent_management_ops);
-    }
-    return false;
+  // avoid possible recursive lock attempts
+  if (m_snap_ids.empty()) {
+    // no object map update required
+    return send_copyup();
+  } else {
+    // update object maps for HEAD and all existing snapshots
+    ldout(m_ictx->cct, 20) << __func__ << " " << this
+    	                   << ": oid " << m_oid
+                           << dendl;
+    m_state = STATE_OBJECT_MAP;
+
+    RWLock::RLocker owner_locker(m_ictx->owner_lock);
+    AsyncObjectThrottle<>::ContextFactory context_factory(
+      boost::lambda::bind(boost::lambda::new_ptr<UpdateObjectMap>(),
+      boost::lambda::_1, m_ictx, m_object_no, &m_snap_ids,
+      boost::lambda::_2));
+    AsyncObjectThrottle<> *throttle = new AsyncObjectThrottle<>(
+      NULL, *m_ictx, context_factory, util::create_context_callback(this),
+      NULL, 0, m_snap_ids.size());
+    throttle->start_ops(m_ictx->concurrent_management_ops);
   }
+  return false;
 }
+
+} // namespace librbd
diff --git a/src/librbd/CopyupRequest.h b/src/librbd/CopyupRequest.h
index 4d971d8..6278768 100644
--- a/src/librbd/CopyupRequest.h
+++ b/src/librbd/CopyupRequest.h
@@ -1,87 +1,88 @@
 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
 // vim: ts=8 sw=2 smarttab
-#ifndef CEPH_LIBRBD_COPYUPREQUEST_H
-#define CEPH_LIBRBD_COPYUPREQUEST_H
+
+#ifndef CEPH_LIBRBD_COPYUP_REQUEST_H
+#define CEPH_LIBRBD_COPYUP_REQUEST_H
 
 #include "librbd/AsyncOperation.h"
 #include "include/int_types.h"
-
-#include "common/Mutex.h"
 #include "include/buffer.h"
-#include "include/rados/librados.hpp"
 
 namespace librbd {
 
-  struct AioCompletion;
-
-  class CopyupRequest {
-  public:
-    CopyupRequest(ImageCtx *ictx, const std::string &oid, uint64_t objectno,
-                  vector<pair<uint64_t,uint64_t> >& image_extents);
-    ~CopyupRequest();
-
-    void append_request(AioObjectRequest *req);
-
-    void send();
-
-    void complete(int r);
-
-  private:
-    /**
-     * Copyup requests go through the following state machine to read from the
-     * parent image, update the object map, and copyup the object:
-     *
-     *
-     * @verbatim
-     *
-     * <start>
-     *    |
-     *    v
-     *  STATE_READ_FROM_PARENT
-     *    .   .        |
-     *    .   .        v
-     *    .   .     STATE_OBJECT_MAP . .
-     *    .   .        |               .
-     *    .   .        v               .
-     *    .   . . > STATE_COPYUP       .
-     *    .            |               .
-     *    .            v               .
-     *    . . . . > <finish> < . . . . .
-     *
-     * @endverbatim
-     *
-     * The _OBJECT_MAP state is skipped if the object map isn't enabled or if
-     * an object map update isn't required. The _COPYUP state is skipped if
-     * no data was read from the parent *and* there are no additional ops.
-     */
-    enum State {
-      STATE_READ_FROM_PARENT,
-      STATE_OBJECT_MAP,
-      STATE_COPYUP
-    };
-
-    ImageCtx *m_ictx;
-    std::string m_oid;
-    uint64_t m_object_no;
-    vector<pair<uint64_t,uint64_t> > m_image_extents;
-    State m_state;
-    ceph::bufferlist m_copyup_data;
-    vector<AioObjectRequest *> m_pending_requests;
-    atomic_t m_pending_copyups;
-
-    AsyncOperation m_async_op;
-
-    std::vector<uint64_t> m_snap_ids;
-
-    void complete_requests(int r);
-
-    bool should_complete(int r);
-
-    void remove_from_list();
-
-    bool send_object_map();
-    bool send_copyup();
+struct AioCompletion;
+template <typename I> class AioObjectRequest;
+struct ImageCtx;
+
+class CopyupRequest {
+public:
+  CopyupRequest(ImageCtx *ictx, const std::string &oid, uint64_t objectno,
+                vector<pair<uint64_t,uint64_t> >& image_extents);
+  ~CopyupRequest();
+
+  void append_request(AioObjectRequest<ImageCtx> *req);
+
+  void send();
+
+  void complete(int r);
+
+private:
+  /**
+   * Copyup requests go through the following state machine to read from the
+   * parent image, update the object map, and copyup the object:
+   *
+   *
+   * @verbatim
+   *
+   * <start>
+   *    |
+   *    v
+   *  STATE_READ_FROM_PARENT
+   *    .   .        |
+   *    .   .        v
+   *    .   .     STATE_OBJECT_MAP . .
+   *    .   .        |               .
+   *    .   .        v               .
+   *    .   . . > STATE_COPYUP       .
+   *    .            |               .
+   *    .            v               .
+   *    . . . . > <finish> < . . . . .
+   *
+   * @endverbatim
+   *
+   * The _OBJECT_MAP state is skipped if the object map isn't enabled or if
+   * an object map update isn't required. The _COPYUP state is skipped if
+   * no data was read from the parent *and* there are no additional ops.
+   */
+  enum State {
+    STATE_READ_FROM_PARENT,
+    STATE_OBJECT_MAP,
+    STATE_COPYUP
   };
-}
 
-#endif
+  ImageCtx *m_ictx;
+  std::string m_oid;
+  uint64_t m_object_no;
+  vector<pair<uint64_t,uint64_t> > m_image_extents;
+  State m_state;
+  ceph::bufferlist m_copyup_data;
+  vector<AioObjectRequest<ImageCtx> *> m_pending_requests;
+  atomic_t m_pending_copyups;
+
+  AsyncOperation m_async_op;
+
+  std::vector<uint64_t> m_snap_ids;
+
+  void complete_requests(int r);
+
+  bool should_complete(int r);
+
+  void remove_from_list();
+
+  bool send_object_map();
+  bool send_copyup();
+};
+
+} // namespace librbd
+
+#endif // CEPH_LIBRBD_COPYUP_REQUEST_H
diff --git a/src/librbd/DiffIterate.cc b/src/librbd/DiffIterate.cc
index df8287d..9b0a3ac 100644
--- a/src/librbd/DiffIterate.cc
+++ b/src/librbd/DiffIterate.cc
@@ -3,13 +3,11 @@
 
 #include "librbd/DiffIterate.h"
 #include "librbd/ImageCtx.h"
-#include "librbd/internal.h"
 #include "librbd/ObjectMap.h"
 #include "librbd/Utils.h"
 #include "include/rados/librados.hpp"
 #include "include/interval_set.h"
 #include "common/errno.h"
-#include "common/Mutex.h"
 #include "common/Throttle.h"
 #include "librados/snap_set_diff.h"
 #include <boost/tuple/tuple.hpp>
diff --git a/src/librbd/ExclusiveLock.cc b/src/librbd/ExclusiveLock.cc
index 932fe04..4c0501c 100644
--- a/src/librbd/ExclusiveLock.cc
+++ b/src/librbd/ExclusiveLock.cc
@@ -75,33 +75,36 @@ bool ExclusiveLock<I>::is_lock_owner() const {
 }
 
 template <typename I>
-bool ExclusiveLock<I>::accept_requests() const {
+bool ExclusiveLock<I>::accept_requests(int *ret_val) const {
   Mutex::Locker locker(m_lock);
 
   bool accept_requests = (!is_shutdown() && m_state == STATE_LOCKED &&
-                          m_request_blockers == 0);
+                          !m_request_blocked);
+  *ret_val = m_request_blocked_ret_val;
+
   ldout(m_image_ctx.cct, 20) << this << " " << __func__ << "="
                              << accept_requests << dendl;
   return accept_requests;
 }
 
 template <typename I>
-void ExclusiveLock<I>::block_requests() {
+void ExclusiveLock<I>::block_requests(int r) {
   Mutex::Locker locker(m_lock);
-  ++m_request_blockers;
+  assert(!m_request_blocked);
+  m_request_blocked = true;
+  m_request_blocked_ret_val = r;
 
-  ldout(m_image_ctx.cct, 20) << this << " " << __func__ << "="
-                             << m_request_blockers << dendl;
+  ldout(m_image_ctx.cct, 20) << this << " " << __func__ << dendl;
 }
 
 template <typename I>
 void ExclusiveLock<I>::unblock_requests() {
   Mutex::Locker locker(m_lock);
-  assert(m_request_blockers > 0);
-  --m_request_blockers;
+  assert(m_request_blocked);
+  m_request_blocked = false;
+  m_request_blocked_ret_val = 0;
 
-  ldout(m_image_ctx.cct, 20) << this << " " << __func__ << "="
-                             << m_request_blockers << dendl;
+  ldout(m_image_ctx.cct, 20) << this << " " << __func__ << dendl;
 }
 
 template <typename I>
@@ -192,6 +195,20 @@ void ExclusiveLock<I>::release_lock(Context *on_released) {
 }
 
 template <typename I>
+void ExclusiveLock<I>::handle_watch_registered() {
+  Mutex::Locker locker(m_lock);
+  if (m_state != STATE_WAITING_FOR_REGISTER) {
+    return;
+  }
+
+  ldout(m_image_ctx.cct, 10) << this << " " << __func__ << dendl;
+  Action active_action = get_active_action();
+  assert(active_action == ACTION_TRY_LOCK ||
+         active_action == ACTION_REQUEST_LOCK);
+  execute_next_action();
+}
+
+template <typename I>
 void ExclusiveLock<I>::handle_lock_released() {
   Mutex::Locker locker(m_lock);
   if (m_state != STATE_WAITING_FOR_PEER) {
@@ -237,6 +254,7 @@ bool ExclusiveLock<I>::is_transition_state() const {
   case STATE_INITIALIZING:
   case STATE_ACQUIRING:
   case STATE_WAITING_FOR_PEER:
+  case STATE_WAITING_FOR_REGISTER:
   case STATE_POST_ACQUIRING:
   case STATE_PRE_RELEASING:
   case STATE_RELEASING:
@@ -355,10 +373,16 @@ void ExclusiveLock<I>::send_acquire_lock() {
     return;
   }
 
-  ldout(m_image_ctx.cct, 10) << this << " " << __func__ << dendl;
+  CephContext *cct = m_image_ctx.cct;
+  ldout(cct, 10) << this << " " << __func__ << dendl;
   m_state = STATE_ACQUIRING;
 
   m_watch_handle = m_image_ctx.image_watcher->get_watch_handle();
+  if (m_watch_handle == 0) {
+    lderr(cct) << "image watcher not registered - delaying request" << dendl;
+    m_state = STATE_WAITING_FOR_REGISTER;
+    return;
+  }
 
   using el = ExclusiveLock<I>;
   AcquireRequest<I>* req = AcquireRequest<I>::create(
diff --git a/src/librbd/ExclusiveLock.h b/src/librbd/ExclusiveLock.h
index 6268c80..e82da20 100644
--- a/src/librbd/ExclusiveLock.h
+++ b/src/librbd/ExclusiveLock.h
@@ -8,7 +8,6 @@
 #include "include/Context.h"
 #include "include/rados/librados.hpp"
 #include "common/Mutex.h"
-#include "common/RWLock.h"
 #include <list>
 #include <string>
 #include <utility>
@@ -30,9 +29,9 @@ public:
   ~ExclusiveLock();
 
   bool is_lock_owner() const;
-  bool accept_requests() const;
+  bool accept_requests(int *ret_val) const;
 
-  void block_requests();
+  void block_requests(int r);
   void unblock_requests();
 
   void init(uint64_t features, Context *on_init);
@@ -42,6 +41,7 @@ public:
   void request_lock(Context *on_locked);
   void release_lock(Context *on_released);
 
+  void handle_watch_registered();
   void handle_lock_released();
 
   void assert_header_locked(librados::ObjectWriteOperation *op);
@@ -51,10 +51,13 @@ public:
 private:
 
   /**
-   * <start>                               WAITING_FOR_PEER -----------------\
-   *    |                                     ^                              |
-   *    |                                     *  (request_lock busy)         |
-   *    |                                     * * * * * * * * * * * *        |
+   * <start>                              * * > WAITING_FOR_REGISTER --------\
+   *    |                                 * (watch not registered)           |
+   *    |                                 *                                  |
+   *    |                                 * * > WAITING_FOR_PEER ------------\
+   *    |                                 * (request_lock busy)              |
+   *    |                                 *                                  |
+   *    |                                 * * * * * * * * * * * * * *        |
    *    |                                                           *        |
    *    v            (init)            (try_lock/request_lock)      *        |
    * UNINITIALIZED  -------> UNLOCKED ------------------------> ACQUIRING <--/
@@ -80,6 +83,7 @@ private:
     STATE_ACQUIRING,
     STATE_POST_ACQUIRING,
     STATE_WAITING_FOR_PEER,
+    STATE_WAITING_FOR_REGISTER,
     STATE_PRE_RELEASING,
     STATE_RELEASING,
     STATE_PRE_SHUTTING_DOWN,
@@ -130,7 +134,8 @@ private:
 
   ActionsContexts m_actions_contexts;
 
-  uint32_t m_request_blockers = 0;
+  bool m_request_blocked = false;
+  int m_request_blocked_ret_val = 0;
 
   std::string encode_lock_cookie() const;
 
diff --git a/src/librbd/ImageCtx.cc b/src/librbd/ImageCtx.cc
index 5649969..df1d837 100644
--- a/src/librbd/ImageCtx.cc
+++ b/src/librbd/ImageCtx.cc
@@ -27,6 +27,7 @@
 #include "librbd/Operations.h"
 #include "librbd/operation/ResizeRequest.h"
 #include "librbd/Utils.h"
+#include "librbd/LibrbdWriteback.h"
 
 #include "osdc/Striper.h"
 #include <boost/bind.hpp>
@@ -662,17 +663,6 @@ struct C_InvalidateCache : public Context {
     return -ENOENT;
   }
 
-  uint64_t ImageCtx::get_copyup_snap_id() const
-  {
-    assert(snap_lock.is_locked());
-    // copyup requires the largest possible parent overlap,
-    // which is always the oldest snapshot (if any).
-    if (!snaps.empty()) {
-      return snaps.back();
-    }
-    return CEPH_NOSNAP;
-  }
-
   void ImageCtx::aio_read_from_cache(object_t o, uint64_t object_no,
 				     bufferlist *bl, size_t len,
 				     uint64_t off, Context *onfinish,
@@ -790,7 +780,7 @@ struct C_InvalidateCache : public Context {
 
   void ImageCtx::register_watch(Context *on_finish) {
     assert(image_watcher == NULL);
-    image_watcher = new ImageWatcher(*this);
+    image_watcher = new ImageWatcher<>(*this);
     image_watcher->register_watch(on_finish);
   }
 
@@ -938,7 +928,8 @@ struct C_InvalidateCache : public Context {
         "rbd_journal_object_flush_interval", false)(
         "rbd_journal_object_flush_bytes", false)(
         "rbd_journal_object_flush_age", false)(
-        "rbd_journal_pool", false);
+        "rbd_journal_pool", false)(
+        "rbd_journal_max_payload_bytes", false);
 
     md_config_t local_config_t;
     std::map<std::string, bufferlist> res;
@@ -993,6 +984,7 @@ struct C_InvalidateCache : public Context {
     ASSIGN_OPTION(journal_object_flush_bytes);
     ASSIGN_OPTION(journal_object_flush_age);
     ASSIGN_OPTION(journal_pool);
+    ASSIGN_OPTION(journal_max_payload_bytes);
   }
 
   ExclusiveLock<ImageCtx> *ImageCtx::create_exclusive_lock() {
@@ -1019,10 +1011,7 @@ struct C_InvalidateCache : public Context {
 
   void ImageCtx::notify_update() {
     state->handle_update_notification();
-
-    C_SaferCond ctx;
-    image_watcher->notify_header_update(&ctx);
-    ctx.wait();
+    ImageWatcher<>::notify_header_update(md_ctx, header_oid);
   }
 
   void ImageCtx::notify_update(Context *on_finish) {
diff --git a/src/librbd/ImageCtx.h b/src/librbd/ImageCtx.h
index 076072c..7660f6e 100644
--- a/src/librbd/ImageCtx.h
+++ b/src/librbd/ImageCtx.h
@@ -7,18 +7,15 @@
 
 #include <list>
 #include <map>
-#include <set>
 #include <string>
 #include <vector>
-#include <boost/optional.hpp>
 
-#include "common/Cond.h"
 #include "common/event_socket.h"
 #include "common/Mutex.h"
 #include "common/Readahead.h"
 #include "common/RWLock.h"
 #include "common/snap_types.h"
-#include "include/atomic.h"
+
 #include "include/buffer_fwd.h"
 #include "include/rbd/librbd.hpp"
 #include "include/rbd_types.h"
@@ -28,7 +25,6 @@
 
 #include "cls/rbd/cls_rbd_client.h"
 #include "librbd/AsyncRequest.h"
-#include "librbd/LibrbdWriteback.h"
 #include "librbd/SnapInfo.h"
 #include "librbd/parent_types.h"
 
@@ -46,11 +42,12 @@ namespace librbd {
   class CopyupRequest;
   template <typename> class ExclusiveLock;
   template <typename> class ImageState;
-  class ImageWatcher;
+  template <typename> class ImageWatcher;
   template <typename> class Journal;
   class LibrbdAdminSocketHook;
   class ObjectMap;
   template <typename> class Operations;
+  class LibrbdWriteback;
 
   namespace exclusive_lock { struct Policy; }
   namespace journal { struct Policy; }
@@ -82,7 +79,7 @@ namespace librbd {
     std::string name;
     std::string snap_name;
     IoCtx data_ctx, md_ctx;
-    ImageWatcher *image_watcher;
+    ImageWatcher<ImageCtx> *image_watcher;
     Journal<ImageCtx> *journal;
 
     /**
@@ -185,6 +182,7 @@ namespace librbd {
     uint64_t journal_object_flush_bytes;
     double journal_object_flush_age;
     std::string journal_pool;
+    uint32_t journal_max_payload_bytes;
 
     LibrbdAdminSocketHook *asok_hook;
 
@@ -259,7 +257,6 @@ namespace librbd {
     uint64_t get_parent_snap_id(librados::snap_t in_snap_id) const;
     int get_parent_overlap(librados::snap_t in_snap_id,
 			   uint64_t *overlap) const;
-    uint64_t get_copyup_snap_id() const;
     void aio_read_from_cache(object_t o, uint64_t object_no, bufferlist *bl,
 			     size_t len, uint64_t off, Context *onfinish,
 			     int fadvise_flags);
diff --git a/src/librbd/ImageState.cc b/src/librbd/ImageState.cc
index 39f4ee6..5b450b0 100644
--- a/src/librbd/ImageState.cc
+++ b/src/librbd/ImageState.cc
@@ -2,6 +2,7 @@
 // vim: ts=8 sw=2 smarttab
 
 #include "librbd/ImageState.h"
+#include "include/rbd/librbd.hpp"
 #include "common/dout.h"
 #include "common/errno.h"
 #include "common/Cond.h"
@@ -22,16 +23,224 @@ namespace librbd {
 using util::create_async_context_callback;
 using util::create_context_callback;
 
+class ImageUpdateWatchers {
+public:
+
+  ImageUpdateWatchers(CephContext *cct) : m_cct(cct),
+    m_lock(util::unique_lock_name("librbd::ImageUpdateWatchers::m_lock", this)) {
+  }
+
+  ~ImageUpdateWatchers() {
+    assert(m_watchers.empty());
+    assert(m_in_flight.empty());
+    assert(m_pending_unregister.empty());
+    assert(m_on_shut_down_finish == nullptr);
+
+    destroy_work_queue();
+  }
+
+  void flush(Context *on_finish) {
+    ldout(m_cct, 20) << "ImageUpdateWatchers::" << __func__ << dendl;
+    {
+      Mutex::Locker locker(m_lock);
+      if (!m_in_flight.empty()) {
+	Context *ctx = new FunctionContext(
+	  [this, on_finish](int r) {
+	    ldout(m_cct, 20) << "ImageUpdateWatchers::" << __func__
+	                     << ": completing flush" << dendl;
+	    on_finish->complete(r);
+	  });
+	m_work_queue->queue(ctx, 0);
+	return;
+      }
+    }
+    ldout(m_cct, 20) << "ImageUpdateWatchers::" << __func__
+		     << ": completing flush" << dendl;
+    on_finish->complete(0);
+  }
+
+  void shut_down(Context *on_finish) {
+    ldout(m_cct, 20) << "ImageUpdateWatchers::" << __func__ << dendl;
+    {
+      Mutex::Locker locker(m_lock);
+      assert(m_on_shut_down_finish == nullptr);
+      m_watchers.clear();
+      if (!m_in_flight.empty()) {
+	m_on_shut_down_finish = on_finish;
+	return;
+      }
+    }
+    ldout(m_cct, 20) << "ImageUpdateWatchers::" << __func__
+		     << ": completing shut down" << dendl;
+    on_finish->complete(0);
+  }
+
+  void register_watcher(UpdateWatchCtx *watcher, uint64_t *handle) {
+    ldout(m_cct, 20) << __func__ << ": watcher=" << watcher << dendl;
+
+    Mutex::Locker locker(m_lock);
+    assert(m_on_shut_down_finish == nullptr);
+
+    create_work_queue();
+
+    *handle = m_next_handle++;
+    m_watchers.insert(std::make_pair(*handle, watcher));
+  }
+
+  void unregister_watcher(uint64_t handle, Context *on_finish) {
+    ldout(m_cct, 20) << "ImageUpdateWatchers::" << __func__ << ": handle="
+		     << handle << dendl;
+    int r = 0;
+    {
+      Mutex::Locker locker(m_lock);
+      auto it = m_watchers.find(handle);
+      if (it == m_watchers.end()) {
+	r = -ENOENT;
+      } else {
+	if (m_in_flight.find(handle) != m_in_flight.end()) {
+	  assert(m_pending_unregister.find(handle) == m_pending_unregister.end());
+	  m_pending_unregister[handle] = on_finish;
+	  on_finish = nullptr;
+	}
+	m_watchers.erase(it);
+      }
+    }
+
+    if (on_finish) {
+      ldout(m_cct, 20) << "ImageUpdateWatchers::" << __func__
+		       << ": completing unregister" << dendl;
+      on_finish->complete(r);
+    }
+  }
+
+  void notify() {
+    ldout(m_cct, 20) << "ImageUpdateWatchers::" << __func__ << dendl;
+
+    Mutex::Locker locker(m_lock);
+    for (auto it : m_watchers) {
+      send_notify(it.first, it.second);
+    }
+  }
+
+  void send_notify(uint64_t handle, UpdateWatchCtx *watcher) {
+    assert(m_lock.is_locked());
+
+    ldout(m_cct, 20) << "ImageUpdateWatchers::" << __func__ << ": handle="
+		     << handle << ", watcher=" << watcher << dendl;
+
+    m_in_flight.insert(handle);
+
+    Context *ctx = new FunctionContext(
+      [this, handle, watcher](int r) {
+	handle_notify(handle, watcher);
+      });
+
+    m_work_queue->queue(ctx, 0);
+  }
+
+  void handle_notify(uint64_t handle, UpdateWatchCtx *watcher) {
+
+    ldout(m_cct, 20) << "ImageUpdateWatchers::" << __func__ << ": handle="
+		     << handle << ", watcher=" << watcher << dendl;
+
+    watcher->handle_notify();
+
+    Context *on_unregister_finish = nullptr;
+    Context *on_shut_down_finish = nullptr;
+
+    {
+      Mutex::Locker locker(m_lock);
+
+      auto in_flight_it = m_in_flight.find(handle);
+      assert(in_flight_it != m_in_flight.end());
+      m_in_flight.erase(in_flight_it);
+
+      // If there is no more in flight notifications for this watcher
+      // and it is pending unregister, complete it now.
+      if (m_in_flight.find(handle) == m_in_flight.end()) {
+	auto it = m_pending_unregister.find(handle);
+	if (it != m_pending_unregister.end()) {
+	  on_unregister_finish = it->second;
+	  m_pending_unregister.erase(it);
+	}
+      }
+
+      if (m_in_flight.empty()) {
+	assert(m_pending_unregister.empty());
+	if (m_on_shut_down_finish != nullptr) {
+	  std::swap(m_on_shut_down_finish, on_shut_down_finish);
+	}
+      }
+    }
+
+    if (on_unregister_finish != nullptr) {
+      ldout(m_cct, 20) << "ImageUpdateWatchers::" << __func__
+		       << ": completing unregister" << dendl;
+      on_unregister_finish->complete(0);
+    }
+
+    if (on_shut_down_finish != nullptr) {
+      ldout(m_cct, 20) << "ImageUpdateWatchers::" << __func__
+		       << ": completing shut down" << dendl;
+      on_shut_down_finish->complete(0);
+    }
+  }
+
+private:
+  class ThreadPoolSingleton : public ThreadPool {
+  public:
+    explicit ThreadPoolSingleton(CephContext *cct)
+      : ThreadPool(cct, "librbd::ImageUpdateWatchers::thread_pool", "tp_librbd",
+		   1) {
+      start();
+    }
+    virtual ~ThreadPoolSingleton() {
+      stop();
+    }
+  };
+
+  CephContext *m_cct;
+  Mutex m_lock;
+  ContextWQ *m_work_queue = nullptr;
+  std::map<uint64_t, UpdateWatchCtx*> m_watchers;
+  uint64_t m_next_handle = 0;
+  std::multiset<uint64_t> m_in_flight;
+  std::map<uint64_t, Context*> m_pending_unregister;
+  Context *m_on_shut_down_finish = nullptr;
+
+  void create_work_queue() {
+    if (m_work_queue != nullptr) {
+      return;
+    }
+    ThreadPoolSingleton *thread_pool_singleton;
+    m_cct->lookup_or_create_singleton_object<ThreadPoolSingleton>(
+      thread_pool_singleton, "librbd::ImageUpdateWatchers::thread_pool");
+    m_work_queue = new ContextWQ("librbd::ImageUpdateWatchers::op_work_queue",
+				 m_cct->_conf->rbd_op_thread_timeout,
+				 thread_pool_singleton);
+  }
+
+  void destroy_work_queue() {
+    if (m_work_queue == nullptr) {
+      return;
+    }
+    m_work_queue->drain();
+    delete m_work_queue;
+  }
+};
+
 template <typename I>
 ImageState<I>::ImageState(I *image_ctx)
   : m_image_ctx(image_ctx), m_state(STATE_UNINITIALIZED),
     m_lock(util::unique_lock_name("librbd::ImageState::m_lock", this)),
-    m_last_refresh(0), m_refresh_seq(0) {
+    m_last_refresh(0), m_refresh_seq(0),
+    m_update_watchers(new ImageUpdateWatchers(image_ctx->cct)) {
 }
 
 template <typename I>
 ImageState<I>::~ImageState() {
   assert(m_state == STATE_UNINITIALIZED || m_state == STATE_CLOSED);
+  delete m_update_watchers;
 }
 
 template <typename I>
@@ -84,8 +293,12 @@ void ImageState<I>::handle_update_notification() {
   ++m_refresh_seq;
 
   CephContext *cct = m_image_ctx->cct;
-  ldout(cct, 20) << "refresh_seq = " << m_refresh_seq << ", "
+  ldout(cct, 20) << __func__ << ": refresh_seq = " << m_refresh_seq << ", "
 		 << "last_refresh = " << m_last_refresh << dendl;
+
+  if (m_state == STATE_OPEN) {
+    m_update_watchers->notify();
+  }
 }
 
 template <typename I>
@@ -165,6 +378,44 @@ void ImageState<I>::snap_set(const std::string &snap_name, Context *on_finish) {
 }
 
 template <typename I>
+int ImageState<I>::register_update_watcher(UpdateWatchCtx *watcher,
+					 uint64_t *handle) {
+  CephContext *cct = m_image_ctx->cct;
+  ldout(cct, 20) << __func__ << dendl;
+
+  m_update_watchers->register_watcher(watcher, handle);
+
+  ldout(cct, 20) << __func__ << ": handle=" << *handle << dendl;
+  return 0;
+}
+
+template <typename I>
+int ImageState<I>::unregister_update_watcher(uint64_t handle) {
+  CephContext *cct = m_image_ctx->cct;
+  ldout(cct, 20) << __func__ << ": handle=" << handle << dendl;
+
+  C_SaferCond ctx;
+  m_update_watchers->unregister_watcher(handle, &ctx);
+  return ctx.wait();
+}
+
+template <typename I>
+void ImageState<I>::flush_update_watchers(Context *on_finish) {
+  CephContext *cct = m_image_ctx->cct;
+  ldout(cct, 20) << __func__ << dendl;
+
+  m_update_watchers->flush(on_finish);
+}
+
+template <typename I>
+void ImageState<I>::shut_down_update_watchers(Context *on_finish) {
+  CephContext *cct = m_image_ctx->cct;
+  ldout(cct, 20) << __func__ << dendl;
+
+  m_update_watchers->shut_down(on_finish);
+}
+
+template <typename I>
 bool ImageState<I>::is_transition_state() const {
   switch (m_state) {
   case STATE_UNINITIALIZED:
diff --git a/src/librbd/ImageState.h b/src/librbd/ImageState.h
index b60172f..bad4277 100644
--- a/src/librbd/ImageState.h
+++ b/src/librbd/ImageState.h
@@ -16,6 +16,8 @@ class RWLock;
 namespace librbd {
 
 class ImageCtx;
+class ImageUpdateWatchers;
+class UpdateWatchCtx;
 
 template <typename ImageCtxT = ImageCtx>
 class ImageState {
@@ -40,6 +42,11 @@ public:
 
   void snap_set(const std::string &snap_name, Context *on_finish);
 
+  int register_update_watcher(UpdateWatchCtx *watcher, uint64_t *handle);
+  int unregister_update_watcher(uint64_t handle);
+  void flush_update_watchers(Context *on_finish);
+  void shut_down_update_watchers(Context *on_finish);
+
 private:
   enum State {
     STATE_UNINITIALIZED,
@@ -95,6 +102,8 @@ private:
   uint64_t m_last_refresh;
   uint64_t m_refresh_seq;
 
+  ImageUpdateWatchers *m_update_watchers;
+
   bool is_transition_state() const;
   bool is_closed() const;
 
diff --git a/src/librbd/ImageWatcher.cc b/src/librbd/ImageWatcher.cc
index d3d4e70..7aeb1c9 100644
--- a/src/librbd/ImageWatcher.cc
+++ b/src/librbd/ImageWatcher.cc
@@ -7,7 +7,6 @@
 #include "librbd/ImageCtx.h"
 #include "librbd/ImageState.h"
 #include "librbd/internal.h"
-#include "librbd/ObjectMap.h"
 #include "librbd/Operations.h"
 #include "librbd/TaskFinisher.h"
 #include "librbd/Utils.h"
@@ -15,13 +14,9 @@
 #include "librbd/image_watcher/Notifier.h"
 #include "librbd/image_watcher/NotifyLockOwner.h"
 #include "include/encoding.h"
-#include "include/stringify.h"
 #include "common/errno.h"
 #include "common/WorkQueue.h"
-#include <sstream>
 #include <boost/bind.hpp>
-#include <boost/function.hpp>
-#include <boost/scope_exit.hpp>
 
 #define dout_subsys ceph_subsys_rbd
 #undef dout_prefix
@@ -31,6 +26,7 @@ namespace librbd {
 
 using namespace image_watcher;
 using namespace watch_notify;
+using util::create_async_context_callback;
 using util::create_context_callback;
 using util::create_rados_safe_callback;
 
@@ -72,7 +68,8 @@ struct C_UnwatchAndFlush : public Context {
 
 static const double	RETRY_DELAY_SECONDS = 1.0;
 
-ImageWatcher::ImageWatcher(ImageCtx &image_ctx)
+template <typename I>
+ImageWatcher<I>::ImageWatcher(I &image_ctx)
   : m_image_ctx(image_ctx),
     m_watch_lock(util::unique_lock_name("librbd::ImageWatcher::m_watch_lock", this)),
     m_watch_ctx(*this), m_watch_handle(0),
@@ -84,7 +81,8 @@ ImageWatcher::ImageWatcher(ImageCtx &image_ctx)
 {
 }
 
-ImageWatcher::~ImageWatcher()
+template <typename I>
+ImageWatcher<I>::~ImageWatcher()
 {
   delete m_task_finisher;
   {
@@ -93,7 +91,8 @@ ImageWatcher::~ImageWatcher()
   }
 }
 
-void ImageWatcher::register_watch(Context *on_finish) {
+template <typename I>
+void ImageWatcher<I>::register_watch(Context *on_finish) {
   ldout(m_image_ctx.cct, 10) << this << " registering image watcher" << dendl;
 
   RWLock::RLocker watch_locker(m_watch_lock);
@@ -106,7 +105,8 @@ void ImageWatcher::register_watch(Context *on_finish) {
   aio_comp->release();
 }
 
-void ImageWatcher::handle_register_watch(int r) {
+template <typename I>
+void ImageWatcher<I>::handle_register_watch(int r) {
   RWLock::WLocker watch_locker(m_watch_lock);
   assert(m_watch_state == WATCH_STATE_UNREGISTERED);
   if (r < 0) {
@@ -116,11 +116,15 @@ void ImageWatcher::handle_register_watch(int r) {
   }
 }
 
-void ImageWatcher::unregister_watch(Context *on_finish) {
+template <typename I>
+void ImageWatcher<I>::unregister_watch(Context *on_finish) {
   ldout(m_image_ctx.cct, 10) << this << " unregistering image watcher" << dendl;
 
   cancel_async_requests();
-  m_task_finisher->cancel_all();
+
+  C_Gather *g = new C_Gather(m_image_ctx.cct, create_async_context_callback(
+          m_image_ctx, on_finish));
+  m_task_finisher->cancel_all(g->new_sub());
 
   {
     RWLock::WLocker l(m_watch_lock);
@@ -128,33 +132,36 @@ void ImageWatcher::unregister_watch(Context *on_finish) {
       m_watch_state = WATCH_STATE_UNREGISTERED;
 
       librados::AioCompletion *aio_comp = create_rados_safe_callback(
-        new C_UnwatchAndFlush(m_image_ctx.md_ctx, on_finish));
+        new C_UnwatchAndFlush(m_image_ctx.md_ctx, g->new_sub()));
       int r = m_image_ctx.md_ctx.aio_unwatch(m_watch_handle, aio_comp);
       assert(r == 0);
       aio_comp->release();
+      g->activate();
       return;
     } else if (m_watch_state == WATCH_STATE_ERROR) {
       m_watch_state = WATCH_STATE_UNREGISTERED;
     }
   }
-
-  on_finish->complete(0);
+  g->activate();
 }
 
-void ImageWatcher::flush(Context *on_finish) {
+template <typename I>
+void ImageWatcher<I>::flush(Context *on_finish) {
   m_notifier.flush(on_finish);
 }
 
-void ImageWatcher::schedule_async_progress(const AsyncRequestId &request,
-					   uint64_t offset, uint64_t total) {
+template <typename I>
+void ImageWatcher<I>::schedule_async_progress(const AsyncRequestId &request,
+					      uint64_t offset, uint64_t total) {
   FunctionContext *ctx = new FunctionContext(
-    boost::bind(&ImageWatcher::notify_async_progress, this, request, offset,
+    boost::bind(&ImageWatcher<I>::notify_async_progress, this, request, offset,
                 total));
   m_task_finisher->queue(Task(TASK_CODE_ASYNC_PROGRESS, request), ctx);
 }
 
-int ImageWatcher::notify_async_progress(const AsyncRequestId &request,
-					uint64_t offset, uint64_t total) {
+template <typename I>
+int ImageWatcher<I>::notify_async_progress(const AsyncRequestId &request,
+				           uint64_t offset, uint64_t total) {
   ldout(m_image_ctx.cct, 20) << this << " remote async request progress: "
 			     << request << " @ " << offset
 			     << "/" << total << dendl;
@@ -165,25 +172,30 @@ int ImageWatcher::notify_async_progress(const AsyncRequestId &request,
   return 0;
 }
 
-void ImageWatcher::schedule_async_complete(const AsyncRequestId &request,
-					   int r) {
+template <typename I>
+void ImageWatcher<I>::schedule_async_complete(const AsyncRequestId &request,
+                                              int r) {
   FunctionContext *ctx = new FunctionContext(
-    boost::bind(&ImageWatcher::notify_async_complete, this, request, r));
+    boost::bind(&ImageWatcher<I>::notify_async_complete, this, request, r));
   m_task_finisher->queue(ctx);
 }
 
-void ImageWatcher::notify_async_complete(const AsyncRequestId &request, int r) {
+template <typename I>
+void ImageWatcher<I>::notify_async_complete(const AsyncRequestId &request,
+                                            int r) {
   ldout(m_image_ctx.cct, 20) << this << " remote async request finished: "
 			     << request << " = " << r << dendl;
 
   bufferlist bl;
   ::encode(NotifyMessage(AsyncCompletePayload(request, r)), bl);
   m_notifier.notify(bl, nullptr, new FunctionContext(
-    boost::bind(&ImageWatcher::handle_async_complete, this, request, r, _1)));
+    boost::bind(&ImageWatcher<I>::handle_async_complete, this, request, r,
+                _1)));
 }
 
-void ImageWatcher::handle_async_complete(const AsyncRequestId &request, int r,
-                                         int ret_val) {
+template <typename I>
+void ImageWatcher<I>::handle_async_complete(const AsyncRequestId &request,
+                                            int r, int ret_val) {
   ldout(m_image_ctx.cct, 20) << this << " " << __func__ << ": "
                              << "request=" << request << ", r=" << ret_val
                              << dendl;
@@ -199,9 +211,10 @@ void ImageWatcher::handle_async_complete(const AsyncRequestId &request, int r,
   }
 }
 
-void ImageWatcher::notify_flatten(uint64_t request_id,
-                                  ProgressContext &prog_ctx,
-                                  Context *on_finish) {
+template <typename I>
+void ImageWatcher<I>::notify_flatten(uint64_t request_id,
+                                     ProgressContext &prog_ctx,
+                                     Context *on_finish) {
   assert(m_image_ctx.owner_lock.is_locked());
   assert(m_image_ctx.exclusive_lock &&
          !m_image_ctx.exclusive_lock->is_lock_owner());
@@ -213,9 +226,10 @@ void ImageWatcher::notify_flatten(uint64_t request_id,
   notify_async_request(async_request_id, std::move(bl), prog_ctx, on_finish);
 }
 
-void ImageWatcher::notify_resize(uint64_t request_id, uint64_t size,
-				 ProgressContext &prog_ctx,
-                                 Context *on_finish) {
+template <typename I>
+void ImageWatcher<I>::notify_resize(uint64_t request_id, uint64_t size,
+                                    ProgressContext &prog_ctx,
+                                    Context *on_finish) {
   assert(m_image_ctx.owner_lock.is_locked());
   assert(m_image_ctx.exclusive_lock &&
          !m_image_ctx.exclusive_lock->is_lock_owner());
@@ -227,8 +241,9 @@ void ImageWatcher::notify_resize(uint64_t request_id, uint64_t size,
   notify_async_request(async_request_id, std::move(bl), prog_ctx, on_finish);
 }
 
-void ImageWatcher::notify_snap_create(const std::string &snap_name,
-                                      Context *on_finish) {
+template <typename I>
+void ImageWatcher<I>::notify_snap_create(const std::string &snap_name,
+                                         Context *on_finish) {
   assert(m_image_ctx.owner_lock.is_locked());
   assert(m_image_ctx.exclusive_lock &&
          !m_image_ctx.exclusive_lock->is_lock_owner());
@@ -238,8 +253,9 @@ void ImageWatcher::notify_snap_create(const std::string &snap_name,
   notify_lock_owner(std::move(bl), on_finish);
 }
 
-void ImageWatcher::notify_snap_rename(const snapid_t &src_snap_id,
-				      const std::string &dst_snap_name,
+template <typename I>
+void ImageWatcher<I>::notify_snap_rename(const snapid_t &src_snap_id,
+				         const std::string &dst_snap_name,
                                       Context *on_finish) {
   assert(m_image_ctx.owner_lock.is_locked());
   assert(m_image_ctx.exclusive_lock &&
@@ -250,8 +266,9 @@ void ImageWatcher::notify_snap_rename(const snapid_t &src_snap_id,
   notify_lock_owner(std::move(bl), on_finish);
 }
 
-void ImageWatcher::notify_snap_remove(const std::string &snap_name,
-                                      Context *on_finish) {
+template <typename I>
+void ImageWatcher<I>::notify_snap_remove(const std::string &snap_name,
+                                         Context *on_finish) {
   assert(m_image_ctx.owner_lock.is_locked());
   assert(m_image_ctx.exclusive_lock &&
          !m_image_ctx.exclusive_lock->is_lock_owner());
@@ -261,8 +278,9 @@ void ImageWatcher::notify_snap_remove(const std::string &snap_name,
   notify_lock_owner(std::move(bl), on_finish);
 }
 
-void ImageWatcher::notify_snap_protect(const std::string &snap_name,
-                                       Context *on_finish) {
+template <typename I>
+void ImageWatcher<I>::notify_snap_protect(const std::string &snap_name,
+                                          Context *on_finish) {
   assert(m_image_ctx.owner_lock.is_locked());
   assert(m_image_ctx.exclusive_lock &&
          !m_image_ctx.exclusive_lock->is_lock_owner());
@@ -272,8 +290,9 @@ void ImageWatcher::notify_snap_protect(const std::string &snap_name,
   notify_lock_owner(std::move(bl), on_finish);
 }
 
-void ImageWatcher::notify_snap_unprotect(const std::string &snap_name,
-                                         Context *on_finish) {
+template <typename I>
+void ImageWatcher<I>::notify_snap_unprotect(const std::string &snap_name,
+                                            Context *on_finish) {
   assert(m_image_ctx.owner_lock.is_locked());
   assert(m_image_ctx.exclusive_lock &&
          !m_image_ctx.exclusive_lock->is_lock_owner());
@@ -283,9 +302,10 @@ void ImageWatcher::notify_snap_unprotect(const std::string &snap_name,
   notify_lock_owner(std::move(bl), on_finish);
 }
 
-void ImageWatcher::notify_rebuild_object_map(uint64_t request_id,
-                                             ProgressContext &prog_ctx,
-                                             Context *on_finish) {
+template <typename I>
+void ImageWatcher<I>::notify_rebuild_object_map(uint64_t request_id,
+                                                ProgressContext &prog_ctx,
+                                                Context *on_finish) {
   assert(m_image_ctx.owner_lock.is_locked());
   assert(m_image_ctx.exclusive_lock &&
          !m_image_ctx.exclusive_lock->is_lock_owner());
@@ -297,8 +317,9 @@ void ImageWatcher::notify_rebuild_object_map(uint64_t request_id,
   notify_async_request(async_request_id, std::move(bl), prog_ctx, on_finish);
 }
 
-void ImageWatcher::notify_rename(const std::string &image_name,
-                                 Context *on_finish) {
+template <typename I>
+void ImageWatcher<I>::notify_rename(const std::string &image_name,
+                                    Context *on_finish) {
   assert(m_image_ctx.owner_lock.is_locked());
   assert(m_image_ctx.exclusive_lock &&
          !m_image_ctx.exclusive_lock->is_lock_owner());
@@ -308,7 +329,8 @@ void ImageWatcher::notify_rename(const std::string &image_name,
   notify_lock_owner(std::move(bl), on_finish);
 }
 
-void ImageWatcher::notify_header_update(Context *on_finish) {
+template <typename I>
+void ImageWatcher<I>::notify_header_update(Context *on_finish) {
   ldout(m_image_ctx.cct, 10) << this << ": " << __func__ << dendl;
 
   // supports legacy (empty buffer) clients
@@ -317,13 +339,24 @@ void ImageWatcher::notify_header_update(Context *on_finish) {
   m_notifier.notify(bl, nullptr, on_finish);
 }
 
-void ImageWatcher::schedule_cancel_async_requests() {
+template <typename I>
+void ImageWatcher<I>::notify_header_update(librados::IoCtx &io_ctx,
+				           const std::string &oid) {
+  // supports legacy (empty buffer) clients
+  bufferlist bl;
+  ::encode(NotifyMessage(HeaderUpdatePayload()), bl);
+  io_ctx.notify2(oid, bl, image_watcher::Notifier::NOTIFY_TIMEOUT, nullptr);
+}
+
+template <typename I>
+void ImageWatcher<I>::schedule_cancel_async_requests() {
   FunctionContext *ctx = new FunctionContext(
-    boost::bind(&ImageWatcher::cancel_async_requests, this));
+    boost::bind(&ImageWatcher<I>::cancel_async_requests, this));
   m_task_finisher->queue(TASK_CODE_CANCEL_ASYNC_REQUESTS, ctx);
 }
 
-void ImageWatcher::cancel_async_requests() {
+template <typename I>
+void ImageWatcher<I>::cancel_async_requests() {
   RWLock::WLocker l(m_async_request_lock);
   for (std::map<AsyncRequestId, AsyncRequest>::iterator iter =
 	 m_async_requests.begin();
@@ -333,19 +366,22 @@ void ImageWatcher::cancel_async_requests() {
   m_async_requests.clear();
 }
 
-void ImageWatcher::set_owner_client_id(const ClientId& client_id) {
+template <typename I>
+void ImageWatcher<I>::set_owner_client_id(const ClientId& client_id) {
   assert(m_owner_client_id_lock.is_locked());
   m_owner_client_id = client_id;
   ldout(m_image_ctx.cct, 10) << this << " current lock owner: "
                              << m_owner_client_id << dendl;
 }
 
-ClientId ImageWatcher::get_client_id() {
+template <typename I>
+ClientId ImageWatcher<I>::get_client_id() {
   RWLock::RLocker l(m_watch_lock);
   return ClientId(m_image_ctx.md_ctx.get_instance_id(), m_watch_handle);
 }
 
-void ImageWatcher::notify_acquired_lock() {
+template <typename I>
+void ImageWatcher<I>::notify_acquired_lock() {
   ldout(m_image_ctx.cct, 10) << this << " notify acquired lock" << dendl;
 
   ClientId client_id = get_client_id();
@@ -359,7 +395,8 @@ void ImageWatcher::notify_acquired_lock() {
   m_notifier.notify(bl, nullptr, nullptr);
 }
 
-void ImageWatcher::notify_released_lock() {
+template <typename I>
+void ImageWatcher<I>::notify_released_lock() {
   ldout(m_image_ctx.cct, 10) << this << " notify released lock" << dendl;
 
   {
@@ -372,7 +409,8 @@ void ImageWatcher::notify_released_lock() {
   m_notifier.notify(bl, nullptr, nullptr);
 }
 
-void ImageWatcher::schedule_request_lock(bool use_timer, int timer_delay) {
+template <typename I>
+void ImageWatcher<I>::schedule_request_lock(bool use_timer, int timer_delay) {
   assert(m_image_ctx.owner_lock.is_locked());
 
   if (m_image_ctx.exclusive_lock == nullptr) {
@@ -387,7 +425,7 @@ void ImageWatcher::schedule_request_lock(bool use_timer, int timer_delay) {
     ldout(m_image_ctx.cct, 15) << this << " requesting exclusive lock" << dendl;
 
     FunctionContext *ctx = new FunctionContext(
-      boost::bind(&ImageWatcher::notify_request_lock, this));
+      boost::bind(&ImageWatcher<I>::notify_request_lock, this));
     if (use_timer) {
       if (timer_delay < 0) {
         timer_delay = RETRY_DELAY_SECONDS;
@@ -400,7 +438,8 @@ void ImageWatcher::schedule_request_lock(bool use_timer, int timer_delay) {
   }
 }
 
-void ImageWatcher::notify_request_lock() {
+template <typename I>
+void ImageWatcher<I>::notify_request_lock() {
   RWLock::RLocker owner_locker(m_image_ctx.owner_lock);
   RWLock::RLocker snap_locker(m_image_ctx.snap_lock);
 
@@ -415,10 +454,11 @@ void ImageWatcher::notify_request_lock() {
   bufferlist bl;
   ::encode(NotifyMessage(RequestLockPayload(get_client_id(), false)), bl);
   notify_lock_owner(std::move(bl), create_context_callback<
-    ImageWatcher, &ImageWatcher::handle_request_lock>(this));
+    ImageWatcher, &ImageWatcher<I>::handle_request_lock>(this));
 }
 
-void ImageWatcher::handle_request_lock(int r) {
+template <typename I>
+void ImageWatcher<I>::handle_request_lock(int r) {
   RWLock::RLocker owner_locker(m_image_ctx.owner_lock);
   RWLock::RLocker snap_locker(m_image_ctx.snap_lock);
 
@@ -447,7 +487,8 @@ void ImageWatcher::handle_request_lock(int r) {
   }
 }
 
-void ImageWatcher::notify_lock_owner(bufferlist &&bl, Context *on_finish) {
+template <typename I>
+void ImageWatcher<I>::notify_lock_owner(bufferlist &&bl, Context *on_finish) {
   assert(on_finish != nullptr);
   assert(m_image_ctx.owner_lock.is_locked());
   NotifyLockOwner *notify_lock_owner = NotifyLockOwner::create(
@@ -455,7 +496,8 @@ void ImageWatcher::notify_lock_owner(bufferlist &&bl, Context *on_finish) {
   notify_lock_owner->send();
 }
 
-Context *ImageWatcher::remove_async_request(const AsyncRequestId &id) {
+template <typename I>
+Context *ImageWatcher<I>::remove_async_request(const AsyncRequestId &id) {
   RWLock::WLocker async_request_locker(m_async_request_lock);
   auto it = m_async_requests.find(id);
   if (it != m_async_requests.end()) {
@@ -466,12 +508,13 @@ Context *ImageWatcher::remove_async_request(const AsyncRequestId &id) {
   return nullptr;
 }
 
-void ImageWatcher::schedule_async_request_timed_out(const AsyncRequestId &id) {
+template <typename I>
+void ImageWatcher<I>::schedule_async_request_timed_out(const AsyncRequestId &id) {
   ldout(m_image_ctx.cct, 20) << "scheduling async request time out: " << id
                              << dendl;
 
   Context *ctx = new FunctionContext(boost::bind(
-    &ImageWatcher::async_request_timed_out, this, id));
+    &ImageWatcher<I>::async_request_timed_out, this, id));
 
   Task task(TASK_CODE_ASYNC_REQUEST, id);
   m_task_finisher->cancel(task);
@@ -479,7 +522,8 @@ void ImageWatcher::schedule_async_request_timed_out(const AsyncRequestId &id) {
   m_task_finisher->add_event_after(task, m_image_ctx.request_timed_out_seconds, ctx);
 }
 
-void ImageWatcher::async_request_timed_out(const AsyncRequestId &id) {
+template <typename I>
+void ImageWatcher<I>::async_request_timed_out(const AsyncRequestId &id) {
   Context *on_complete = remove_async_request(id);
   if (on_complete != nullptr) {
     ldout(m_image_ctx.cct, 5) << "async request timed out: " << id << dendl;
@@ -487,8 +531,9 @@ void ImageWatcher::async_request_timed_out(const AsyncRequestId &id) {
   }
 }
 
-void ImageWatcher::notify_async_request(const AsyncRequestId &async_request_id,
-				        bufferlist &&in,
+template <typename I>
+void ImageWatcher<I>::notify_async_request(const AsyncRequestId &async_request_id,
+				           bufferlist &&in,
 				        ProgressContext& prog_ctx,
                                         Context *on_finish) {
   assert(on_finish != nullptr);
@@ -521,9 +566,10 @@ void ImageWatcher::notify_async_request(const AsyncRequestId &async_request_id,
   notify_lock_owner(std::move(in), on_notify);
 }
 
-int ImageWatcher::prepare_async_request(const AsyncRequestId& async_request_id,
-                                        bool* new_request, Context** ctx,
-                                        ProgressContext** prog_ctx) {
+template <typename I>
+int ImageWatcher<I>::prepare_async_request(const AsyncRequestId& async_request_id,
+                                           bool* new_request, Context** ctx,
+                                           ProgressContext** prog_ctx) {
   if (async_request_id.client_id == get_client_id()) {
     return -ERESTART;
   } else {
@@ -540,17 +586,23 @@ int ImageWatcher::prepare_async_request(const AsyncRequestId& async_request_id,
   return 0;
 }
 
-bool ImageWatcher::handle_payload(const HeaderUpdatePayload &payload,
-				  C_NotifyAck *ack_ctx) {
+template <typename I>
+bool ImageWatcher<I>::handle_payload(const HeaderUpdatePayload &payload,
+			             C_NotifyAck *ack_ctx) {
   ldout(m_image_ctx.cct, 10) << this << " image header updated" << dendl;
 
   m_image_ctx.state->handle_update_notification();
   m_image_ctx.perfcounter->inc(l_librbd_notify);
+  if (ack_ctx != nullptr) {
+    m_image_ctx.state->flush_update_watchers(new C_ResponseMessage(ack_ctx));
+    return false;
+  }
   return true;
 }
 
-bool ImageWatcher::handle_payload(const AcquiredLockPayload &payload,
-                                  C_NotifyAck *ack_ctx) {
+template <typename I>
+bool ImageWatcher<I>::handle_payload(const AcquiredLockPayload &payload,
+                                     C_NotifyAck *ack_ctx) {
   ldout(m_image_ctx.cct, 10) << this << " image exclusively locked announcement"
                              << dendl;
 
@@ -572,8 +624,9 @@ bool ImageWatcher::handle_payload(const AcquiredLockPayload &payload,
   return true;
 }
 
-bool ImageWatcher::handle_payload(const ReleasedLockPayload &payload,
-                                  C_NotifyAck *ack_ctx) {
+template <typename I>
+bool ImageWatcher<I>::handle_payload(const ReleasedLockPayload &payload,
+                                     C_NotifyAck *ack_ctx) {
   ldout(m_image_ctx.cct, 10) << this << " exclusive lock released" << dendl;
 
   bool cancel_async_requests = true;
@@ -605,35 +658,41 @@ bool ImageWatcher::handle_payload(const ReleasedLockPayload &payload,
   return true;
 }
 
-bool ImageWatcher::handle_payload(const RequestLockPayload &payload,
-                                  C_NotifyAck *ack_ctx) {
+template <typename I>
+bool ImageWatcher<I>::handle_payload(const RequestLockPayload &payload,
+                                     C_NotifyAck *ack_ctx) {
   ldout(m_image_ctx.cct, 10) << this << " exclusive lock requested" << dendl;
   if (payload.client_id == get_client_id()) {
     return true;
   }
 
   RWLock::RLocker l(m_image_ctx.owner_lock);
-  if (m_image_ctx.exclusive_lock != nullptr &&
-      m_image_ctx.exclusive_lock->accept_requests()) {
-    // need to send something back so the client can detect a missing leader
-    ::encode(ResponseMessage(0), ack_ctx->out);
-
-    {
-      Mutex::Locker owner_client_id_locker(m_owner_client_id_lock);
-      if (!m_owner_client_id.is_valid()) {
-	return true;
+  if (m_image_ctx.exclusive_lock != nullptr) {
+    int r;
+    if (m_image_ctx.exclusive_lock->accept_requests(&r)) {
+      // need to send something back so the client can detect a missing leader
+      ::encode(ResponseMessage(0), ack_ctx->out);
+
+      {
+        Mutex::Locker owner_client_id_locker(m_owner_client_id_lock);
+        if (!m_owner_client_id.is_valid()) {
+	  return true;
+        }
       }
-    }
 
-    ldout(m_image_ctx.cct, 10) << this << " queuing release of exclusive lock"
-                               << dendl;
-    m_image_ctx.get_exclusive_lock_policy()->lock_requested(payload.force);
+      ldout(m_image_ctx.cct, 10) << this << " queuing release of exclusive lock"
+                                 << dendl;
+      m_image_ctx.get_exclusive_lock_policy()->lock_requested(payload.force);
+    } else if (r < 0) {
+      ::encode(ResponseMessage(r), ack_ctx->out);
+    }
   }
   return true;
 }
 
-bool ImageWatcher::handle_payload(const AsyncProgressPayload &payload,
-                                  C_NotifyAck *ack_ctx) {
+template <typename I>
+bool ImageWatcher<I>::handle_payload(const AsyncProgressPayload &payload,
+                                     C_NotifyAck *ack_ctx) {
   RWLock::RLocker l(m_async_request_lock);
   std::map<AsyncRequestId, AsyncRequest>::iterator req_it =
     m_async_requests.find(payload.async_request_id);
@@ -648,8 +707,9 @@ bool ImageWatcher::handle_payload(const AsyncProgressPayload &payload,
   return true;
 }
 
-bool ImageWatcher::handle_payload(const AsyncCompletePayload &payload,
-                                  C_NotifyAck *ack_ctx) {
+template <typename I>
+bool ImageWatcher<I>::handle_payload(const AsyncCompletePayload &payload,
+                                     C_NotifyAck *ack_ctx) {
   Context *on_complete = remove_async_request(payload.async_request_id);
   if (on_complete != nullptr) {
     ldout(m_image_ctx.cct, 10) << this << " request finished: "
@@ -660,177 +720,226 @@ bool ImageWatcher::handle_payload(const AsyncCompletePayload &payload,
   return true;
 }
 
-bool ImageWatcher::handle_payload(const FlattenPayload &payload,
-				  C_NotifyAck *ack_ctx) {
+template <typename I>
+bool ImageWatcher<I>::handle_payload(const FlattenPayload &payload,
+				     C_NotifyAck *ack_ctx) {
 
   RWLock::RLocker l(m_image_ctx.owner_lock);
-  if (m_image_ctx.exclusive_lock != nullptr &&
-      m_image_ctx.exclusive_lock->accept_requests()) {
-    bool new_request;
-    Context *ctx;
-    ProgressContext *prog_ctx;
-    int r = prepare_async_request(payload.async_request_id, &new_request,
-                                  &ctx, &prog_ctx);
-    if (new_request) {
-      ldout(m_image_ctx.cct, 10) << this << " remote flatten request: "
-				 << payload.async_request_id << dendl;
-      m_image_ctx.operations->execute_flatten(*prog_ctx, ctx);
-    }
+  if (m_image_ctx.exclusive_lock != nullptr) {
+    int r;
+    if (m_image_ctx.exclusive_lock->accept_requests(&r)) {
+      bool new_request;
+      Context *ctx;
+      ProgressContext *prog_ctx;
+      r = prepare_async_request(payload.async_request_id, &new_request,
+                                &ctx, &prog_ctx);
+      if (new_request) {
+        ldout(m_image_ctx.cct, 10) << this << " remote flatten request: "
+				   << payload.async_request_id << dendl;
+        m_image_ctx.operations->execute_flatten(*prog_ctx, ctx);
+      }
 
-    ::encode(ResponseMessage(r), ack_ctx->out);
+      ::encode(ResponseMessage(r), ack_ctx->out);
+    } else if (r < 0) {
+      ::encode(ResponseMessage(r), ack_ctx->out);
+    }
   }
   return true;
 }
 
-bool ImageWatcher::handle_payload(const ResizePayload &payload,
-				  C_NotifyAck *ack_ctx) {
+template <typename I>
+bool ImageWatcher<I>::handle_payload(const ResizePayload &payload,
+				     C_NotifyAck *ack_ctx) {
   RWLock::RLocker l(m_image_ctx.owner_lock);
-  if (m_image_ctx.exclusive_lock != nullptr &&
-      m_image_ctx.exclusive_lock->accept_requests()) {
-    bool new_request;
-    Context *ctx;
-    ProgressContext *prog_ctx;
-    int r = prepare_async_request(payload.async_request_id, &new_request,
-                                  &ctx, &prog_ctx);
-    if (new_request) {
-      ldout(m_image_ctx.cct, 10) << this << " remote resize request: "
-				 << payload.async_request_id << " "
-				 << payload.size << dendl;
-      m_image_ctx.operations->execute_resize(payload.size, *prog_ctx, ctx, 0);
-    }
+  if (m_image_ctx.exclusive_lock != nullptr) {
+    int r;
+    if (m_image_ctx.exclusive_lock->accept_requests(&r)) {
+      bool new_request;
+      Context *ctx;
+      ProgressContext *prog_ctx;
+      r = prepare_async_request(payload.async_request_id, &new_request,
+                                &ctx, &prog_ctx);
+      if (new_request) {
+        ldout(m_image_ctx.cct, 10) << this << " remote resize request: "
+				   << payload.async_request_id << " "
+				   << payload.size << dendl;
+        m_image_ctx.operations->execute_resize(payload.size, *prog_ctx, ctx, 0);
+      }
 
-    ::encode(ResponseMessage(r), ack_ctx->out);
+      ::encode(ResponseMessage(r), ack_ctx->out);
+    } else if (r < 0) {
+      ::encode(ResponseMessage(r), ack_ctx->out);
+    }
   }
   return true;
 }
 
-bool ImageWatcher::handle_payload(const SnapCreatePayload &payload,
-				  C_NotifyAck *ack_ctx) {
+template <typename I>
+bool ImageWatcher<I>::handle_payload(const SnapCreatePayload &payload,
+			             C_NotifyAck *ack_ctx) {
   RWLock::RLocker l(m_image_ctx.owner_lock);
-  if (m_image_ctx.exclusive_lock != nullptr &&
-      m_image_ctx.exclusive_lock->accept_requests()) {
-    ldout(m_image_ctx.cct, 10) << this << " remote snap_create request: "
-			       << payload.snap_name << dendl;
-
-    m_image_ctx.operations->execute_snap_create(payload.snap_name.c_str(),
-                                                new C_ResponseMessage(ack_ctx),
-                                                0, false);
-    return false;
+  if (m_image_ctx.exclusive_lock != nullptr) {
+    int r;
+    if (m_image_ctx.exclusive_lock->accept_requests(&r)) {
+      ldout(m_image_ctx.cct, 10) << this << " remote snap_create request: "
+			         << payload.snap_name << dendl;
+
+      m_image_ctx.operations->execute_snap_create(payload.snap_name,
+                                                  new C_ResponseMessage(ack_ctx),
+                                                  0, false);
+      return false;
+    } else if (r < 0) {
+      ::encode(ResponseMessage(r), ack_ctx->out);
+    }
   }
   return true;
 }
 
-bool ImageWatcher::handle_payload(const SnapRenamePayload &payload,
-				  C_NotifyAck *ack_ctx) {
+template <typename I>
+bool ImageWatcher<I>::handle_payload(const SnapRenamePayload &payload,
+			             C_NotifyAck *ack_ctx) {
   RWLock::RLocker l(m_image_ctx.owner_lock);
-  if (m_image_ctx.exclusive_lock != nullptr &&
-      m_image_ctx.exclusive_lock->accept_requests()) {
-    ldout(m_image_ctx.cct, 10) << this << " remote snap_rename request: "
-			       << payload.snap_id << " to "
-			       << payload.snap_name << dendl;
-
-    m_image_ctx.operations->execute_snap_rename(payload.snap_id,
-                                                payload.snap_name.c_str(),
-                                                new C_ResponseMessage(ack_ctx));
-    return false;
+  if (m_image_ctx.exclusive_lock != nullptr) {
+    int r;
+    if (m_image_ctx.exclusive_lock->accept_requests(&r)) {
+      ldout(m_image_ctx.cct, 10) << this << " remote snap_rename request: "
+			         << payload.snap_id << " to "
+			         << payload.snap_name << dendl;
+
+      m_image_ctx.operations->execute_snap_rename(payload.snap_id,
+                                                  payload.snap_name,
+                                                  new C_ResponseMessage(ack_ctx));
+      return false;
+    } else if (r < 0) {
+      ::encode(ResponseMessage(r), ack_ctx->out);
+    }
   }
   return true;
 }
 
-bool ImageWatcher::handle_payload(const SnapRemovePayload &payload,
-				  C_NotifyAck *ack_ctx) {
+template <typename I>
+bool ImageWatcher<I>::handle_payload(const SnapRemovePayload &payload,
+			             C_NotifyAck *ack_ctx) {
   RWLock::RLocker l(m_image_ctx.owner_lock);
-  if (m_image_ctx.exclusive_lock != nullptr &&
-      m_image_ctx.exclusive_lock->accept_requests()) {
-    ldout(m_image_ctx.cct, 10) << this << " remote snap_remove request: "
-			       << payload.snap_name << dendl;
-
-    m_image_ctx.operations->execute_snap_remove(payload.snap_name.c_str(),
-                                                new C_ResponseMessage(ack_ctx));
-    return false;
+  if (m_image_ctx.exclusive_lock != nullptr) {
+    int r;
+    if (m_image_ctx.exclusive_lock->accept_requests(&r)) {
+      ldout(m_image_ctx.cct, 10) << this << " remote snap_remove request: "
+			         << payload.snap_name << dendl;
+
+      m_image_ctx.operations->execute_snap_remove(payload.snap_name,
+                                                  new C_ResponseMessage(ack_ctx));
+      return false;
+    } else if (r < 0) {
+      ::encode(ResponseMessage(r), ack_ctx->out);
+    }
   }
   return true;
 }
 
-bool ImageWatcher::handle_payload(const SnapProtectPayload& payload,
-                                  C_NotifyAck *ack_ctx) {
+template <typename I>
+bool ImageWatcher<I>::handle_payload(const SnapProtectPayload& payload,
+                                     C_NotifyAck *ack_ctx) {
   RWLock::RLocker owner_locker(m_image_ctx.owner_lock);
-  if (m_image_ctx.exclusive_lock != nullptr &&
-      m_image_ctx.exclusive_lock->accept_requests()) {
-    ldout(m_image_ctx.cct, 10) << this << " remote snap_protect request: "
-                               << payload.snap_name << dendl;
+  if (m_image_ctx.exclusive_lock != nullptr) {
+    int r;
+    if (m_image_ctx.exclusive_lock->accept_requests(&r)) {
+      ldout(m_image_ctx.cct, 10) << this << " remote snap_protect request: "
+                                 << payload.snap_name << dendl;
 
-    m_image_ctx.operations->execute_snap_protect(payload.snap_name.c_str(),
-                                                 new C_ResponseMessage(ack_ctx));
-    return false;
+      m_image_ctx.operations->execute_snap_protect(payload.snap_name,
+                                                   new C_ResponseMessage(ack_ctx));
+      return false;
+    } else if (r < 0) {
+      ::encode(ResponseMessage(r), ack_ctx->out);
+    }
   }
   return true;
 }
 
-bool ImageWatcher::handle_payload(const SnapUnprotectPayload& payload,
-                                  C_NotifyAck *ack_ctx) {
+template <typename I>
+bool ImageWatcher<I>::handle_payload(const SnapUnprotectPayload& payload,
+                                     C_NotifyAck *ack_ctx) {
   RWLock::RLocker owner_locker(m_image_ctx.owner_lock);
-  if (m_image_ctx.exclusive_lock != nullptr &&
-      m_image_ctx.exclusive_lock->accept_requests()) {
-    ldout(m_image_ctx.cct, 10) << this << " remote snap_unprotect request: "
-                               << payload.snap_name << dendl;
-
-    m_image_ctx.operations->execute_snap_unprotect(payload.snap_name.c_str(),
-                                                   new C_ResponseMessage(ack_ctx));
-    return false;
+  if (m_image_ctx.exclusive_lock != nullptr) {
+    int r;
+    if (m_image_ctx.exclusive_lock->accept_requests(&r)) {
+      ldout(m_image_ctx.cct, 10) << this << " remote snap_unprotect request: "
+                                 << payload.snap_name << dendl;
+
+      m_image_ctx.operations->execute_snap_unprotect(payload.snap_name,
+                                                     new C_ResponseMessage(ack_ctx));
+      return false;
+    } else if (r < 0) {
+      ::encode(ResponseMessage(r), ack_ctx->out);
+    }
   }
   return true;
 }
 
-bool ImageWatcher::handle_payload(const RebuildObjectMapPayload& payload,
-                                  C_NotifyAck *ack_ctx) {
+template <typename I>
+bool ImageWatcher<I>::handle_payload(const RebuildObjectMapPayload& payload,
+                                     C_NotifyAck *ack_ctx) {
   RWLock::RLocker l(m_image_ctx.owner_lock);
-  if (m_image_ctx.exclusive_lock != nullptr &&
-      m_image_ctx.exclusive_lock->accept_requests()) {
-    bool new_request;
-    Context *ctx;
-    ProgressContext *prog_ctx;
-    int r = prepare_async_request(payload.async_request_id, &new_request,
-                                  &ctx, &prog_ctx);
-    if (new_request) {
-      ldout(m_image_ctx.cct, 10) << this
-                                 << " remote rebuild object map request: "
-                                 << payload.async_request_id << dendl;
-      m_image_ctx.operations->execute_rebuild_object_map(*prog_ctx, ctx);
-    }
+  if (m_image_ctx.exclusive_lock != nullptr) {
+    int r;
+    if (m_image_ctx.exclusive_lock->accept_requests(&r)) {
+      bool new_request;
+      Context *ctx;
+      ProgressContext *prog_ctx;
+      r = prepare_async_request(payload.async_request_id, &new_request,
+                                &ctx, &prog_ctx);
+      if (new_request) {
+        ldout(m_image_ctx.cct, 10) << this
+                                   << " remote rebuild object map request: "
+                                   << payload.async_request_id << dendl;
+        m_image_ctx.operations->execute_rebuild_object_map(*prog_ctx, ctx);
+      }
 
-    ::encode(ResponseMessage(r), ack_ctx->out);
+      ::encode(ResponseMessage(r), ack_ctx->out);
+    } else if (r < 0) {
+      ::encode(ResponseMessage(r), ack_ctx->out);
+    }
   }
   return true;
 }
 
-bool ImageWatcher::handle_payload(const RenamePayload& payload,
-                                  C_NotifyAck *ack_ctx) {
+template <typename I>
+bool ImageWatcher<I>::handle_payload(const RenamePayload& payload,
+                                     C_NotifyAck *ack_ctx) {
   RWLock::RLocker owner_locker(m_image_ctx.owner_lock);
-  if (m_image_ctx.exclusive_lock != nullptr &&
-      m_image_ctx.exclusive_lock->accept_requests()) {
-    ldout(m_image_ctx.cct, 10) << this << " remote rename request: "
-                               << payload.image_name << dendl;
-
-    m_image_ctx.operations->execute_rename(payload.image_name.c_str(),
-                                   new C_ResponseMessage(ack_ctx));
-    return false;
+  if (m_image_ctx.exclusive_lock != nullptr) {
+    int r;
+    if (m_image_ctx.exclusive_lock->accept_requests(&r)) {
+      ldout(m_image_ctx.cct, 10) << this << " remote rename request: "
+                                 << payload.image_name << dendl;
+
+      m_image_ctx.operations->execute_rename(payload.image_name,
+                                             new C_ResponseMessage(ack_ctx));
+      return false;
+    } else if (r < 0) {
+      ::encode(ResponseMessage(r), ack_ctx->out);
+    }
   }
   return true;
 }
 
-bool ImageWatcher::handle_payload(const UnknownPayload &payload,
-				  C_NotifyAck *ack_ctx) {
+template <typename I>
+bool ImageWatcher<I>::handle_payload(const UnknownPayload &payload,
+			             C_NotifyAck *ack_ctx) {
   RWLock::RLocker l(m_image_ctx.owner_lock);
-  if (m_image_ctx.exclusive_lock != nullptr &&
-      m_image_ctx.exclusive_lock->accept_requests()) {
-    ::encode(ResponseMessage(-EOPNOTSUPP), ack_ctx->out);
+  if (m_image_ctx.exclusive_lock != nullptr) {
+    int r;
+    if (m_image_ctx.exclusive_lock->accept_requests(&r) || r < 0) {
+      ::encode(ResponseMessage(-EOPNOTSUPP), ack_ctx->out);
+    }
   }
   return true;
 }
 
-void ImageWatcher::process_payload(uint64_t notify_id, uint64_t handle,
-                                   const Payload &payload, int r) {
+template <typename I>
+void ImageWatcher<I>::process_payload(uint64_t notify_id, uint64_t handle,
+                                      const Payload &payload, int r) {
   if (r < 0) {
     bufferlist out_bl;
     acknowledge_notify(notify_id, handle, out_bl);
@@ -839,8 +948,9 @@ void ImageWatcher::process_payload(uint64_t notify_id, uint64_t handle,
   }
 }
 
-void ImageWatcher::handle_notify(uint64_t notify_id, uint64_t handle,
-				 bufferlist &bl) {
+template <typename I>
+void ImageWatcher<I>::handle_notify(uint64_t notify_id, uint64_t handle,
+			            bufferlist &bl) {
   NotifyMessage notify_message;
   if (bl.length() == 0) {
     // legacy notification for header updates
@@ -866,7 +976,8 @@ void ImageWatcher::handle_notify(uint64_t notify_id, uint64_t handle,
   }
 }
 
-void ImageWatcher::handle_error(uint64_t handle, int err) {
+template <typename I>
+void ImageWatcher<I>::handle_error(uint64_t handle, int err) {
   lderr(m_image_ctx.cct) << this << " image watch failed: " << handle << ", "
                          << cpp_strerror(err) << dendl;
 
@@ -881,17 +992,19 @@ void ImageWatcher::handle_error(uint64_t handle, int err) {
     m_watch_state = WATCH_STATE_ERROR;
 
     FunctionContext *ctx = new FunctionContext(
-      boost::bind(&ImageWatcher::reregister_watch, this));
+      boost::bind(&ImageWatcher<I>::reregister_watch, this));
     m_task_finisher->queue(TASK_CODE_REREGISTER_WATCH, ctx);
   }
 }
 
-void ImageWatcher::acknowledge_notify(uint64_t notify_id, uint64_t handle,
-				      bufferlist &out) {
+template <typename I>
+void ImageWatcher<I>::acknowledge_notify(uint64_t notify_id, uint64_t handle,
+				         bufferlist &out) {
   m_image_ctx.md_ctx.notify_ack(m_image_ctx.header_oid, notify_id, handle, out);
 }
 
-void ImageWatcher::reregister_watch() {
+template <typename I>
+void ImageWatcher<I>::reregister_watch() {
   ldout(m_image_ctx.cct, 10) << this << " re-registering image watch" << dendl;
 
   bool releasing_lock = false;
@@ -928,7 +1041,7 @@ void ImageWatcher::reregister_watch() {
                              << cpp_strerror(r) << dendl;
       if (r != -ESHUTDOWN) {
         FunctionContext *ctx = new FunctionContext(boost::bind(
-          &ImageWatcher::reregister_watch, this));
+          &ImageWatcher<I>::reregister_watch, this));
         m_task_finisher->add_event_after(TASK_CODE_REREGISTER_WATCH,
                                          RETRY_DELAY_SECONDS, ctx);
       }
@@ -937,33 +1050,47 @@ void ImageWatcher::reregister_watch() {
 
     m_watch_state = WATCH_STATE_REGISTERED;
   }
+
+  // if the exclusive lock state machine was paused waiting for the
+  // watch to be re-registered, wake it up
+  RWLock::RLocker owner_locker(m_image_ctx.owner_lock);
+  RWLock::RLocker snap_locker(m_image_ctx.snap_lock);
+  if (m_image_ctx.exclusive_lock != nullptr) {
+    m_image_ctx.exclusive_lock->handle_watch_registered();
+  }
+
   handle_payload(HeaderUpdatePayload(), NULL);
 }
 
-void ImageWatcher::WatchCtx::handle_notify(uint64_t notify_id,
-        	                           uint64_t handle,
-                                           uint64_t notifier_id,
-	                                   bufferlist& bl) {
+template <typename I>
+void ImageWatcher<I>::WatchCtx::handle_notify(uint64_t notify_id,
+                                              uint64_t handle,
+                                              uint64_t notifier_id,
+                                              bufferlist& bl) {
   image_watcher.handle_notify(notify_id, handle, bl);
 }
 
-void ImageWatcher::WatchCtx::handle_error(uint64_t handle, int err) {
+template <typename I>
+void ImageWatcher<I>::WatchCtx::handle_error(uint64_t handle, int err) {
   image_watcher.handle_error(handle, err);
 }
 
-void ImageWatcher::RemoteContext::finish(int r) {
+template <typename I>
+void ImageWatcher<I>::RemoteContext::finish(int r) {
   m_image_watcher.schedule_async_complete(m_async_request_id, r);
 }
 
-ImageWatcher::C_NotifyAck::C_NotifyAck(ImageWatcher *image_watcher,
-                                       uint64_t notify_id, uint64_t handle)
+template <typename I>
+ImageWatcher<I>::C_NotifyAck::C_NotifyAck(ImageWatcher *image_watcher,
+                                          uint64_t notify_id, uint64_t handle)
   : image_watcher(image_watcher), notify_id(notify_id), handle(handle) {
   CephContext *cct = image_watcher->m_image_ctx.cct;
   ldout(cct, 10) << this << " C_NotifyAck start: id=" << notify_id << ", "
                  << "handle=" << handle << dendl;
 }
 
-void ImageWatcher::C_NotifyAck::finish(int r) {
+template <typename I>
+void ImageWatcher<I>::C_NotifyAck::finish(int r) {
   assert(r == 0);
   CephContext *cct = image_watcher->m_image_ctx.cct;
   ldout(cct, 10) << this << " C_NotifyAck finish: id=" << notify_id << ", "
@@ -972,7 +1099,8 @@ void ImageWatcher::C_NotifyAck::finish(int r) {
   image_watcher->acknowledge_notify(notify_id, handle, out);
 }
 
-void ImageWatcher::C_ResponseMessage::finish(int r) {
+template <typename I>
+void ImageWatcher<I>::C_ResponseMessage::finish(int r) {
   CephContext *cct = notify_ack->image_watcher->m_image_ctx.cct;
   ldout(cct, 10) << this << " C_ResponseMessage: r=" << r << dendl;
 
@@ -981,3 +1109,5 @@ void ImageWatcher::C_ResponseMessage::finish(int r) {
 }
 
 } // namespace librbd
+
+template class librbd::ImageWatcher<librbd::ImageCtx>;
diff --git a/src/librbd/ImageWatcher.h b/src/librbd/ImageWatcher.h
index da1c11b..e72eea7 100644
--- a/src/librbd/ImageWatcher.h
+++ b/src/librbd/ImageWatcher.h
@@ -7,16 +7,13 @@
 #include "common/Mutex.h"
 #include "common/RWLock.h"
 #include "include/Context.h"
-#include "include/rados/librados.hpp"
 #include "include/rbd/librbd.hpp"
 #include "librbd/image_watcher/Notifier.h"
 #include "librbd/WatchNotifyTypes.h"
 #include <set>
 #include <string>
 #include <utility>
-#include <vector>
-#include <boost/function.hpp>
-#include "include/assert.h"
+#include <boost/variant.hpp>
 
 class entity_name_t;
 
@@ -25,9 +22,10 @@ namespace librbd {
 class ImageCtx;
 template <typename T> class TaskFinisher;
 
+template <typename ImageCtxT = ImageCtx>
 class ImageWatcher {
 public:
-  ImageWatcher(ImageCtx& image_ctx);
+  ImageWatcher(ImageCtxT& image_ctx);
   ~ImageWatcher();
 
   void register_watch(Context *on_finish);
@@ -54,6 +52,8 @@ public:
   void notify_request_lock();
 
   void notify_header_update(Context *on_finish);
+  static void notify_header_update(librados::IoCtx &io_ctx,
+                                   const std::string &oid);
 
   uint64_t get_watch_handle() const {
     RWLock::RLocker watch_locker(m_watch_lock);
@@ -220,7 +220,7 @@ private:
     }
   };
 
-  ImageCtx &m_image_ctx;
+  ImageCtxT &m_image_ctx;
 
   mutable RWLock m_watch_lock;
   WatchCtx m_watch_ctx;
@@ -315,4 +315,6 @@ private:
 
 } // namespace librbd
 
+extern template class librbd::ImageWatcher<librbd::ImageCtx>;
+
 #endif // CEPH_LIBRBD_IMAGE_WATCHER_H
diff --git a/src/librbd/Journal.cc b/src/librbd/Journal.cc
index b874005..f98977b 100644
--- a/src/librbd/Journal.cc
+++ b/src/librbd/Journal.cc
@@ -10,10 +10,14 @@
 #include "librbd/Utils.h"
 #include "cls/journal/cls_journal_types.h"
 #include "journal/Journaler.h"
+#include "journal/Policy.h"
 #include "journal/ReplayEntry.h"
+#include "journal/Settings.h"
 #include "common/errno.h"
 #include "common/Timer.h"
 #include "common/WorkQueue.h"
+#include "include/rados/librados.hpp"
+
 #include <boost/scope_exit.hpp>
 
 #define dout_subsys ceph_subsys_rbd
@@ -48,7 +52,8 @@ struct C_DecodeTag : public Context {
 
   int process(int r) {
     if (r < 0) {
-      lderr(cct) << "failed to allocate tag: " << cpp_strerror(r) << dendl;
+      lderr(cct) << this << " " << __func__ << ": "
+                 << "failed to allocate tag: " << cpp_strerror(r) << dendl;
       return r;
     }
 
@@ -58,11 +63,13 @@ struct C_DecodeTag : public Context {
     bufferlist::iterator data_it = tag.data.begin();
     r = decode(&data_it, tag_data);
     if (r < 0) {
-      lderr(cct) << "failed to decode allocated tag" << dendl;
+      lderr(cct) << this << " " << __func__ << ": "
+                 << "failed to decode allocated tag" << dendl;
       return r;
     }
 
-    ldout(cct, 20) << "allocated journal tag: "
+    ldout(cct, 20) << this << " " << __func__ << ": "
+                   << "allocated journal tag: "
                    << "tid=" << tag.tid << ", "
                    << "data=" << *tag_data << dendl;
     return 0;
@@ -104,13 +111,15 @@ struct C_DecodeTags : public Context {
 
   int process(int r) {
     if (r < 0) {
-      lderr(cct) << "failed to retrieve journal tags: " << cpp_strerror(r)
+      lderr(cct) << this << " " << __func__ << ": "
+                 << "failed to retrieve journal tags: " << cpp_strerror(r)
                  << dendl;
       return r;
     }
 
     if (tags.empty()) {
-      lderr(cct) << "no journal tags retrieved" << dendl;
+      lderr(cct) << this << " " << __func__ << ": "
+                 << "no journal tags retrieved" << dendl;
       return -ENOENT;
     }
 
@@ -120,11 +129,13 @@ struct C_DecodeTags : public Context {
     bufferlist::iterator data_it = tags.back().data.begin();
     r = C_DecodeTag::decode(&data_it, tag_data);
     if (r < 0) {
-      lderr(cct) << "failed to decode journal tag" << dendl;
+      lderr(cct) << this << " " << __func__ << ": "
+                 << "failed to decode journal tag" << dendl;
       return r;
     }
 
-    ldout(cct, 20) << "most recent journal tag: "
+    ldout(cct, 20) << this << " " << __func__ << ": "
+                   << "most recent journal tag: "
                    << "tid=" << *tag_tid << ", "
                    << "data=" << *tag_data << dendl;
     return 0;
@@ -230,7 +241,8 @@ int allocate_journaler_tag(CephContext *cct, J *journaler,
 
   int r = allocate_tag_ctx.wait();
   if (r < 0) {
-    lderr(cct) << "failed to allocate tag: " << cpp_strerror(r) << dendl;
+    lderr(cct) << __func__ << ": "
+               << "failed to allocate tag: " << cpp_strerror(r) << dendl;
     return r;
   }
   return 0;
@@ -300,7 +312,8 @@ Journal<I>::Journal(I &image_ctx)
     m_lock("Journal<I>::m_lock"), m_state(STATE_UNINITIALIZED),
     m_error_result(0), m_replay_handler(this), m_close_pending(false),
     m_event_lock("Journal<I>::m_event_lock"), m_event_tid(0),
-    m_blocking_writes(false), m_journal_replay(NULL) {
+    m_blocking_writes(false), m_journal_replay(NULL),
+    m_metadata_listener(this) {
 
   CephContext *cct = m_image_ctx.cct;
   ldout(cct, 5) << this << ": ictx=" << &m_image_ctx << dendl;
@@ -329,6 +342,7 @@ Journal<I>::~Journal() {
   assert(m_state == STATE_UNINITIALIZED || m_state == STATE_CLOSED);
   assert(m_journaler == NULL);
   assert(m_journal_replay == NULL);
+  assert(m_on_replay_close_request == nullptr);
   assert(m_wait_for_state_contexts.empty());
 }
 
@@ -353,7 +367,8 @@ int Journal<I>::create(librados::IoCtx &io_ctx, const std::string &image_id,
     IoCtx data_io_ctx;
     int r = rados.ioctx_create(object_pool.c_str(), data_io_ctx);
     if (r != 0) {
-      lderr(cct) << "failed to create journal: "
+      lderr(cct) << __func__ << ": "
+                 << "failed to create journal: "
 		 << "error opening journal objects pool '" << object_pool
 		 << "': " << cpp_strerror(r) << dendl;
       return r;
@@ -361,12 +376,12 @@ int Journal<I>::create(librados::IoCtx &io_ctx, const std::string &image_id,
     pool_id = data_io_ctx.get_id();
   }
 
-  Journaler journaler(io_ctx, image_id, IMAGE_CLIENT_ID,
-                      cct->_conf->rbd_journal_commit_age);
+  Journaler journaler(io_ctx, image_id, IMAGE_CLIENT_ID, {});
 
   int r = journaler.create(order, splay_width, pool_id);
   if (r < 0) {
-    lderr(cct) << "failed to create journal: " << cpp_strerror(r) << dendl;
+    lderr(cct) << __func__ << ": "
+               << "failed to create journal: " << cpp_strerror(r) << dendl;
     return r;
   }
 
@@ -387,7 +402,8 @@ int Journal<I>::create(librados::IoCtx &io_ctx, const std::string &image_id,
 
   r = journaler.register_client(client_data);
   if (r < 0) {
-    lderr(cct) << "failed to register client: " << cpp_strerror(r) << dendl;
+    lderr(cct) << __func__ << ": "
+               << "failed to register client: " << cpp_strerror(r) << dendl;
     return r;
   }
   return 0;
@@ -398,13 +414,13 @@ int Journal<I>::remove(librados::IoCtx &io_ctx, const std::string &image_id) {
   CephContext *cct = reinterpret_cast<CephContext *>(io_ctx.cct());
   ldout(cct, 5) << __func__ << ": image=" << image_id << dendl;
 
-  Journaler journaler(io_ctx, image_id, IMAGE_CLIENT_ID,
-                      cct->_conf->rbd_journal_commit_age);
+  Journaler journaler(io_ctx, image_id, IMAGE_CLIENT_ID, {});
 
   bool journal_exists;
   int r = journaler.exists(&journal_exists);
   if (r < 0) {
-    lderr(cct) << "failed to stat journal header: " << cpp_strerror(r) << dendl;
+    lderr(cct) << __func__ << ": "
+               << "failed to stat journal header: " << cpp_strerror(r) << dendl;
     return r;
   } else if (!journal_exists) {
     return 0;
@@ -420,13 +436,15 @@ int Journal<I>::remove(librados::IoCtx &io_ctx, const std::string &image_id) {
   if (r == -ENOENT) {
     return 0;
   } else if (r < 0) {
-    lderr(cct) << "failed to initialize journal: " << cpp_strerror(r) << dendl;
+    lderr(cct) << __func__ << ": "
+               << "failed to initialize journal: " << cpp_strerror(r) << dendl;
     return r;
   }
 
-  r = journaler.remove(false);
+  r = journaler.remove(true);
   if (r < 0) {
-    lderr(cct) << "failed to remove journal: " << cpp_strerror(r) << dendl;
+    lderr(cct) << __func__ << ": "
+               << "failed to remove journal: " << cpp_strerror(r) << dendl;
     return r;
   }
   return 0;
@@ -437,8 +455,7 @@ int Journal<I>::reset(librados::IoCtx &io_ctx, const std::string &image_id) {
   CephContext *cct = reinterpret_cast<CephContext *>(io_ctx.cct());
   ldout(cct, 5) << __func__ << ": image=" << image_id << dendl;
 
-  Journaler journaler(io_ctx, image_id, IMAGE_CLIENT_ID,
-                      cct->_conf->rbd_journal_commit_age);
+  Journaler journaler(io_ctx, image_id, IMAGE_CLIENT_ID, {});
 
   C_SaferCond cond;
   journaler.init(&cond);
@@ -450,7 +467,8 @@ int Journal<I>::reset(librados::IoCtx &io_ctx, const std::string &image_id) {
   if (r == -ENOENT) {
     return 0;
   } else if (r < 0) {
-    lderr(cct) << "failed to initialize journal: " << cpp_strerror(r) << dendl;
+    lderr(cct) << __func__ << ": "
+               << "failed to initialize journal: " << cpp_strerror(r) << dendl;
     return r;
   }
 
@@ -463,20 +481,23 @@ int Journal<I>::reset(librados::IoCtx &io_ctx, const std::string &image_id) {
     librados::Rados rados(io_ctx);
     r = rados.pool_reverse_lookup(pool_id, &pool_name);
     if (r < 0) {
-      lderr(cct) << "failed to lookup data pool: " << cpp_strerror(r) << dendl;
+      lderr(cct) << __func__ << ": "
+                 << "failed to lookup data pool: " << cpp_strerror(r) << dendl;
       return r;
     }
   }
 
   r = journaler.remove(true);
   if (r < 0) {
-    lderr(cct) << "failed to reset journal: " << cpp_strerror(r) << dendl;
+    lderr(cct) << __func__ << ": "
+               << "failed to reset journal: " << cpp_strerror(r) << dendl;
     return r;
   }
 
   r = create(io_ctx, image_id, order, splay_width, pool_name, false, "");
   if (r < 0) {
-    lderr(cct) << "failed to create journal: " << cpp_strerror(r) << dendl;
+    lderr(cct) << __func__ << ": "
+               << "failed to create journal: " << cpp_strerror(r) << dendl;
     return r;
   }
   return 0;
@@ -511,8 +532,7 @@ int Journal<I>::get_tag_owner(IoCtx& io_ctx, std::string& image_id,
   CephContext *cct = (CephContext *)io_ctx.cct();
   ldout(cct, 20) << __func__ << dendl;
 
-  Journaler journaler(io_ctx, image_id, IMAGE_CLIENT_ID,
-                      cct->_conf->rbd_journal_commit_age);
+  Journaler journaler(io_ctx, image_id, IMAGE_CLIENT_ID, {});
 
   cls::journal::Client client;
   journal::ImageClientMeta client_meta;
@@ -531,8 +551,7 @@ int Journal<I>::request_resync(I *image_ctx) {
   CephContext *cct = image_ctx->cct;
   ldout(cct, 20) << __func__ << dendl;
 
-  Journaler journaler(image_ctx->md_ctx, image_ctx->id, IMAGE_CLIENT_ID,
-                      image_ctx->cct->_conf->rbd_journal_commit_age);
+  Journaler journaler(image_ctx->md_ctx, image_ctx->id, IMAGE_CLIENT_ID, {});
 
   cls::journal::Client client;
   journal::ImageClientMeta client_meta;
@@ -558,7 +577,8 @@ int Journal<I>::request_resync(I *image_ctx) {
 
   r = update_client_ctx.wait();
   if (r < 0) {
-    lderr(cct) << "failed to update client: " << cpp_strerror(r) << dendl;
+    lderr(cct) << __func__ << ": "
+               << "failed to update client: " << cpp_strerror(r) << dendl;
     return r;
   }
   return 0;
@@ -569,8 +589,7 @@ int Journal<I>::promote(I *image_ctx) {
   CephContext *cct = image_ctx->cct;
   ldout(cct, 20) << __func__ << dendl;
 
-  Journaler journaler(image_ctx->md_ctx, image_ctx->id, IMAGE_CLIENT_ID,
-                      image_ctx->cct->_conf->rbd_journal_commit_age);
+  Journaler journaler(image_ctx->md_ctx, image_ctx->id, IMAGE_CLIENT_ID, {});
 
   cls::journal::Client client;
   journal::ImageClientMeta client_meta;
@@ -611,6 +630,14 @@ bool Journal<I>::is_journal_replaying() const {
 }
 
 template <typename I>
+bool Journal<I>::is_journal_appending() const {
+  assert(m_image_ctx.snap_lock.is_locked());
+  Mutex::Locker locker(m_lock);
+  return (m_state == STATE_READY &&
+          !m_image_ctx.get_journal_policy()->append_disabled());
+}
+
+template <typename I>
 void Journal<I>::wait_for_journal_ready(Context *on_ready) {
   on_ready = create_async_context_callback(m_image_ctx, on_ready);
 
@@ -653,6 +680,12 @@ void Journal<I>::close(Context *on_finish) {
     stop_recording();
   }
 
+  // interrupt external replay if active
+  if (m_on_replay_close_request != nullptr) {
+    m_on_replay_close_request->complete(0);
+    m_on_replay_close_request = nullptr;
+  }
+
   m_close_pending = true;
   wait_for_steady_state(on_finish);
 }
@@ -678,7 +711,8 @@ int Journal<I>::demote() {
   cls::journal::Client client;
   int r = m_journaler->get_cached_client(IMAGE_CLIENT_ID, &client);
   if (r < 0) {
-    lderr(cct) << "failed to retrieve client: " << cpp_strerror(r) << dendl;
+    lderr(cct) << this << " " << __func__ << ": "
+               << "failed to retrieve client: " << cpp_strerror(r) << dendl;
     return r;
   }
 
@@ -692,7 +726,8 @@ int Journal<I>::demote() {
   bufferlist::iterator tag_data_bl_it = new_tag.data.begin();
   r = C_DecodeTag::decode(&tag_data_bl_it, &m_tag_data);
   if (r < 0) {
-    lderr(cct) << "failed to decode newly allocated tag" << dendl;
+    lderr(cct) << this << " " << __func__ << ": "
+               << "failed to decode newly allocated tag" << dendl;
     return r;
   }
 
@@ -707,7 +742,8 @@ int Journal<I>::demote() {
 
   r = ctx.wait();
   if (r < 0) {
-    lderr(cct) << "failed to append demotion journal event: " << cpp_strerror(r)
+    lderr(cct) << this << " " << __func__ << ": "
+               << "failed to append demotion journal event: " << cpp_strerror(r)
                << dendl;
     return r;
   }
@@ -718,7 +754,8 @@ int Journal<I>::demote() {
 
   r = flush_ctx.wait();
   if (r < 0) {
-    lderr(cct) << "failed to flush demotion commit position: "
+    lderr(cct) << this << " " << __func__ << ": "
+               << "failed to flush demotion commit position: "
                << cpp_strerror(r) << dendl;
     return r;
   }
@@ -741,7 +778,8 @@ void Journal<I>::allocate_local_tag(Context *on_finish) {
     cls::journal::Client client;
     int r = m_journaler->get_cached_client(IMAGE_CLIENT_ID, &client);
     if (r < 0) {
-      lderr(cct) << "failed to retrieve client: " << cpp_strerror(r) << dendl;
+      lderr(cct) << this << " " << __func__ << ": "
+                 << "failed to retrieve client: " << cpp_strerror(r) << dendl;
       m_image_ctx.op_work_queue->queue(on_finish, r);
       return;
     }
@@ -937,7 +975,8 @@ void Journal<I>::commit_io_event_extent(uint64_t tid, uint64_t offset,
 
   event.pending_extents.subtract(intersect);
   if (!event.pending_extents.empty()) {
-    ldout(cct, 20) << "pending extents: " << event.pending_extents << dendl;
+    ldout(cct, 20) << this << " " << __func__ << ": "
+                   << "pending extents: " << event.pending_extents << dendl;
     return;
   }
   complete_event(it, event.ret_val);
@@ -978,7 +1017,7 @@ void Journal<I>::append_op_event(uint64_t op_tid,
 }
 
 template <typename I>
-void Journal<I>::commit_op_event(uint64_t op_tid, int r) {
+void Journal<I>::commit_op_event(uint64_t op_tid, int r, Context *on_safe) {
   CephContext *cct = m_image_ctx.cct;
   ldout(cct, 10) << this << " " << __func__ << ": op_tid=" << op_tid << ", "
                  << "r=" << r << dendl;
@@ -1005,7 +1044,7 @@ void Journal<I>::commit_op_event(uint64_t op_tid, int r) {
 
   op_finish_future.flush(create_async_context_callback(
     m_image_ctx, new C_OpEventSafe(this, op_tid, op_start_future,
-                                   op_finish_future)));
+                                   op_finish_future, on_safe)));
 }
 
 template <typename I>
@@ -1059,7 +1098,8 @@ typename Journal<I>::Future Journal<I>::wait_event(Mutex &lock, uint64_t tid,
   Event &event = it->second;
   if (event.safe) {
     // journal entry already safe
-    ldout(cct, 20) << "journal entry already safe" << dendl;
+    ldout(cct, 20) << this << " " << __func__ << ": "
+                   << "journal entry already safe" << dendl;
     m_image_ctx.op_work_queue->queue(on_safe, event.ret_val);
     return Future();
   }
@@ -1071,23 +1111,26 @@ typename Journal<I>::Future Journal<I>::wait_event(Mutex &lock, uint64_t tid,
 
 template <typename I>
 void Journal<I>::start_external_replay(journal::Replay<I> **journal_replay,
-                                       Context *on_finish) {
+                                       Context *on_start,
+                                       Context *on_close_request) {
   CephContext *cct = m_image_ctx.cct;
   ldout(cct, 20) << this << " " << __func__ << dendl;
 
   Mutex::Locker locker(m_lock);
   assert(m_state == STATE_READY);
   assert(m_journal_replay == nullptr);
+  assert(m_on_replay_close_request == nullptr);
+  m_on_replay_close_request = on_close_request;
 
-  on_finish = util::create_async_context_callback(m_image_ctx, on_finish);
-  on_finish = new FunctionContext(
-    [this, journal_replay, on_finish](int r) {
-      handle_start_external_replay(r, journal_replay, on_finish);
+  on_start = util::create_async_context_callback(m_image_ctx, on_start);
+  on_start = new FunctionContext(
+    [this, journal_replay, on_start](int r) {
+      handle_start_external_replay(r, journal_replay, on_start);
     });
 
   // safely flush all in-flight events before starting external replay
   m_journaler->stop_append(util::create_async_context_callback(m_image_ctx,
-                                                               on_finish));
+                                                               on_start));
 }
 
 template <typename I>
@@ -1102,9 +1145,15 @@ void Journal<I>::handle_start_external_replay(int r,
   assert(m_journal_replay == nullptr);
 
   if (r < 0) {
-    lderr(cct) << "failed to stop recording: " << cpp_strerror(r) << dendl;
+    lderr(cct) << this << " " << __func__ << ": "
+               << "failed to stop recording: " << cpp_strerror(r) << dendl;
     *journal_replay = nullptr;
 
+    if (m_on_replay_close_request != nullptr) {
+      m_on_replay_close_request->complete(r);
+      m_on_replay_close_request = nullptr;
+    }
+
     // get back to a sane-state
     start_append();
     on_finish->complete(r);
@@ -1123,9 +1172,19 @@ void Journal<I>::stop_external_replay() {
   assert(m_journal_replay != nullptr);
   assert(m_state == STATE_REPLAYING);
 
+  if (m_on_replay_close_request != nullptr) {
+    m_on_replay_close_request->complete(-ECANCELED);
+    m_on_replay_close_request = nullptr;
+  }
+
   delete m_journal_replay;
   m_journal_replay = nullptr;
 
+  if (m_close_pending) {
+    destroy_journaler(0);
+    return;
+  }
+
   start_append();
 }
 
@@ -1139,9 +1198,13 @@ void Journal<I>::create_journaler() {
   assert(m_journaler == NULL);
 
   transition_state(STATE_INITIALIZING, 0);
+  ::journal::Settings settings;
+  settings.commit_interval = m_image_ctx.journal_commit_age;
+  settings.max_payload_bytes = m_image_ctx.journal_max_payload_bytes;
+
   m_journaler = new Journaler(m_work_queue, m_timer, m_timer_lock,
 			      m_image_ctx.md_ctx, m_image_ctx.id,
-			      IMAGE_CLIENT_ID, m_image_ctx.journal_commit_age);
+			      IMAGE_CLIENT_ID, settings);
   m_journaler->init(create_async_context_callback(
     m_image_ctx, create_context_callback<
       Journal<I>, &Journal<I>::handle_initialized>(this)));
@@ -1157,6 +1220,8 @@ void Journal<I>::destroy_journaler(int r) {
   delete m_journal_replay;
   m_journal_replay = NULL;
 
+  m_journaler->remove_listener(&m_metadata_listener);
+
   transition_state(STATE_CLOSING, r);
   m_journaler->shut_down(create_async_context_callback(
     m_image_ctx, create_context_callback<
@@ -1175,6 +1240,8 @@ void Journal<I>::recreate_journaler(int r) {
   delete m_journal_replay;
   m_journal_replay = NULL;
 
+  m_journaler->remove_listener(&m_metadata_listener);
+
   transition_state(STATE_RESTARTING_REPLAY, r);
   m_journaler->shut_down(create_async_context_callback(
     m_image_ctx, create_context_callback<
@@ -1195,7 +1262,8 @@ void Journal<I>::complete_event(typename Events::iterator it, int r) {
     // event recorded to journal but failed to update disk, we cannot
     // commit this IO event. this event must be replayed.
     assert(event.safe);
-    lderr(cct) << "failed to commit IO to disk, replay required: "
+    lderr(cct) << this << " " << __func__ << ": "
+               << "failed to commit IO to disk, replay required: "
                << cpp_strerror(r) << dendl;
   }
 
@@ -1243,7 +1311,8 @@ void Journal<I>::handle_initialized(int r) {
   r = m_journaler->get_cached_client(Journal<ImageCtx>::IMAGE_CLIENT_ID,
                                      &client);
   if (r < 0) {
-    lderr(cct) << "failed to locate master image client" << dendl;
+    lderr(cct) << this << " " << __func__ << ": "
+               << "failed to locate master image client" << dendl;
     destroy_journaler(r);
     return;
   }
@@ -1253,7 +1322,8 @@ void Journal<I>::handle_initialized(int r) {
   try {
     ::decode(client_data, bl);
   } catch (const buffer::error &err) {
-    lderr(cct) << "failed to decode client meta data: " << err.what()
+    lderr(cct) << this << " " << __func__ << ": "
+               << "failed to decode client meta data: " << err.what()
                << dendl;
     destroy_journaler(-EINVAL);
     return;
@@ -1262,13 +1332,15 @@ void Journal<I>::handle_initialized(int r) {
   journal::ImageClientMeta *image_client_meta =
     boost::get<journal::ImageClientMeta>(&client_data.client_meta);
   if (image_client_meta == nullptr) {
-    lderr(cct) << "failed to extract client meta data" << dendl;
+    lderr(cct) << this << " " << __func__ << ": "
+               << "failed to extract client meta data" << dendl;
     destroy_journaler(-EINVAL);
     return;
   }
 
   m_tag_class = image_client_meta->tag_class;
-  ldout(cct, 20) << "client: " << client << ", "
+  ldout(cct, 20) << this << " " << __func__ << ": "
+                 << "client: " << client << ", "
                  << "image meta: " << *image_client_meta << dendl;
 
   C_DecodeTags *tags_ctx = new C_DecodeTags(
@@ -1276,6 +1348,8 @@ void Journal<I>::handle_initialized(int r) {
       m_image_ctx, create_context_callback<
         Journal<I>, &Journal<I>::handle_get_tags>(this)));
   m_journaler->get_tags(m_tag_class, &tags_ctx->tags, tags_ctx);
+
+  m_journaler->add_listener(&m_metadata_listener);
 }
 
 template <typename I>
@@ -1298,6 +1372,7 @@ void Journal<I>::handle_get_tags(int r) {
 
 template <typename I>
 void Journal<I>::handle_replay_ready() {
+  CephContext *cct = m_image_ctx.cct;
   ReplayEntry replay_entry;
   {
     Mutex::Locker locker(m_lock);
@@ -1305,7 +1380,6 @@ void Journal<I>::handle_replay_ready() {
       return;
     }
 
-    CephContext *cct = m_image_ctx.cct;
     ldout(cct, 20) << this << " " << __func__ << dendl;
     if (!m_journaler->try_pop_front(&replay_entry)) {
       return;
@@ -1318,11 +1392,20 @@ void Journal<I>::handle_replay_ready() {
 
   bufferlist data = replay_entry.get_data();
   bufferlist::iterator it = data.begin();
+
+  journal::EventEntry event_entry;
+  int r = m_journal_replay->decode(&it, &event_entry);
+  if (r < 0) {
+    lderr(cct) << this << " " << __func__ << ": "
+               << "failed to decode journal event entry" << dendl;
+    handle_replay_process_safe(replay_entry, r);
+    return;
+  }
+
   Context *on_ready = create_context_callback<
     Journal<I>, &Journal<I>::handle_replay_process_ready>(this);
   Context *on_commit = new C_ReplayProcessSafe(this, std::move(replay_entry));
-
-  m_journal_replay->process(&it, on_ready, on_commit);
+  m_journal_replay->process(event_entry, on_ready, on_commit);
 }
 
 template <typename I>
@@ -1398,7 +1481,8 @@ void Journal<I>::handle_replay_process_safe(ReplayEntry replay_entry, int r) {
 
   ldout(cct, 20) << this << " " << __func__ << ": r=" << r << dendl;
   if (r < 0) {
-    lderr(cct) << "failed to commit journal event to disk: " << cpp_strerror(r)
+    lderr(cct) << this << " " << __func__ << ": "
+               << "failed to commit journal event to disk: " << cpp_strerror(r)
                << dendl;
 
     if (m_state == STATE_REPLAYING) {
@@ -1518,7 +1602,8 @@ void Journal<I>::handle_io_event_safe(int r, uint64_t tid) {
   // journal will be flushed before closing
   assert(m_state == STATE_READY || m_state == STATE_STOPPING);
   if (r < 0) {
-    lderr(cct) << "failed to commit IO event: "  << cpp_strerror(r) << dendl;
+    lderr(cct) << this << " " << __func__ << ": "
+               << "failed to commit IO event: "  << cpp_strerror(r) << dendl;
   }
 
   AioObjectRequests aio_object_requests;
@@ -1548,7 +1633,8 @@ void Journal<I>::handle_io_event_safe(int r, uint64_t tid) {
     }
   }
 
-  ldout(cct, 20) << "completing tid=" << tid << dendl;
+  ldout(cct, 20) << this << " " << __func__ << ": "
+                 << "completing tid=" << tid << dendl;
   for (AioObjectRequests::iterator it = aio_object_requests.begin();
        it != aio_object_requests.end(); ++it) {
     if (r < 0) {
@@ -1571,7 +1657,8 @@ void Journal<I>::handle_io_event_safe(int r, uint64_t tid) {
 template <typename I>
 void Journal<I>::handle_op_event_safe(int r, uint64_t tid,
                                       const Future &op_start_future,
-                                      const Future &op_finish_future) {
+                                      const Future &op_finish_future,
+                                      Context *on_safe) {
   CephContext *cct = m_image_ctx.cct;
   ldout(cct, 20) << this << " " << __func__ << ": r=" << r << ", "
                  << "tid=" << tid << dendl;
@@ -1579,14 +1666,15 @@ void Journal<I>::handle_op_event_safe(int r, uint64_t tid,
   // journal will be flushed before closing
   assert(m_state == STATE_READY || m_state == STATE_STOPPING);
   if (r < 0) {
-    lderr(cct) << "failed to commit op event: "  << cpp_strerror(r) << dendl;
+    lderr(cct) << this << " " << __func__ << ": "
+               << "failed to commit op event: "  << cpp_strerror(r) << dendl;
   }
 
   m_journaler->committed(op_start_future);
   m_journaler->committed(op_finish_future);
 
   // reduce the replay window after committing an op event
-  m_journaler->flush_commit_position(nullptr);
+  m_journaler->flush_commit_position(on_safe);
 }
 
 template <typename I>
@@ -1652,6 +1740,103 @@ void Journal<I>::wait_for_steady_state(Context *on_state) {
   m_wait_for_state_contexts.push_back(on_state);
 }
 
+template <typename I>
+int Journal<I>::check_resync_requested(bool *do_resync) {
+  Mutex::Locker l(m_lock);
+  return check_resync_requested_internal(do_resync);
+}
+
+template <typename I>
+int Journal<I>::check_resync_requested_internal(bool *do_resync) {
+  CephContext *cct = m_image_ctx.cct;
+  ldout(cct, 20) << this << " " << __func__ << dendl;
+
+  assert(m_lock.is_locked());
+  assert(do_resync != nullptr);
+
+  cls::journal::Client client;
+  int r = m_journaler->get_cached_client(IMAGE_CLIENT_ID, &client);
+  if (r < 0) {
+     lderr(cct) << this << " " << __func__ << ": "
+                << "failed to retrieve client: " << cpp_strerror(r) << dendl;
+     return r;
+  }
+
+  librbd::journal::ClientData client_data;
+  bufferlist::iterator bl_it = client.data.begin();
+  try {
+    ::decode(client_data, bl_it);
+  } catch (const buffer::error &err) {
+    lderr(cct) << this << " " << __func__ << ": "
+               << "failed to decode client data: " << err << dendl;
+    return -EINVAL;
+  }
+
+  journal::ImageClientMeta *image_client_meta =
+    boost::get<journal::ImageClientMeta>(&client_data.client_meta);
+  if (image_client_meta == nullptr) {
+    lderr(cct) << this << " " << __func__ << ": "
+               << "failed to access image client meta struct" << dendl;
+    return -EINVAL;
+  }
+
+  *do_resync = image_client_meta->resync_requested;
+
+  return 0;
+}
+
+template <typename I>
+void Journal<I>::handle_metadata_updated() {
+  CephContext *cct = m_image_ctx.cct;
+  ldout(cct, 20) << this << " " << __func__ << dendl;
+
+  std::list<journal::ResyncListener *> resync_private_list;
+
+  {
+    Mutex::Locker l(m_lock);
+
+    if (m_state == STATE_CLOSING || m_state == STATE_CLOSED ||
+        m_state == STATE_UNINITIALIZED || m_state == STATE_STOPPING) {
+      return;
+    }
+
+    bool do_resync = false;
+    int r = check_resync_requested_internal(&do_resync);
+    if (r < 0) {
+      lderr(cct) << this << " " << __func__ << ": "
+                 << "failed to check if a resync was requested" << dendl;
+      return;
+    }
+
+    if (do_resync) {
+      for (const auto& listener :
+                              m_listener_map[journal::ListenerType::RESYNC]) {
+        journal::ResyncListener *rsync_listener =
+                        boost::get<journal::ResyncListener *>(listener);
+        resync_private_list.push_back(rsync_listener);
+      }
+    }
+  }
+
+  for (const auto& listener : resync_private_list) {
+    listener->handle_resync();
+  }
+}
+
+template <typename I>
+void Journal<I>::add_listener(journal::ListenerType type,
+                              journal::JournalListenerPtr listener) {
+  Mutex::Locker l(m_lock);
+  m_listener_map[type].push_back(listener);
+}
+
+template <typename I>
+void Journal<I>::remove_listener(journal::ListenerType type,
+                                 journal::JournalListenerPtr listener) {
+  Mutex::Locker l(m_lock);
+  m_listener_map[type].remove(listener);
+}
+
 } // namespace librbd
 
 template class librbd::Journal<librbd::ImageCtx>;
diff --git a/src/librbd/Journal.h b/src/librbd/Journal.h
index d77d50e..ec3b328 100644
--- a/src/librbd/Journal.h
+++ b/src/librbd/Journal.h
@@ -8,29 +8,30 @@
 #include "include/atomic.h"
 #include "include/Context.h"
 #include "include/interval_set.h"
-#include "include/rados/librados.hpp"
 #include "common/Mutex.h"
 #include "journal/Future.h"
+#include "journal/JournalMetadataListener.h"
 #include "journal/ReplayEntry.h"
 #include "journal/ReplayHandler.h"
 #include "librbd/journal/Types.h"
 #include "librbd/journal/TypeTraits.h"
 #include <algorithm>
-#include <iosfwd>
 #include <list>
 #include <string>
 #include <unordered_map>
 
-class Context;
 class ContextWQ;
 class SafeTimer;
 namespace journal {
 class Journaler;
 }
+namespace librados {
+  class IoCtx;
+}
 
 namespace librbd {
 
-class AioObjectRequest;
+struct AioObjectRequestHandle;
 class ImageCtx;
 
 namespace journal { template <typename> class Replay; }
@@ -86,7 +87,7 @@ public:
   static const std::string LOCAL_MIRROR_UUID;
   static const std::string ORPHAN_MIRROR_UUID;
 
-  typedef std::list<AioObjectRequest *> AioObjectRequests;
+  typedef std::list<AioObjectRequestHandle *> AioObjectRequests;
 
   Journal(ImageCtxT &image_ctx);
   ~Journal();
@@ -110,6 +111,7 @@ public:
 
   bool is_journal_ready() const;
   bool is_journal_replaying() const;
+  bool is_journal_appending() const;
 
   void wait_for_journal_ready(Context *on_ready);
 
@@ -142,7 +144,7 @@ public:
 
   void append_op_event(uint64_t op_tid, journal::EventEntry &&event_entry,
                        Context *on_safe);
-  void commit_op_event(uint64_t tid, int r);
+  void commit_op_event(uint64_t tid, int r, Context *on_safe);
   void replay_op_ready(uint64_t op_tid, Context *on_resume);
 
   void flush_event(uint64_t tid, Context *on_safe);
@@ -155,9 +157,16 @@ public:
   }
 
   void start_external_replay(journal::Replay<ImageCtxT> **journal_replay,
-                             Context *on_finish);
+                             Context *on_start, Context *on_close_request);
   void stop_external_replay();
 
+  void add_listener(journal::ListenerType type,
+                    journal::JournalListenerPtr listener);
+  void remove_listener(journal::ListenerType type,
+                       journal::JournalListenerPtr listener);
+
+  int check_resync_requested(bool *do_resync);
+
 private:
   ImageCtxT &m_image_ctx;
 
@@ -213,15 +222,17 @@ private:
     uint64_t tid;
     Future op_start_future;
     Future op_finish_future;
+    Context *on_safe;
 
     C_OpEventSafe(Journal *journal, uint64_t tid, const Future &op_start_future,
-                  const Future &op_finish_future)
+                  const Future &op_finish_future, Context *on_safe)
       : journal(journal), tid(tid), op_start_future(op_start_future),
-        op_finish_future(op_finish_future) {
+        op_finish_future(op_finish_future), on_safe(on_safe) {
     }
 
     virtual void finish(int r) {
-      journal->handle_op_event_safe(r, tid, op_start_future, op_finish_future);
+      journal->handle_op_event_safe(r, tid, op_start_future, op_finish_future,
+                                    on_safe);
     }
   };
 
@@ -286,6 +297,24 @@ private:
   bool m_blocking_writes;
 
   journal::Replay<ImageCtxT> *m_journal_replay;
+  Context *m_on_replay_close_request = nullptr;
+
+  struct MetadataListener : public ::journal::JournalMetadataListener {
+    Journal<ImageCtxT> *journal;
+
+    MetadataListener(Journal<ImageCtxT> *journal) : journal(journal) { }
+
+    void handle_update(::journal::JournalMetadata *) {
+      FunctionContext *ctx = new FunctionContext([this](int r) {
+        journal->handle_metadata_updated();
+      });
+      journal->m_work_queue->queue(ctx, 0);
+    }
+  } m_metadata_listener;
+
+  typedef std::map<journal::ListenerType,
+                   std::list<journal::JournalListenerPtr> > ListenerMap;
+  ListenerMap m_listener_map;
 
   uint64_t append_io_events(journal::EventType event_type,
                             const Bufferlists &bufferlists,
@@ -322,7 +351,7 @@ private:
 
   void handle_io_event_safe(int r, uint64_t tid);
   void handle_op_event_safe(int r, uint64_t tid, const Future &op_start_future,
-                            const Future &op_finish_future);
+                            const Future &op_finish_future, Context *on_safe);
 
   void stop_recording();
 
@@ -330,6 +359,10 @@ private:
 
   bool is_steady_state() const;
   void wait_for_steady_state(Context *on_state);
+
+  int check_resync_requested_internal(bool *do_resync);
+
+  void handle_metadata_updated();
 };
 
 } // namespace librbd
diff --git a/src/librbd/LibrbdWriteback.cc b/src/librbd/LibrbdWriteback.cc
index e3ba517..977b0b3 100644
--- a/src/librbd/LibrbdWriteback.cc
+++ b/src/librbd/LibrbdWriteback.cc
@@ -162,7 +162,7 @@ namespace librbd {
 
       request_sent = true;
       AioObjectWrite *req = new AioObjectWrite(image_ctx, oid, object_no, off,
-                                               bl, snapc, this);
+                                               bl, snapc, this, 0);
       req->send();
     }
   };
@@ -274,7 +274,7 @@ namespace librbd {
 					      journal_tid));
     } else {
       AioObjectWrite *req = new AioObjectWrite(m_ictx, oid.name, object_no,
-					       off, bl, snapc, req_comp);
+					       off, bl, snapc, req_comp, 0);
       req->send();
     }
     return ++m_tid;
diff --git a/src/librbd/LibrbdWriteback.h b/src/librbd/LibrbdWriteback.h
index ef5fa75..a7dc05f 100644
--- a/src/librbd/LibrbdWriteback.h
+++ b/src/librbd/LibrbdWriteback.h
@@ -5,13 +5,12 @@
 
 #include <queue>
 
-#include "include/Context.h"
-#include "include/types.h"
-#include "include/rados/librados.hpp"
+#include "common/snap_types.h"
 #include "osd/osd_types.h"
 #include "osdc/WritebackHandler.h"
 
 class Mutex;
+class Context;
 
 namespace librbd {
 
diff --git a/src/librbd/MirroringWatcher.cc b/src/librbd/MirroringWatcher.cc
index c414478..a6f0a20 100644
--- a/src/librbd/MirroringWatcher.cc
+++ b/src/librbd/MirroringWatcher.cc
@@ -3,6 +3,7 @@
 
 #include "librbd/MirroringWatcher.h"
 #include "include/rbd_types.h"
+#include "include/rados/librados.hpp"
 #include "common/errno.h"
 
 #define dout_subsys ceph_subsys_rbd
diff --git a/src/librbd/MirroringWatcher.h b/src/librbd/MirroringWatcher.h
index f2ec61a..2128556 100644
--- a/src/librbd/MirroringWatcher.h
+++ b/src/librbd/MirroringWatcher.h
@@ -5,12 +5,15 @@
 #define CEPH_LIBRBD_MIRRORING_WATCHER_H
 
 #include "include/int_types.h"
-#include "include/rados/librados.hpp"
 #include "cls/rbd/cls_rbd_types.h"
 #include "librbd/ImageCtx.h"
 #include "librbd/ObjectWatcher.h"
 #include "librbd/mirroring_watcher/Types.h"
 
+namespace librados {
+  class IoCtx;
+}
+
 namespace librbd {
 
 template <typename ImageCtxT = librbd::ImageCtx>
diff --git a/src/librbd/ObjectMap.cc b/src/librbd/ObjectMap.cc
index 9f7d1d4..b5d659e 100644
--- a/src/librbd/ObjectMap.cc
+++ b/src/librbd/ObjectMap.cc
@@ -3,9 +3,6 @@
 #include "librbd/ObjectMap.h"
 #include "librbd/ExclusiveLock.h"
 #include "librbd/ImageCtx.h"
-#include "librbd/ImageWatcher.h"
-#include "librbd/internal.h"
-#include "librbd/object_map/InvalidateRequest.h"
 #include "librbd/object_map/RefreshRequest.h"
 #include "librbd/object_map/ResizeRequest.h"
 #include "librbd/object_map/SnapshotCreateRequest.h"
@@ -17,6 +14,9 @@
 #include "common/dout.h"
 #include "common/errno.h"
 #include "common/WorkQueue.h"
+
+#include "include/rados/librados.hpp"
+
 #include "cls/lock/cls_lock_client.h"
 #include "cls/rbd/cls_rbd_types.h"
 #include "include/stringify.h"
@@ -90,6 +90,18 @@ bool ObjectMap::object_may_exist(uint64_t object_no) const
   return exists;
 }
 
+bool ObjectMap::update_required(uint64_t object_no, uint8_t new_state) {
+  assert(m_image_ctx.object_map_lock.is_wlocked());
+  uint8_t state = (*this)[object_no];
+
+  if ((state == new_state) ||
+      (new_state == OBJECT_PENDING && state == OBJECT_NONEXISTENT) ||
+      (new_state == OBJECT_NONEXISTENT && state != OBJECT_PENDING)) {
+    return false;
+  }
+  return true;
+}
+
 void ObjectMap::open(Context *on_finish) {
   object_map::RefreshRequest<> *req = new object_map::RefreshRequest<>(
     m_image_ctx, &m_object_map, m_snap_id, on_finish);
diff --git a/src/librbd/ObjectMap.h b/src/librbd/ObjectMap.h
index f285296..5d99180 100644
--- a/src/librbd/ObjectMap.h
+++ b/src/librbd/ObjectMap.h
@@ -5,13 +5,15 @@
 
 #include "include/int_types.h"
 #include "include/fs_types.h"
-#include "include/rados/librados.hpp"
 #include "include/rbd/object_map_types.h"
 #include "common/bit_vector.hpp"
 #include <boost/optional.hpp>
 
 class Context;
 class RWLock;
+namespace librados {
+  class IoCtx;
+}
 
 namespace librbd {
 
@@ -37,6 +39,7 @@ public:
   void close(Context *on_finish);
 
   bool object_may_exist(uint64_t object_no) const;
+  bool update_required(uint64_t object_no, uint8_t new_state);
 
   void aio_save(Context *on_finish);
   void aio_resize(uint64_t new_size, uint8_t default_object_state,
diff --git a/src/librbd/Operations.cc b/src/librbd/Operations.cc
index 7766a12..0c6d8cd 100644
--- a/src/librbd/Operations.cc
+++ b/src/librbd/Operations.cc
@@ -5,6 +5,7 @@
 #include "common/dout.h"
 #include "common/errno.h"
 #include "common/WorkQueue.h"
+
 #include "librbd/ExclusiveLock.h"
 #include "librbd/ImageCtx.h"
 #include "librbd/ImageState.h"
@@ -176,8 +177,9 @@ struct C_InvokeAsyncRequest : public Context {
       return;
     }
 
+    int r;
     if (image_ctx.exclusive_lock->is_lock_owner() &&
-        image_ctx.exclusive_lock->accept_requests()) {
+        image_ctx.exclusive_lock->accept_requests(&r)) {
       send_local_request();
       owner_lock.put_read();
       return;
@@ -233,6 +235,8 @@ struct C_InvokeAsyncRequest : public Context {
     ldout(cct, 20) << __func__ << ": r=" << r << dendl;
 
     if (r != -ETIMEDOUT && r != -ERESTART) {
+      image_ctx.state->handle_update_notification();
+
       complete(r);
       return;
     }
@@ -307,7 +311,7 @@ int Operations<I>::flatten(ProgressContext &prog_ctx) {
   r = invoke_async_request("flatten", false,
                            boost::bind(&Operations<I>::execute_flatten, this,
                                        boost::ref(prog_ctx), _1),
-                           boost::bind(&ImageWatcher::notify_flatten,
+                           boost::bind(&ImageWatcher<I>::notify_flatten,
                                        m_image_ctx.image_watcher, request_id,
                                        boost::ref(prog_ctx), _1));
 
@@ -387,7 +391,7 @@ int Operations<I>::rebuild_object_map(ProgressContext &prog_ctx) {
   r = invoke_async_request("rebuild object map", true,
                            boost::bind(&Operations<I>::execute_rebuild_object_map,
                                        this, boost::ref(prog_ctx), _1),
-                           boost::bind(&ImageWatcher::notify_rebuild_object_map,
+                           boost::bind(&ImageWatcher<I>::notify_rebuild_object_map,
                                        m_image_ctx.image_watcher, request_id,
                                        boost::ref(prog_ctx), _1));
 
@@ -444,7 +448,7 @@ int Operations<I>::rename(const char *dstname) {
     r = invoke_async_request("rename", true,
                              boost::bind(&Operations<I>::execute_rename, this,
                                          dstname, _1),
-                             boost::bind(&ImageWatcher::notify_rename,
+                             boost::bind(&ImageWatcher<I>::notify_rename,
                                          m_image_ctx.image_watcher, dstname,
                                          _1));
     if (r < 0 && r != -EEXIST) {
@@ -466,22 +470,42 @@ int Operations<I>::rename(const char *dstname) {
 }
 
 template <typename I>
-void Operations<I>::execute_rename(const char *dstname, Context *on_finish) {
+void Operations<I>::execute_rename(const std::string &dest_name,
+                                   Context *on_finish) {
   assert(m_image_ctx.owner_lock.is_locked());
   if (m_image_ctx.test_features(RBD_FEATURE_JOURNALING)) {
     assert(m_image_ctx.exclusive_lock == nullptr ||
            m_image_ctx.exclusive_lock->is_lock_owner());
   }
 
+  m_image_ctx.snap_lock.get_read();
+  if (m_image_ctx.name == dest_name) {
+    m_image_ctx.snap_lock.put_read();
+    on_finish->complete(-EEXIST);
+    return;
+  }
+  m_image_ctx.snap_lock.put_read();
+
   CephContext *cct = m_image_ctx.cct;
-  ldout(cct, 5) << this << " " << __func__ << ": dest_name=" << dstname
+  ldout(cct, 5) << this << " " << __func__ << ": dest_name=" << dest_name
                 << dendl;
 
   if (m_image_ctx.old_format) {
+    // unregister watch before and register back after rename
     on_finish = new C_NotifyUpdate<I>(m_image_ctx, on_finish);
+    on_finish = new FunctionContext([this, on_finish](int r) {
+	m_image_ctx.image_watcher->register_watch(on_finish);
+      });
+    on_finish = new FunctionContext([this, dest_name, on_finish](int r) {
+	operation::RenameRequest<I> *req = new operation::RenameRequest<I>(
+	  m_image_ctx, on_finish, dest_name);
+	req->send();
+      });
+    m_image_ctx.image_watcher->unregister_watch(on_finish);
+    return;
   }
   operation::RenameRequest<I> *req = new operation::RenameRequest<I>(
-    m_image_ctx, on_finish, dstname);
+    m_image_ctx, on_finish, dest_name);
   req->send();
 }
 
@@ -510,7 +534,7 @@ int Operations<I>::resize(uint64_t size, ProgressContext& prog_ctx) {
   r = invoke_async_request("resize", false,
                            boost::bind(&Operations<I>::execute_resize, this,
                                        size, boost::ref(prog_ctx), _1, 0),
-                           boost::bind(&ImageWatcher::notify_resize,
+                           boost::bind(&ImageWatcher<I>::notify_resize,
                                        m_image_ctx.image_watcher, request_id,
                                        size, boost::ref(prog_ctx), _1));
 
@@ -598,14 +622,14 @@ void Operations<I>::snap_create(const char *snap_name, Context *on_finish) {
     m_image_ctx, "snap_create", true,
     boost::bind(&Operations<I>::execute_snap_create, this, snap_name, _1, 0,
                 false),
-    boost::bind(&ImageWatcher::notify_snap_create, m_image_ctx.image_watcher,
+    boost::bind(&ImageWatcher<I>::notify_snap_create, m_image_ctx.image_watcher,
                 snap_name, _1),
     {-EEXIST}, on_finish);
   req->send();
 }
 
 template <typename I>
-void Operations<I>::execute_snap_create(const char *snap_name,
+void Operations<I>::execute_snap_create(const std::string &snap_name,
                                         Context *on_finish,
                                         uint64_t journal_op_tid,
                                         bool skip_object_map) {
@@ -683,7 +707,7 @@ int Operations<I>::snap_rollback(const char *snap_name,
 }
 
 template <typename I>
-void Operations<I>::execute_snap_rollback(const char *snap_name,
+void Operations<I>::execute_snap_rollback(const std::string &snap_name,
                                           ProgressContext& prog_ctx,
                                           Context *on_finish) {
   assert(m_image_ctx.owner_lock.is_locked());
@@ -745,6 +769,7 @@ void Operations<I>::snap_remove(const char *snap_name, Context *on_finish) {
     return;
   }
 
+  // quickly filter out duplicate ops
   m_image_ctx.snap_lock.get_read();
   if (m_image_ctx.get_snap_id(snap_name) == CEPH_NOSNAP) {
     m_image_ctx.snap_lock.put_read();
@@ -760,7 +785,7 @@ void Operations<I>::snap_remove(const char *snap_name, Context *on_finish) {
     C_InvokeAsyncRequest<I> *req = new C_InvokeAsyncRequest<I>(
       m_image_ctx, "snap_remove", true,
       boost::bind(&Operations<I>::execute_snap_remove, this, snap_name, _1),
-      boost::bind(&ImageWatcher::notify_snap_remove, m_image_ctx.image_watcher,
+      boost::bind(&ImageWatcher<I>::notify_snap_remove, m_image_ctx.image_watcher,
                   snap_name, _1),
       {-ENOENT}, on_finish);
     req->send();
@@ -771,7 +796,7 @@ void Operations<I>::snap_remove(const char *snap_name, Context *on_finish) {
 }
 
 template <typename I>
-void Operations<I>::execute_snap_remove(const char *snap_name,
+void Operations<I>::execute_snap_remove(const std::string &snap_name,
                                         Context *on_finish) {
   assert(m_image_ctx.owner_lock.is_locked());
   {
@@ -846,7 +871,7 @@ int Operations<I>::snap_rename(const char *srcname, const char *dstname) {
     r = invoke_async_request("snap_rename", true,
                              boost::bind(&Operations<I>::execute_snap_rename,
                                          this, snap_id, dstname, _1),
-                             boost::bind(&ImageWatcher::notify_snap_rename,
+                             boost::bind(&ImageWatcher<I>::notify_snap_rename,
                                          m_image_ctx.image_watcher, snap_id,
                                          dstname, _1));
     if (r < 0 && r != -EEXIST) {
@@ -869,7 +894,7 @@ int Operations<I>::snap_rename(const char *srcname, const char *dstname) {
 
 template <typename I>
 void Operations<I>::execute_snap_rename(const uint64_t src_snap_id,
-                                        const char *dst_name,
+                                        const std::string &dest_snap_name,
                                         Context *on_finish) {
   assert(m_image_ctx.owner_lock.is_locked());
   if ((m_image_ctx.features & RBD_FEATURE_JOURNALING) != 0) {
@@ -877,15 +902,23 @@ void Operations<I>::execute_snap_rename(const uint64_t src_snap_id,
            m_image_ctx.exclusive_lock->is_lock_owner());
   }
 
+  m_image_ctx.snap_lock.get_read();
+  if (m_image_ctx.get_snap_id(dest_snap_name) != CEPH_NOSNAP) {
+    m_image_ctx.snap_lock.put_read();
+    on_finish->complete(-EEXIST);
+    return;
+  }
+  m_image_ctx.snap_lock.put_read();
+
   CephContext *cct = m_image_ctx.cct;
   ldout(cct, 5) << this << " " << __func__ << ": "
                 << "snap_id=" << src_snap_id << ", "
-                << "new_snap_name=" << dst_name << dendl;
+                << "new_snap_name=" << dest_snap_name << dendl;
 
   operation::SnapshotRenameRequest<I> *req =
     new operation::SnapshotRenameRequest<I>(
       m_image_ctx, new C_NotifyUpdate<I>(m_image_ctx, on_finish), src_snap_id,
-      dst_name);
+      dest_snap_name);
   req->send();
 }
 
@@ -922,7 +955,7 @@ int Operations<I>::snap_protect(const char *snap_name) {
     r = invoke_async_request("snap_protect", true,
                              boost::bind(&Operations<I>::execute_snap_protect,
                                          this, snap_name, _1),
-                             boost::bind(&ImageWatcher::notify_snap_protect,
+                             boost::bind(&ImageWatcher<I>::notify_snap_protect,
                                          m_image_ctx.image_watcher, snap_name,
                                          _1));
     if (r < 0 && r != -EBUSY) {
@@ -942,7 +975,7 @@ int Operations<I>::snap_protect(const char *snap_name) {
 }
 
 template <typename I>
-void Operations<I>::execute_snap_protect(const char *snap_name,
+void Operations<I>::execute_snap_protect(const std::string &snap_name,
                                          Context *on_finish) {
   assert(m_image_ctx.owner_lock.is_locked());
   if (m_image_ctx.test_features(RBD_FEATURE_JOURNALING)) {
@@ -950,6 +983,21 @@ void Operations<I>::execute_snap_protect(const char *snap_name,
            m_image_ctx.exclusive_lock->is_lock_owner());
   }
 
+  m_image_ctx.snap_lock.get_read();
+  bool is_protected;
+  int r = m_image_ctx.is_snap_protected(m_image_ctx.get_snap_id(snap_name),
+                                        &is_protected);
+  if (r < 0) {
+    m_image_ctx.snap_lock.put_read();
+    on_finish->complete(r);
+    return;
+  } else if (is_protected) {
+    m_image_ctx.snap_lock.put_read();
+    on_finish->complete(-EBUSY);
+    return;
+  }
+  m_image_ctx.snap_lock.put_read();
+
   CephContext *cct = m_image_ctx.cct;
   ldout(cct, 5) << this << " " << __func__ << ": snap_name=" << snap_name
                 << dendl;
@@ -993,7 +1041,7 @@ int Operations<I>::snap_unprotect(const char *snap_name) {
     r = invoke_async_request("snap_unprotect", true,
                              boost::bind(&Operations<I>::execute_snap_unprotect,
                                          this, snap_name, _1),
-                             boost::bind(&ImageWatcher::notify_snap_unprotect,
+                             boost::bind(&ImageWatcher<I>::notify_snap_unprotect,
                                          m_image_ctx.image_watcher, snap_name,
                                          _1));
     if (r < 0 && r != -EINVAL) {
@@ -1013,7 +1061,7 @@ int Operations<I>::snap_unprotect(const char *snap_name) {
 }
 
 template <typename I>
-void Operations<I>::execute_snap_unprotect(const char *snap_name,
+void Operations<I>::execute_snap_unprotect(const std::string &snap_name,
                                            Context *on_finish) {
   assert(m_image_ctx.owner_lock.is_locked());
   if (m_image_ctx.test_features(RBD_FEATURE_JOURNALING)) {
@@ -1021,6 +1069,21 @@ void Operations<I>::execute_snap_unprotect(const char *snap_name,
            m_image_ctx.exclusive_lock->is_lock_owner());
   }
 
+  m_image_ctx.snap_lock.get_read();
+  bool is_unprotected;
+  int r = m_image_ctx.is_snap_unprotected(m_image_ctx.get_snap_id(snap_name),
+                                          &is_unprotected);
+  if (r < 0) {
+    m_image_ctx.snap_lock.put_read();
+    on_finish->complete(r);
+    return;
+  } else if (is_unprotected) {
+    m_image_ctx.snap_lock.put_read();
+    on_finish->complete(-EINVAL);
+    return;
+  }
+  m_image_ctx.snap_lock.put_read();
+
   CephContext *cct = m_image_ctx.cct;
   ldout(cct, 5) << this << " " << __func__ << ": snap_name=" << snap_name
                 << dendl;
@@ -1048,7 +1111,7 @@ int Operations<I>::prepare_image_update() {
     RWLock::WLocker owner_locker(m_image_ctx.owner_lock);
     if (m_image_ctx.exclusive_lock != nullptr &&
         (!m_image_ctx.exclusive_lock->is_lock_owner() ||
-         !m_image_ctx.exclusive_lock->accept_requests())) {
+         !m_image_ctx.exclusive_lock->accept_requests(&r))) {
       m_image_ctx.exclusive_lock->try_lock(&ctx);
       trying_lock = true;
     }
diff --git a/src/librbd/Operations.h b/src/librbd/Operations.h
index 95af4dc..411b4ef 100644
--- a/src/librbd/Operations.h
+++ b/src/librbd/Operations.h
@@ -29,7 +29,7 @@ public:
                                   Context *on_finish);
 
   int rename(const char *dstname);
-  void execute_rename(const char *dstname, Context *on_finish);
+  void execute_rename(const std::string &dest_name, Context *on_finish);
 
   int resize(uint64_t size, ProgressContext& prog_ctx);
   void execute_resize(uint64_t size, ProgressContext &prog_ctx,
@@ -37,26 +37,27 @@ public:
 
   int snap_create(const char *snap_name);
   void snap_create(const char *snap_name, Context *on_finish);
-  void execute_snap_create(const char *snap_name, Context *on_finish,
+  void execute_snap_create(const std::string &snap_name, Context *on_finish,
                            uint64_t journal_op_tid, bool skip_object_map);
 
   int snap_rollback(const char *snap_name, ProgressContext& prog_ctx);
-  void execute_snap_rollback(const char *snap_name, ProgressContext& prog_ctx,
-                             Context *on_finish);
+  void execute_snap_rollback(const std::string &snap_name,
+                             ProgressContext& prog_ctx, Context *on_finish);
 
   int snap_remove(const char *snap_name);
   void snap_remove(const char *snap_name, Context *on_finish);
-  void execute_snap_remove(const char *snap_name, Context *on_finish);
+  void execute_snap_remove(const std::string &snap_name, Context *on_finish);
 
   int snap_rename(const char *srcname, const char *dstname);
-  void execute_snap_rename(const uint64_t src_snap_id, const char *dst_name,
+  void execute_snap_rename(const uint64_t src_snap_id,
+                           const std::string &dest_snap_name,
                            Context *on_finish);
 
   int snap_protect(const char *snap_name);
-  void execute_snap_protect(const char *snap_name, Context *on_finish);
+  void execute_snap_protect(const std::string &snap_name, Context *on_finish);
 
   int snap_unprotect(const char *snap_name);
-  void execute_snap_unprotect(const char *snap_name, Context *on_finish);
+  void execute_snap_unprotect(const std::string &snap_name, Context *on_finish);
 
   int prepare_image_update();
 
diff --git a/src/librbd/SnapInfo.h b/src/librbd/SnapInfo.h
index 4a225a4..1babee9 100644
--- a/src/librbd/SnapInfo.h
+++ b/src/librbd/SnapInfo.h
@@ -5,9 +5,6 @@
 
 #include "include/int_types.h"
 
-#include "include/rados/librados.hpp"
-
-#include "cls/rbd/cls_rbd_client.h"
 #include "librbd/parent_types.h"
 
 namespace librbd {
diff --git a/src/librbd/TaskFinisher.h b/src/librbd/TaskFinisher.h
index 466537e..e0aebd9 100644
--- a/src/librbd/TaskFinisher.h
+++ b/src/librbd/TaskFinisher.h
@@ -3,7 +3,6 @@
 #ifndef LIBRBD_TASK_FINISHER_H
 #define LIBRBD_TASK_FINISHER_H
 
-#include "include/int_types.h"
 #include "include/Context.h"
 #include "common/Finisher.h"
 #include "common/Mutex.h"
@@ -12,7 +11,6 @@
 #include <utility>
 
 class CephContext;
-class Context;
 
 namespace librbd {
 
@@ -63,13 +61,17 @@ public:
     }
   }
 
-  void cancel_all() {
-    Mutex::Locker l(*m_lock);
-    for (typename TaskContexts::iterator it = m_task_contexts.begin();
-         it != m_task_contexts.end(); ++it) {
-      delete it->second.first;
+  void cancel_all(Context *comp) {
+    {
+      Mutex::Locker l(*m_lock);
+      for (typename TaskContexts::iterator it = m_task_contexts.begin();
+           it != m_task_contexts.end(); ++it) {
+        delete it->second.first;
+        m_safe_timer->cancel_event(it->second.second);
+      }
+      m_task_contexts.clear();
     }
-    m_task_contexts.clear();
+    m_finisher->queue(comp);
   }
 
   bool add_event_after(const Task& task, double seconds, Context *ctx) {
diff --git a/src/librbd/Utils.h b/src/librbd/Utils.h
index fd881f6..5904081 100644
--- a/src/librbd/Utils.h
+++ b/src/librbd/Utils.h
@@ -8,8 +8,6 @@
 #include "include/Context.h"
 #include <type_traits>
 
-class Context;
-
 namespace librbd {
 
 class ImageCtx;
@@ -154,6 +152,11 @@ Context *create_async_context_callback(I &image_ctx, Context *on_finish) {
       image_ctx.op_work_queue, on_finish);
 }
 
+// TODO: temporary until AioCompletion supports templated ImageCtx
+inline ImageCtx *get_image_ctx(ImageCtx *image_ctx) {
+  return image_ctx;
+}
+
 } // namespace util
 } // namespace librbd
 
diff --git a/src/librbd/exclusive_lock/AcquireRequest.h b/src/librbd/exclusive_lock/AcquireRequest.h
index 4990abb..7b31d92 100644
--- a/src/librbd/exclusive_lock/AcquireRequest.h
+++ b/src/librbd/exclusive_lock/AcquireRequest.h
@@ -6,10 +6,8 @@
 
 #include "include/int_types.h"
 #include "include/buffer.h"
-#include "include/rados/librados.hpp"
 #include "librbd/ImageCtx.h"
 #include "msg/msg_types.h"
-#include <map>
 #include <string>
 
 class Context;
diff --git a/src/librbd/exclusive_lock/ReleaseRequest.cc b/src/librbd/exclusive_lock/ReleaseRequest.cc
index 0583c26..bed9517 100644
--- a/src/librbd/exclusive_lock/ReleaseRequest.cc
+++ b/src/librbd/exclusive_lock/ReleaseRequest.cc
@@ -6,11 +6,8 @@
 #include "cls/lock/cls_lock_types.h"
 #include "common/dout.h"
 #include "common/errno.h"
-#include "common/WorkQueue.h"
-#include "include/stringify.h"
 #include "librbd/AioImageRequestWQ.h"
 #include "librbd/ExclusiveLock.h"
-#include "librbd/ImageCtx.h"
 #include "librbd/ImageWatcher.h"
 #include "librbd/Journal.h"
 #include "librbd/ObjectMap.h"
@@ -103,12 +100,6 @@ Context *ReleaseRequest<I>::handle_block_writes(int *ret_val) {
     return m_on_finish;
   }
 
-  if (m_on_releasing != nullptr) {
-    // alert caller that we no longer own the exclusive lock
-    m_on_releasing->complete(0);
-    m_on_releasing = nullptr;
-  }
-
   send_flush_notifies();
   return nullptr;
 }
@@ -211,6 +202,12 @@ void ReleaseRequest<I>::send_unlock() {
   CephContext *cct = m_image_ctx.cct;
   ldout(cct, 10) << __func__ << dendl;
 
+  if (m_on_releasing != nullptr) {
+    // alert caller that we no longer own the exclusive lock
+    m_on_releasing->complete(0);
+    m_on_releasing = nullptr;
+  }
+
   librados::ObjectWriteOperation op;
   rados::cls::lock::unlock(&op, RBD_LOCK_NAME, m_cookie);
 
diff --git a/src/librbd/exclusive_lock/ReleaseRequest.h b/src/librbd/exclusive_lock/ReleaseRequest.h
index 8712bc9..a68530b 100644
--- a/src/librbd/exclusive_lock/ReleaseRequest.h
+++ b/src/librbd/exclusive_lock/ReleaseRequest.h
@@ -4,7 +4,6 @@
 #ifndef CEPH_LIBRBD_EXCLUSIVE_LOCK_RELEASE_REQUEST_H
 #define CEPH_LIBRBD_EXCLUSIVE_LOCK_RELEASE_REQUEST_H
 
-#include "include/int_types.h"
 #include "librbd/ImageCtx.h"
 #include <string>
 
diff --git a/src/librbd/image/CloseRequest.cc b/src/librbd/image/CloseRequest.cc
index b953860..4ee52a5 100644
--- a/src/librbd/image/CloseRequest.cc
+++ b/src/librbd/image/CloseRequest.cc
@@ -4,10 +4,8 @@
 #include "librbd/image/CloseRequest.h"
 #include "common/dout.h"
 #include "common/errno.h"
-#include "common/WorkQueue.h"
 #include "librbd/AioImageRequestWQ.h"
 #include "librbd/ExclusiveLock.h"
-#include "librbd/ImageWatcher.h"
 #include "librbd/ImageCtx.h"
 #include "librbd/ImageState.h"
 #include "librbd/ImageWatcher.h"
@@ -33,6 +31,30 @@ CloseRequest<I>::CloseRequest(I *image_ctx, Context *on_finish)
 
 template <typename I>
 void CloseRequest<I>::send() {
+  send_shut_down_update_watchers();
+}
+
+template <typename I>
+void CloseRequest<I>::send_shut_down_update_watchers() {
+  CephContext *cct = m_image_ctx->cct;
+  ldout(cct, 10) << this << " " << __func__ << dendl;
+
+  m_image_ctx->state->shut_down_update_watchers(create_async_context_callback(
+    *m_image_ctx, create_context_callback<
+      CloseRequest<I>, &CloseRequest<I>::handle_shut_down_update_watchers>(this)));
+}
+
+template <typename I>
+void CloseRequest<I>::handle_shut_down_update_watchers(int r) {
+  CephContext *cct = m_image_ctx->cct;
+  ldout(cct, 10) << this << " " << __func__ << ": r=" << r << dendl;
+
+  save_result(r);
+  if (r < 0) {
+    lderr(cct) << "failed to shut down update watchers: " << cpp_strerror(r)
+               << dendl;
+  }
+
   send_unregister_image_watcher();
 }
 
diff --git a/src/librbd/image/CloseRequest.h b/src/librbd/image/CloseRequest.h
index 832cd2e..e7d2184 100644
--- a/src/librbd/image/CloseRequest.h
+++ b/src/librbd/image/CloseRequest.h
@@ -4,7 +4,6 @@
 #ifndef CEPH_LIBRBD_IMAGE_CLOSE_REQUEST_H
 #define CEPH_LIBRBD_IMAGE_CLOSE_REQUEST_H
 
-#include "include/int_types.h"
 #include "librbd/ImageCtx.h"
 
 class Context;
@@ -31,6 +30,9 @@ private:
    * <start>
    *    |
    *    v
+   * SHUT_DOWN_UPDATE_WATCHERS
+   *    |
+   *    v
    * UNREGISTER_IMAGE_WATCHER
    *    |
    *    v
@@ -72,6 +74,9 @@ private:
 
   decltype(m_image_ctx->exclusive_lock) m_exclusive_lock;
 
+  void send_shut_down_update_watchers();
+  void handle_shut_down_update_watchers(int r);
+
   void send_unregister_image_watcher();
   void handle_unregister_image_watcher(int r);
 
diff --git a/src/librbd/image/OpenRequest.cc b/src/librbd/image/OpenRequest.cc
index 7714722..36d740d 100644
--- a/src/librbd/image/OpenRequest.cc
+++ b/src/librbd/image/OpenRequest.cc
@@ -4,7 +4,6 @@
 #include "librbd/image/OpenRequest.h"
 #include "common/dout.h"
 #include "common/errno.h"
-#include "common/WorkQueue.h"
 #include "cls/rbd/cls_rbd_client.h"
 #include "librbd/ImageCtx.h"
 #include "librbd/Utils.h"
diff --git a/src/librbd/image/OpenRequest.h b/src/librbd/image/OpenRequest.h
index cf506eb..627285b 100644
--- a/src/librbd/image/OpenRequest.h
+++ b/src/librbd/image/OpenRequest.h
@@ -4,7 +4,6 @@
 #ifndef CEPH_LIBRBD_IMAGE_OPEN_REQUEST_H
 #define CEPH_LIBRBD_IMAGE_OPEN_REQUEST_H
 
-#include "include/int_types.h"
 #include "include/buffer.h"
 #include <map>
 #include <string>
diff --git a/src/librbd/image/RefreshRequest.cc b/src/librbd/image/RefreshRequest.cc
index ebd86ef..fdab92b 100644
--- a/src/librbd/image/RefreshRequest.cc
+++ b/src/librbd/image/RefreshRequest.cc
@@ -2,10 +2,8 @@
 // vim: ts=8 sw=2 smarttab
 
 #include "librbd/image/RefreshRequest.h"
-#include "include/stringify.h"
 #include "common/dout.h"
 #include "common/errno.h"
-#include "common/WorkQueue.h"
 #include "cls/lock/cls_lock_client.h"
 #include "cls/rbd/cls_rbd_client.h"
 #include "librbd/AioImageRequestWQ.h"
diff --git a/src/librbd/image/RefreshRequest.h b/src/librbd/image/RefreshRequest.h
index 58b6487..79b5d9e 100644
--- a/src/librbd/image/RefreshRequest.h
+++ b/src/librbd/image/RefreshRequest.h
@@ -6,7 +6,6 @@
 
 #include "include/int_types.h"
 #include "include/buffer.h"
-#include "include/rbd_types.h"
 #include "common/snap_types.h"
 #include "cls/lock/cls_lock_types.h"
 #include "librbd/ImageCtx.h"
diff --git a/src/librbd/image/SetSnapRequest.cc b/src/librbd/image/SetSnapRequest.cc
index 44da673..e00ebeb 100644
--- a/src/librbd/image/SetSnapRequest.cc
+++ b/src/librbd/image/SetSnapRequest.cc
@@ -136,13 +136,12 @@ Context *SetSnapRequest<I>::handle_block_writes(int *result) {
 
 template <typename I>
 Context *SetSnapRequest<I>::send_shut_down_exclusive_lock(int *result) {
-  ExclusiveLock<I> *exclusive_lock;
   {
     RWLock::RLocker snap_locker(m_image_ctx.snap_lock);
-    exclusive_lock = m_image_ctx.exclusive_lock;
+    m_exclusive_lock = m_image_ctx.exclusive_lock;
   }
 
-  if (exclusive_lock == nullptr) {
+  if (m_exclusive_lock == nullptr) {
     return send_refresh_parent(result);
   }
 
@@ -152,7 +151,7 @@ Context *SetSnapRequest<I>::send_shut_down_exclusive_lock(int *result) {
   using klass = SetSnapRequest<I>;
   Context *ctx = create_context_callback<
     klass, &klass::handle_shut_down_exclusive_lock>(this);
-  exclusive_lock->shut_down(ctx);
+  m_exclusive_lock->shut_down(ctx);
   return nullptr;
 }
 
@@ -327,11 +326,13 @@ int SetSnapRequest<I>::apply() {
   RWLock::WLocker snap_locker(m_image_ctx.snap_lock);
   RWLock::WLocker parent_locker(m_image_ctx.parent_lock);
   if (m_snap_id != CEPH_NOSNAP) {
+    assert(m_image_ctx.exclusive_lock == nullptr);
     int r = m_image_ctx.snap_set(m_snap_name);
     if (r < 0) {
       return r;
     }
   } else {
+    std::swap(m_image_ctx.exclusive_lock, m_exclusive_lock);
     m_image_ctx.snap_unset();
   }
 
@@ -339,7 +340,6 @@ int SetSnapRequest<I>::apply() {
     m_refresh_parent->apply();
   }
 
-  std::swap(m_exclusive_lock, m_image_ctx.exclusive_lock);
   std::swap(m_object_map, m_image_ctx.object_map);
   return 0;
 }
diff --git a/src/librbd/image/SetSnapRequest.h b/src/librbd/image/SetSnapRequest.h
index fa12032..1e2df49 100644
--- a/src/librbd/image/SetSnapRequest.h
+++ b/src/librbd/image/SetSnapRequest.h
@@ -4,8 +4,6 @@
 #ifndef CEPH_LIBRBD_IMAGE_SNAP_SET_REQUEST_H
 #define CEPH_LIBRBD_IMAGE_SNAP_SET_REQUEST_H
 
-#include "include/int_types.h"
-#include "librbd/parent_types.h"
 #include <string>
 
 class Context;
diff --git a/src/librbd/image_watcher/NotifyLockOwner.h b/src/librbd/image_watcher/NotifyLockOwner.h
index b4bdb2d..0b85097 100644
--- a/src/librbd/image_watcher/NotifyLockOwner.h
+++ b/src/librbd/image_watcher/NotifyLockOwner.h
@@ -4,7 +4,6 @@
 #ifndef CEPH_LIBRBD_IMAGE_WATCHER_NOTIFY_LOCK_OWNER_H
 #define CEPH_LIBRBD_IMAGE_WATCHER_NOTIFY_LOCK_OWNER_H
 
-#include "include/int_types.h"
 #include "include/buffer.h"
 
 class Context;
diff --git a/src/librbd/internal.cc b/src/librbd/internal.cc
index 13682df..a999f6f 100644
--- a/src/librbd/internal.cc
+++ b/src/librbd/internal.cc
@@ -10,9 +10,7 @@
 #include "common/ceph_context.h"
 #include "common/dout.h"
 #include "common/errno.h"
-#include "common/ContextCompletion.h"
 #include "common/Throttle.h"
-#include "common/WorkQueue.h"
 #include "common/event_socket.h"
 #include "cls/lock/cls_lock_client.h"
 #include "include/stringify.h"
@@ -26,12 +24,10 @@
 #include "librbd/AioImageRequest.h"
 #include "librbd/AioImageRequestWQ.h"
 #include "librbd/AioObjectRequest.h"
-#include "librbd/CopyupRequest.h"
 #include "librbd/DiffIterate.h"
 #include "librbd/ExclusiveLock.h"
 #include "librbd/ImageCtx.h"
 #include "librbd/ImageState.h"
-#include "librbd/ImageWatcher.h"
 #include "librbd/internal.h"
 #include "librbd/Journal.h"
 #include "librbd/journal/Types.h"
@@ -45,7 +41,6 @@
 
 #include "journal/Journaler.h"
 
-#include <boost/bind.hpp>
 #include <boost/scope_exit.hpp>
 #include <boost/variant.hpp>
 #include "include/assert.h"
@@ -248,7 +243,6 @@ int mirror_image_enable(CephContext *cct, librados::IoCtx &io_ctx,
   if (r < 0) {
     lderr(cct) << "failed to send update notification: " << cpp_strerror(r)
                << dendl;
-    return r;
   }
 
   ldout(cct, 20) << "image mirroring is enabled: global_id=" <<
@@ -341,7 +335,6 @@ int mirror_image_disable_internal(ImageCtx *ictx, bool force,
   if (r < 0) {
     lderr(cct) << "failed to send update notification: " << cpp_strerror(r)
                << dendl;
-    return r;
   }
 
   header_oid = ::journal::Journaler::header_oid(ictx->id);
@@ -395,7 +388,7 @@ int mirror_image_disable_internal(ImageCtx *ictx, bool force,
 
   if (remove) {
     r = cls_client::mirror_image_remove(&ictx->md_ctx, ictx->id);
-    if (r < 0) {
+    if (r < 0 && r != -ENOENT) {
       lderr(cct) << "failed to remove image from mirroring directory: "
                  << cpp_strerror(r) << dendl;
       return r;
@@ -1689,7 +1682,7 @@ int mirror_image_disable_internal(ImageCtx *ictx, bool force,
     // avoid accepting new requests from peers while we manipulate
     // the image features
     if (ictx->exclusive_lock != nullptr) {
-      ictx->exclusive_lock->block_requests();
+      ictx->exclusive_lock->block_requests(0);
     }
     BOOST_SCOPE_EXIT_ALL( (ictx) ) {
       if (ictx->exclusive_lock != nullptr) {
@@ -1818,6 +1811,7 @@ int mirror_image_disable_internal(ImageCtx *ictx, bool force,
             if (r < 0 && r != -ENOENT) {
               lderr(cct) << "error retrieving mirroring state: "
                 << cpp_strerror(r) << dendl;
+              return r;
             }
 
             if (mirror_image.state == cls::rbd::MIRROR_IMAGE_STATE_ENABLED) {
@@ -1827,7 +1821,7 @@ int mirror_image_disable_internal(ImageCtx *ictx, bool force,
             }
           } else if (mirror_mode == RBD_MIRROR_MODE_POOL) {
             r = cls_client::mirror_image_remove(&ictx->md_ctx, ictx->id);
-            if (r < 0) {
+            if (r < 0 && r != -ENOENT) {
               lderr(cct) << "failed to remove image from mirroring directory: "
                          << cpp_strerror(r) << dendl;
               return r;
@@ -2125,58 +2119,66 @@ int mirror_image_disable_internal(ImageCtx *ictx, bool force,
     }
 
     if (old_format || unknown_format) {
-      ldout(cct, 2) << "removing rbd image from directory..." << dendl;
+      ldout(cct, 2) << "removing rbd image from v1 directory..." << dendl;
       r = tmap_rm(io_ctx, imgname);
       old_format = (r == 0);
       if (r < 0 && !unknown_format) {
         if (r != -ENOENT) {
-	  lderr(cct) << "error removing img from old-style directory: "
-		     << cpp_strerror(-r) << dendl;
+          lderr(cct) << "error removing image from v1 directory: "
+                     << cpp_strerror(-r) << dendl;
         }
 	return r;
       }
     }
     if (!old_format) {
-      r = Journal<>::remove(io_ctx, id);
-      if (r < 0 && r != -ENOENT) {
-        lderr(cct) << "error removing image journal" << dendl;
-        return r;
+      if (id.empty()) {
+        ldout(cct, 5) << "attempting to determine image id" << dendl;
+        r = cls_client::dir_get_id(&io_ctx, RBD_DIRECTORY, imgname, &id);
+        if (r < 0 && r != -ENOENT) {
+          lderr(cct) << "error getting id of image" << dendl;
+          return r;
+        }
       }
+      if (!id.empty()) {
+        ldout(cct, 10) << "removing journal..." << dendl;
+        r = Journal<>::remove(io_ctx, id);
+        if (r < 0 && r != -ENOENT) {
+          lderr(cct) << "error removing image journal" << dendl;
+          return r;
+        }
 
-      r = ObjectMap::remove(io_ctx, id);
-      if (r < 0 && r != -ENOENT) {
-	lderr(cct) << "error removing image object map" << dendl;
-        return r;
+        ldout(cct, 10) << "removing object map..." << dendl;
+        r = ObjectMap::remove(io_ctx, id);
+        if (r < 0 && r != -ENOENT) {
+          lderr(cct) << "error removing image object map" << dendl;
+          return r;
+        }
+
+        ldout(cct, 10) << "removing image from rbd_mirroring object..."
+                       << dendl;
+        r = cls_client::mirror_image_remove(&io_ctx, id);
+        if (r < 0 && r != -ENOENT && r != -EOPNOTSUPP) {
+          lderr(cct) << "failed to remove image from mirroring directory: "
+                     << cpp_strerror(r) << dendl;
+          return r;
+        }
       }
 
       ldout(cct, 2) << "removing id object..." << dendl;
       r = io_ctx.remove(util::id_obj_name(imgname));
       if (r < 0 && r != -ENOENT) {
-	lderr(cct) << "error removing id object: " << cpp_strerror(r) << dendl;
-	return r;
-      }
-
-      r = cls_client::dir_get_id(&io_ctx, RBD_DIRECTORY, imgname, &id);
-      if (r < 0 && r != -ENOENT) {
-	lderr(cct) << "error getting id of image" << dendl;
+	lderr(cct) << "error removing id object: " << cpp_strerror(r)
+                   << dendl;
 	return r;
       }
 
-      ldout(cct, 2) << "removing rbd image from directory..." << dendl;
+      ldout(cct, 2) << "removing rbd image from v2 directory..." << dendl;
       r = cls_client::dir_remove_image(&io_ctx, RBD_DIRECTORY, imgname, id);
       if (r < 0) {
         if (r != -ENOENT) {
-	  lderr(cct) << "error removing img from new-style directory: "
-		     << cpp_strerror(-r) << dendl;
+          lderr(cct) << "error removing image from v2 directory: "
+                     << cpp_strerror(-r) << dendl;
         }
-	return r;
-      }
-
-      ldout(cct, 2) << "removing image from rbd_mirroring object..." << dendl;
-      r = cls_client::mirror_image_remove(&io_ctx, id);
-      if (r < 0 && r != -ENOENT && r != -EOPNOTSUPP) {
-        lderr(cct) << "failed to remove image from mirroring directory: "
-                   << cpp_strerror(r) << dendl;
         return r;
       }
     }
@@ -2384,7 +2386,8 @@ int mirror_image_disable_internal(ImageCtx *ictx, bool force,
       uint64_t len = min(period, src_size - offset);
       bufferlist *bl = new bufferlist();
       Context *ctx = new C_CopyRead(&throttle, dest, offset, bl);
-      AioCompletion *comp = AioCompletion::create(ctx);
+      AioCompletion *comp = AioCompletion::create_and_start(ctx, src,
+                                                            AIO_TYPE_READ);
       AioImageRequest<>::aio_read(src, comp, offset, len, NULL, bl,
                                   fadvise_flags);
       prog_ctx.update_progress(offset, src_size);
@@ -2609,7 +2612,8 @@ int mirror_image_disable_internal(ImageCtx *ictx, bool force,
       bufferlist bl;
 
       C_SaferCond ctx;
-      AioCompletion *c = AioCompletion::create(&ctx);
+      AioCompletion *c = AioCompletion::create_and_start(&ctx, ictx,
+                                                         AIO_TYPE_READ);
       AioImageRequest<>::aio_read(ictx, c, off, read_len, NULL, &bl, 0);
 
       int ret = ctx.wait();
@@ -3199,7 +3203,6 @@ int mirror_image_disable_internal(ImageCtx *ictx, bool force,
       if (r < 0) {
         lderr(cct) << "failed to send update notification: " << cpp_strerror(r)
                    << dendl;
-        return r;
       }
     }
 
@@ -3355,7 +3358,6 @@ int mirror_image_disable_internal(ImageCtx *ictx, bool force,
     if (r < 0) {
       lderr(cct) << "failed to send update notification: " << cpp_strerror(r)
                  << dendl;
-      return r;
     }
     return 0;
   }
@@ -3532,13 +3534,6 @@ int mirror_image_disable_internal(ImageCtx *ictx, bool force,
     return 0;
   }
 
-  void rbd_req_cb(completion_t cb, void *arg)
-  {
-    AioObjectRequest *req = reinterpret_cast<AioObjectRequest *>(arg);
-    AioCompletion *comp = reinterpret_cast<AioCompletion *>(cb);
-    req->complete(comp->get_return_value());
-  }
-
   struct C_RBD_Readahead : public Context {
     ImageCtx *ictx;
     object_t oid;
diff --git a/src/librbd/journal/Policy.h b/src/librbd/journal/Policy.h
index 8265622..2ef21e6 100644
--- a/src/librbd/journal/Policy.h
+++ b/src/librbd/journal/Policy.h
@@ -14,8 +14,8 @@ struct Policy {
   virtual ~Policy() {
   }
 
+  virtual bool append_disabled() const = 0;
   virtual void allocate_tag_on_lock(Context *on_finish) = 0;
-  virtual void cancel_external_replay(Context *on_finish) = 0;
 };
 
 } // namespace journal
diff --git a/src/librbd/journal/Replay.cc b/src/librbd/journal/Replay.cc
index c57202a..de33e03 100644
--- a/src/librbd/journal/Replay.cc
+++ b/src/librbd/journal/Replay.cc
@@ -9,14 +9,13 @@
 #include "librbd/AioImageRequest.h"
 #include "librbd/ImageCtx.h"
 #include "librbd/ImageState.h"
-#include "librbd/ImageWatcher.h"
 #include "librbd/internal.h"
 #include "librbd/Operations.h"
 #include "librbd/Utils.h"
 
 #define dout_subsys ceph_subsys_rbd
 #undef dout_prefix
-#define dout_prefix *_dout << "librbd::journal::Replay: "
+#define dout_prefix *_dout << "librbd::journal::Replay: " << this << " "
 
 namespace librbd {
 namespace journal {
@@ -39,40 +38,40 @@ struct ExecuteOp : public Context {
   }
 
   void execute(const journal::SnapCreateEvent &_) {
-    image_ctx.operations->execute_snap_create(event.snap_name.c_str(),
+    image_ctx.operations->execute_snap_create(event.snap_name,
                                               on_op_complete,
                                               event.op_tid, false);
   }
 
   void execute(const journal::SnapRemoveEvent &_) {
-    image_ctx.operations->execute_snap_remove(event.snap_name.c_str(),
+    image_ctx.operations->execute_snap_remove(event.snap_name,
                                               on_op_complete);
   }
 
   void execute(const journal::SnapRenameEvent &_) {
     image_ctx.operations->execute_snap_rename(event.snap_id,
-                                              event.snap_name.c_str(),
+                                              event.snap_name,
                                               on_op_complete);
   }
 
   void execute(const journal::SnapProtectEvent &_) {
-    image_ctx.operations->execute_snap_protect(event.snap_name.c_str(),
+    image_ctx.operations->execute_snap_protect(event.snap_name,
                                                on_op_complete);
   }
 
   void execute(const journal::SnapUnprotectEvent &_) {
-    image_ctx.operations->execute_snap_unprotect(event.snap_name.c_str(),
+    image_ctx.operations->execute_snap_unprotect(event.snap_name,
                                                  on_op_complete);
   }
 
   void execute(const journal::SnapRollbackEvent &_) {
-    image_ctx.operations->execute_snap_rollback(event.snap_name.c_str(),
+    image_ctx.operations->execute_snap_rollback(event.snap_name,
                                                 no_op_progress_callback,
                                                 on_op_complete);
   }
 
   void execute(const journal::RenameEvent &_) {
-    image_ctx.operations->execute_rename(event.image_name.c_str(),
+    image_ctx.operations->execute_rename(event.image_name,
                                          on_op_complete);
   }
 
@@ -89,12 +88,12 @@ struct ExecuteOp : public Context {
   virtual void finish(int r) override {
     CephContext *cct = image_ctx.cct;
     if (r < 0) {
-      lderr(cct) << "ExecuteOp: " << __func__ << ": r=" << r << dendl;
+      lderr(cct) << ": ExecuteOp::" << __func__ << ": r=" << r << dendl;
       on_op_complete->complete(r);
       return;
     }
 
-    ldout(cct, 20) << "ExecuteOp: " << __func__ << dendl;
+    ldout(cct, 20) << ": ExecuteOp::" << __func__ << dendl;
     RWLock::RLocker owner_locker(image_ctx.owner_lock);
     execute(event);
   }
@@ -108,29 +107,38 @@ struct C_RefreshIfRequired : public Context {
   C_RefreshIfRequired(I &image_ctx, Context *on_finish)
     : image_ctx(image_ctx), on_finish(on_finish) {
   }
+  virtual ~C_RefreshIfRequired() {
+    delete on_finish;
+  }
 
   virtual void finish(int r) override {
     CephContext *cct = image_ctx.cct;
+    Context *ctx = on_finish;
+    on_finish = nullptr;
 
     if (r < 0) {
-      lderr(cct) << "C_RefreshIfRequired: " << __func__ << ": r=" << r << dendl;
-      image_ctx.op_work_queue->queue(on_finish, r);
+      lderr(cct) << ": C_RefreshIfRequired::" << __func__ << ": r=" << r << dendl;
+      image_ctx.op_work_queue->queue(ctx, r);
       return;
     }
 
     if (image_ctx.state->is_refresh_required()) {
-      ldout(cct, 20) << "C_RefreshIfRequired: " << __func__ << ": "
+      ldout(cct, 20) << ": C_RefreshIfRequired::" << __func__ << ": "
                      << "refresh required" << dendl;
-      image_ctx.state->refresh(on_finish);
+      image_ctx.state->refresh(ctx);
       return;
     }
 
-    image_ctx.op_work_queue->queue(on_finish, 0);
+    image_ctx.op_work_queue->queue(ctx, 0);
   }
 };
 
 } // anonymous namespace
 
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::journal::Replay: " << this << " " \
+                           << __func__
+
 template <typename I>
 Replay<I>::Replay(I &image_ctx)
   : m_image_ctx(image_ctx), m_lock("Replay<I>::m_lock") {
@@ -143,25 +151,27 @@ Replay<I>::~Replay() {
   assert(m_aio_modify_unsafe_contexts.empty());
   assert(m_aio_modify_safe_contexts.empty());
   assert(m_op_events.empty());
+  assert(m_in_flight_op_events == 0);
 }
 
 template <typename I>
-void Replay<I>::process(bufferlist::iterator *it, Context *on_ready,
-                        Context *on_safe) {
-  CephContext *cct = m_image_ctx.cct;
-  ldout(cct, 20) << this << " " << __func__ << ": "
-                 << "on_ready=" << on_ready << ", on_safe=" << on_safe << dendl;
-
-  on_ready = util::create_async_context_callback(m_image_ctx, on_ready);
-
-  journal::EventEntry event_entry;
+int Replay<I>::decode(bufferlist::iterator *it, EventEntry *event_entry) {
   try {
-    ::decode(event_entry, *it);
+    ::decode(*event_entry, *it);
   } catch (const buffer::error &err) {
-    lderr(cct) << "failed to decode event entry: " << err.what() << dendl;
-    on_ready->complete(-EINVAL);
-    return;
+    return -EBADMSG;
   }
+  return 0;
+}
+
+template <typename I>
+void Replay<I>::process(const EventEntry &event_entry,
+                        Context *on_ready, Context *on_safe) {
+  CephContext *cct = m_image_ctx.cct;
+  ldout(cct, 20) << ": on_ready=" << on_ready << ", on_safe=" << on_safe
+                 << dendl;
+
+  on_ready = util::create_async_context_callback(m_image_ctx, on_ready);
 
   RWLock::RLocker owner_lock(m_image_ctx.owner_lock);
   boost::apply_visitor(EventVisitor(this, on_ready, on_safe),
@@ -171,7 +181,7 @@ void Replay<I>::process(bufferlist::iterator *it, Context *on_ready,
 template <typename I>
 void Replay<I>::shut_down(bool cancel_ops, Context *on_finish) {
   CephContext *cct = m_image_ctx.cct;
-  ldout(cct, 20) << this << " " << __func__ << dendl;
+  ldout(cct, 20) << dendl;
 
   AioCompletion *flush_comp = nullptr;
   on_finish = util::create_async_context_callback(
@@ -208,7 +218,7 @@ void Replay<I>::shut_down(bool cancel_ops, Context *on_finish) {
     }
 
     assert(m_flush_ctx == nullptr);
-    if (!m_op_events.empty() || flush_comp != nullptr) {
+    if (m_in_flight_op_events > 0 || flush_comp != nullptr) {
       std::swap(m_flush_ctx, on_finish);
     }
   }
@@ -239,7 +249,7 @@ void Replay<I>::flush(Context *on_finish) {
 template <typename I>
 void Replay<I>::replay_op_ready(uint64_t op_tid, Context *on_resume) {
   CephContext *cct = m_image_ctx.cct;
-  ldout(cct, 20) << this << " " << __func__ << ": op_tid=" << op_tid << dendl;
+  ldout(cct, 20) << ": op_tid=" << op_tid << dendl;
 
   Mutex::Locker locker(m_lock);
   auto op_it = m_op_events.find(op_tid);
@@ -279,10 +289,11 @@ template <typename I>
 void Replay<I>::handle_event(const journal::AioDiscardEvent &event,
                              Context *on_ready, Context *on_safe) {
   CephContext *cct = m_image_ctx.cct;
-  ldout(cct, 20) << this << " " << __func__ << ": AIO discard event" << dendl;
+  ldout(cct, 20) << ": AIO discard event" << dendl;
 
   bool flush_required;
   AioCompletion *aio_comp = create_aio_modify_completion(on_ready, on_safe,
+                                                         AIO_TYPE_DISCARD,
                                                          &flush_required);
   AioImageRequest<I>::aio_discard(&m_image_ctx, aio_comp, event.offset,
                                   event.length);
@@ -299,11 +310,12 @@ template <typename I>
 void Replay<I>::handle_event(const journal::AioWriteEvent &event,
                              Context *on_ready, Context *on_safe) {
   CephContext *cct = m_image_ctx.cct;
-  ldout(cct, 20) << this << " " << __func__ << ": AIO write event" << dendl;
+  ldout(cct, 20) << ": AIO write event" << dendl;
 
   bufferlist data = event.data;
   bool flush_required;
   AioCompletion *aio_comp = create_aio_modify_completion(on_ready, on_safe,
+                                                         AIO_TYPE_WRITE,
                                                          &flush_required);
   AioImageRequest<I>::aio_write(&m_image_ctx, aio_comp, event.offset,
                                 event.length, data.c_str(), 0);
@@ -320,7 +332,7 @@ template <typename I>
 void Replay<I>::handle_event(const journal::AioFlushEvent &event,
 			     Context *on_ready, Context *on_safe) {
   CephContext *cct = m_image_ctx.cct;
-  ldout(cct, 20) << this << " " << __func__ << ": AIO flush event" << dendl;
+  ldout(cct, 20) << ": AIO flush event" << dendl;
 
   AioCompletion *aio_comp;
   {
@@ -336,17 +348,18 @@ template <typename I>
 void Replay<I>::handle_event(const journal::OpFinishEvent &event,
                              Context *on_ready, Context *on_safe) {
   CephContext *cct = m_image_ctx.cct;
-  ldout(cct, 20) << this << " " << __func__ << ": Op finish event: "
+  ldout(cct, 20) << ": Op finish event: "
                  << "op_tid=" << event.op_tid << dendl;
 
   bool op_in_progress;
+  bool filter_ret_val;
   Context *on_op_complete = nullptr;
   Context *on_op_finish_event = nullptr;
   {
     Mutex::Locker locker(m_lock);
     auto op_it = m_op_events.find(event.op_tid);
     if (op_it == m_op_events.end()) {
-      ldout(cct, 10) << "unable to locate associated op: assuming previously "
+      ldout(cct, 10) << ": unable to locate associated op: assuming previously "
                      << "committed." << dendl;
       on_ready->complete(0);
       m_image_ctx.op_work_queue->queue(on_safe, 0);
@@ -360,6 +373,10 @@ void Replay<I>::handle_event(const journal::OpFinishEvent &event,
     op_in_progress = op_event.op_in_progress;
     std::swap(on_op_complete, op_event.on_op_complete);
     std::swap(on_op_finish_event, op_event.on_op_finish_event);
+
+    // special errors which indicate op never started but was recorded
+    // as failed in the journal
+    filter_ret_val = (op_event.op_finish_error_codes.count(event.r) != 0);
   }
 
   if (event.r < 0) {
@@ -373,7 +390,7 @@ void Replay<I>::handle_event(const journal::OpFinishEvent &event,
       // creating the op event
       delete on_op_complete;
       delete on_op_finish_event;
-      handle_op_complete(event.op_tid, event.r);
+      handle_op_complete(event.op_tid, filter_ret_val ? 0 : event.r);
     }
     return;
   }
@@ -386,7 +403,7 @@ template <typename I>
 void Replay<I>::handle_event(const journal::SnapCreateEvent &event,
 			     Context *on_ready, Context *on_safe) {
   CephContext *cct = m_image_ctx.cct;
-  ldout(cct, 20) << this << " " << __func__ << ": Snap create event" << dendl;
+  ldout(cct, 20) << ": Snap create event" << dendl;
 
   Mutex::Locker locker(m_lock);
   OpEvent *op_event;
@@ -415,7 +432,7 @@ template <typename I>
 void Replay<I>::handle_event(const journal::SnapRemoveEvent &event,
 			     Context *on_ready, Context *on_safe) {
   CephContext *cct = m_image_ctx.cct;
-  ldout(cct, 20) << this << " " << __func__ << ": Snap remove event" << dendl;
+  ldout(cct, 20) << ": Snap remove event" << dendl;
 
   Mutex::Locker locker(m_lock);
   OpEvent *op_event;
@@ -439,7 +456,7 @@ template <typename I>
 void Replay<I>::handle_event(const journal::SnapRenameEvent &event,
 			     Context *on_ready, Context *on_safe) {
   CephContext *cct = m_image_ctx.cct;
-  ldout(cct, 20) << this << " " << __func__ << ": Snap rename event" << dendl;
+  ldout(cct, 20) << ": Snap rename event" << dendl;
 
   Mutex::Locker locker(m_lock);
   OpEvent *op_event;
@@ -463,7 +480,7 @@ template <typename I>
 void Replay<I>::handle_event(const journal::SnapProtectEvent &event,
 			     Context *on_ready, Context *on_safe) {
   CephContext *cct = m_image_ctx.cct;
-  ldout(cct, 20) << this << " " << __func__ << ": Snap protect event" << dendl;
+  ldout(cct, 20) << ": Snap protect event" << dendl;
 
   Mutex::Locker locker(m_lock);
   OpEvent *op_event;
@@ -487,8 +504,7 @@ template <typename I>
 void Replay<I>::handle_event(const journal::SnapUnprotectEvent &event,
 			     Context *on_ready, Context *on_safe) {
   CephContext *cct = m_image_ctx.cct;
-  ldout(cct, 20) << this << " " << __func__ << ": Snap unprotect event"
-                 << dendl;
+  ldout(cct, 20) << ": Snap unprotect event" << dendl;
 
   Mutex::Locker locker(m_lock);
   OpEvent *op_event;
@@ -503,6 +519,9 @@ void Replay<I>::handle_event(const journal::SnapUnprotectEvent &event,
                                                                event,
                                                                on_op_complete));
 
+  // ignore errors recorded in the journal
+  op_event->op_finish_error_codes = {-EBUSY};
+
   // ignore errors caused due to replay
   op_event->ignore_error_codes = {-EINVAL};
 
@@ -513,8 +532,7 @@ template <typename I>
 void Replay<I>::handle_event(const journal::SnapRollbackEvent &event,
 			     Context *on_ready, Context *on_safe) {
   CephContext *cct = m_image_ctx.cct;
-  ldout(cct, 20) << this << " " << __func__ << ": Snap rollback start event"
-                 << dendl;
+  ldout(cct, 20) << ": Snap rollback start event" << dendl;
 
   Mutex::Locker locker(m_lock);
   OpEvent *op_event;
@@ -536,7 +554,7 @@ template <typename I>
 void Replay<I>::handle_event(const journal::RenameEvent &event,
 			     Context *on_ready, Context *on_safe) {
   CephContext *cct = m_image_ctx.cct;
-  ldout(cct, 20) << this << " " << __func__ << ": Rename event" << dendl;
+  ldout(cct, 20) << ": Rename event" << dendl;
 
   Mutex::Locker locker(m_lock);
   OpEvent *op_event;
@@ -560,7 +578,7 @@ template <typename I>
 void Replay<I>::handle_event(const journal::ResizeEvent &event,
 			     Context *on_ready, Context *on_safe) {
   CephContext *cct = m_image_ctx.cct;
-  ldout(cct, 20) << this << " " << __func__ << ": Resize start event" << dendl;
+  ldout(cct, 20) << ": Resize start event" << dendl;
 
   Mutex::Locker locker(m_lock);
   OpEvent *op_event;
@@ -585,7 +603,7 @@ template <typename I>
 void Replay<I>::handle_event(const journal::FlattenEvent &event,
 			     Context *on_ready, Context *on_safe) {
   CephContext *cct = m_image_ctx.cct;
-  ldout(cct, 20) << this << " " << __func__ << ": Flatten start event" << dendl;
+  ldout(cct, 20) << ": Flatten start event" << dendl;
 
   Mutex::Locker locker(m_lock);
   OpEvent *op_event;
@@ -609,7 +627,7 @@ template <typename I>
 void Replay<I>::handle_event(const journal::DemoteEvent &event,
 			     Context *on_ready, Context *on_safe) {
   CephContext *cct = m_image_ctx.cct;
-  ldout(cct, 20) << this << " " << __func__ << ": Demote event" << dendl;
+  ldout(cct, 20) << ": Demote event" << dendl;
   on_ready->complete(0);
   on_safe->complete(0);
 }
@@ -618,7 +636,7 @@ template <typename I>
 void Replay<I>::handle_event(const journal::UnknownEvent &event,
 			     Context *on_ready, Context *on_safe) {
   CephContext *cct = m_image_ctx.cct;
-  ldout(cct, 20) << this << " " << __func__ << ": unknown event" << dendl;
+  ldout(cct, 20) << ": unknown event" << dendl;
   on_ready->complete(0);
   on_safe->complete(0);
 }
@@ -628,14 +646,14 @@ void Replay<I>::handle_aio_modify_complete(Context *on_ready, Context *on_safe,
                                            int r) {
   Mutex::Locker locker(m_lock);
   CephContext *cct = m_image_ctx.cct;
-  ldout(cct, 20) << this << " " << __func__ << ": on_ready=" << on_ready << ", "
+  ldout(cct, 20) << ": on_ready=" << on_ready << ", "
                  << "on_safe=" << on_safe << ", r=" << r << dendl;
 
   if (on_ready != nullptr) {
     on_ready->complete(0);
   }
   if (r < 0) {
-    lderr(cct) << "AIO modify op failed: " << cpp_strerror(r) << dendl;
+    lderr(cct) << ": AIO modify op failed: " << cpp_strerror(r) << dendl;
     on_safe->complete(r);
     return;
   }
@@ -648,10 +666,10 @@ template <typename I>
 void Replay<I>::handle_aio_flush_complete(Context *on_flush_safe,
                                           Contexts &on_safe_ctxs, int r) {
   CephContext *cct = m_image_ctx.cct;
-  ldout(cct, 20) << this << " " << __func__ << ": " << "r=" << r << dendl;
+  ldout(cct, 20) << ": r=" << r << dendl;
 
   if (r < 0) {
-    lderr(cct) << "AIO flush failed: " << cpp_strerror(r) << dendl;
+    lderr(cct) << ": AIO flush failed: " << cpp_strerror(r) << dendl;
   }
 
   Context *on_aio_ready = nullptr;
@@ -664,7 +682,7 @@ void Replay<I>::handle_aio_flush_complete(Context *on_flush_safe,
     m_in_flight_aio_modify -= on_safe_ctxs.size();
 
     std::swap(on_aio_ready, m_on_aio_ready);
-    if (m_op_events.empty() &&
+    if (m_in_flight_op_events == 0 &&
         (m_in_flight_aio_flush + m_in_flight_aio_modify) == 0) {
       on_flush = m_flush_ctx;
     }
@@ -680,7 +698,7 @@ void Replay<I>::handle_aio_flush_complete(Context *on_flush_safe,
   }
 
   if (on_aio_ready != nullptr) {
-    ldout(cct, 10) << "resuming paused AIO" << dendl;
+    ldout(cct, 10) << ": resuming paused AIO" << dendl;
     on_aio_ready->complete(0);
   }
 
@@ -688,12 +706,12 @@ void Replay<I>::handle_aio_flush_complete(Context *on_flush_safe,
     on_safe_ctxs.push_back(on_flush_safe);
   }
   for (auto ctx : on_safe_ctxs) {
-    ldout(cct, 20) << "completing safe context: " << ctx << dendl;
+    ldout(cct, 20) << ": completing safe context: " << ctx << dendl;
     ctx->complete(r);
   }
 
   if (on_flush != nullptr) {
-    ldout(cct, 20) << "completing flush context: " << on_flush << dendl;
+    ldout(cct, 20) << ": completing flush context: " << on_flush << dendl;
     on_flush->complete(r);
   }
 }
@@ -707,12 +725,16 @@ Context *Replay<I>::create_op_context_callback(uint64_t op_tid,
 
   assert(m_lock.is_locked());
   if (m_op_events.count(op_tid) != 0) {
-    lderr(cct) << "duplicate op tid detected: " << op_tid << dendl;
+    lderr(cct) << ": duplicate op tid detected: " << op_tid << dendl;
+
+    // on_ready is already async but on failure invoke on_safe async
+    // as well
     on_ready->complete(0);
-    on_safe->complete(-EINVAL);
+    m_image_ctx.op_work_queue->queue(on_safe, -EINVAL);
     return nullptr;
   }
 
+  ++m_in_flight_op_events;
   *op_event = &m_op_events[op_tid];
   (*op_event)->on_start_safe = on_safe;
 
@@ -724,11 +746,10 @@ Context *Replay<I>::create_op_context_callback(uint64_t op_tid,
 template <typename I>
 void Replay<I>::handle_op_complete(uint64_t op_tid, int r) {
   CephContext *cct = m_image_ctx.cct;
-  ldout(cct, 20) << this << " " << __func__ << ": op_tid=" << op_tid << ", "
+  ldout(cct, 20) << ": op_tid=" << op_tid << ", "
                  << "r=" << r << dendl;
 
   OpEvent op_event;
-  Context *on_flush = nullptr;
   bool shutting_down = false;
   {
     Mutex::Locker locker(m_lock);
@@ -739,10 +760,6 @@ void Replay<I>::handle_op_complete(uint64_t op_tid, int r) {
     m_op_events.erase(op_it);
 
     shutting_down = (m_flush_ctx != nullptr);
-    if (m_op_events.empty() &&
-        (m_in_flight_aio_flush + m_in_flight_aio_modify) == 0) {
-      on_flush = m_flush_ctx;
-    }
   }
 
   assert(op_event.on_start_ready == nullptr || (r < 0 && r != -ERESTART));
@@ -775,14 +792,22 @@ void Replay<I>::handle_op_complete(uint64_t op_tid, int r) {
   if (op_event.on_finish_safe != nullptr) {
     op_event.on_finish_safe->complete(r);
   }
-  if (on_flush != nullptr) {
-    on_flush->complete(0);
+
+  // shut down request might have occurred while lock was
+  // dropped -- handle if pending
+  Mutex::Locker locker(m_lock);
+  assert(m_in_flight_op_events > 0);
+  --m_in_flight_op_events;
+  if (m_flush_ctx != nullptr && m_in_flight_op_events == 0 &&
+      (m_in_flight_aio_flush + m_in_flight_aio_modify) == 0) {
+    m_image_ctx.op_work_queue->queue(m_flush_ctx, 0);
   }
 }
 
 template <typename I>
 AioCompletion *Replay<I>::create_aio_modify_completion(Context *on_ready,
                                                        Context *on_safe,
+                                                       aio_type_t aio_type,
                                                        bool *flush_required) {
   Mutex::Locker locker(m_lock);
   CephContext *cct = m_image_ctx.cct;
@@ -798,7 +823,7 @@ AioCompletion *Replay<I>::create_aio_modify_completion(Context *on_ready,
   *flush_required = (m_aio_modify_unsafe_contexts.size() ==
                        IN_FLIGHT_IO_LOW_WATER_MARK);
   if (*flush_required) {
-    ldout(cct, 10) << "hit AIO replay low-water mark: scheduling flush"
+    ldout(cct, 10) << ": hit AIO replay low-water mark: scheduling flush"
                    << dendl;
   }
 
@@ -808,7 +833,7 @@ AioCompletion *Replay<I>::create_aio_modify_completion(Context *on_ready,
   //   shrink has adjusted clip boundary, etc) -- should have already been
   //   flagged not-ready
   if (m_in_flight_aio_modify == IN_FLIGHT_IO_HIGH_WATER_MARK) {
-    ldout(cct, 10) << "hit AIO replay high-water mark: pausing replay"
+    ldout(cct, 10) << ": hit AIO replay high-water mark: pausing replay"
                    << dendl;
     assert(m_on_aio_ready == nullptr);
     std::swap(m_on_aio_ready, on_ready);
@@ -817,8 +842,9 @@ AioCompletion *Replay<I>::create_aio_modify_completion(Context *on_ready,
   // when the modification is ACKed by librbd, we can process the next
   // event. when flushed, the completion of the next flush will fire the
   // on_safe callback
-  AioCompletion *aio_comp = AioCompletion::create<Context>(
-    new C_AioModifyComplete(this, on_ready, on_safe));
+  AioCompletion *aio_comp = AioCompletion::create_and_start<Context>(
+    new C_AioModifyComplete(this, on_ready, on_safe),
+    util::get_image_ctx(&m_image_ctx), aio_type);
   return aio_comp;
 }
 
@@ -829,9 +855,10 @@ AioCompletion *Replay<I>::create_aio_flush_completion(Context *on_safe) {
   ++m_in_flight_aio_flush;
 
   // associate all prior write/discard ops to this flush request
-  AioCompletion *aio_comp = AioCompletion::create<Context>(
+  AioCompletion *aio_comp = AioCompletion::create_and_start<Context>(
       new C_AioFlushComplete(this, on_safe,
-                             std::move(m_aio_modify_unsafe_contexts)));
+                             std::move(m_aio_modify_unsafe_contexts)),
+      util::get_image_ctx(&m_image_ctx), AIO_TYPE_FLUSH);
   m_aio_modify_unsafe_contexts.clear();
   return aio_comp;
 }
diff --git a/src/librbd/journal/Replay.h b/src/librbd/journal/Replay.h
index aeca5ba..30642db 100644
--- a/src/librbd/journal/Replay.h
+++ b/src/librbd/journal/Replay.h
@@ -7,12 +7,11 @@
 #include "include/int_types.h"
 #include "include/buffer_fwd.h"
 #include "include/Context.h"
-#include "include/rbd/librbd.hpp"
 #include "common/Mutex.h"
+#include "librbd/AioCompletion.h"
 #include "librbd/journal/Types.h"
 #include <boost/variant.hpp>
 #include <list>
-#include <set>
 #include <unordered_set>
 #include <unordered_map>
 
@@ -33,7 +32,9 @@ public:
   Replay(ImageCtxT &image_ctx);
   ~Replay();
 
-  void process(bufferlist::iterator *it, Context *on_ready, Context *on_safe);
+  int decode(bufferlist::iterator *it, EventEntry *event_entry);
+  void process(const EventEntry &event_entry,
+               Context *on_ready, Context *on_safe);
 
   void shut_down(bool cancel_ops, Context *on_finish);
   void flush(Context *on_finish);
@@ -52,6 +53,7 @@ private:
     Context *on_finish_ready = nullptr;
     Context *on_finish_safe = nullptr;
     Context *on_op_complete = nullptr;
+    ReturnValues op_finish_error_codes;
     ReturnValues ignore_error_codes;
   };
 
@@ -122,6 +124,7 @@ private:
   ContextSet m_aio_modify_safe_contexts;
 
   OpEvents m_op_events;
+  uint64_t m_in_flight_op_events = 0;
 
   Context *m_flush_ctx = nullptr;
   Context *m_on_aio_ready = nullptr;
@@ -167,6 +170,7 @@ private:
 
   AioCompletion *create_aio_modify_completion(Context *on_ready,
                                               Context *on_safe,
+                                              aio_type_t aio_type,
                                               bool *flush_required);
   AioCompletion *create_aio_flush_completion(Context *on_safe);
   void handle_aio_completion(AioCompletion *aio_comp);
diff --git a/src/librbd/journal/StandardPolicy.cc b/src/librbd/journal/StandardPolicy.cc
index 9e71828..5cba7c0 100644
--- a/src/librbd/journal/StandardPolicy.cc
+++ b/src/librbd/journal/StandardPolicy.cc
@@ -25,10 +25,5 @@ void StandardPolicy::allocate_tag_on_lock(Context *on_finish) {
   m_image_ctx->journal->allocate_local_tag(on_finish);
 }
 
-void StandardPolicy::cancel_external_replay(Context *on_finish) {
-  // external replay is only handled by rbd-mirror
-  assert(false);
-}
-
 } // namespace journal
 } // namespace librbd
diff --git a/src/librbd/journal/StandardPolicy.h b/src/librbd/journal/StandardPolicy.h
index c49ec9c..c2c997c 100644
--- a/src/librbd/journal/StandardPolicy.h
+++ b/src/librbd/journal/StandardPolicy.h
@@ -17,8 +17,10 @@ public:
   StandardPolicy(ImageCtx *image_ctx) : m_image_ctx(image_ctx) {
   }
 
+  virtual bool append_disabled() const {
+    return false;
+  }
   virtual void allocate_tag_on_lock(Context *on_finish);
-  virtual void cancel_external_replay(Context *on_finish);
 
 private:
   ImageCtx *m_image_ctx;
diff --git a/src/librbd/journal/Types.cc b/src/librbd/journal/Types.cc
index 8f9f942..0b2a54d 100644
--- a/src/librbd/journal/Types.cc
+++ b/src/librbd/journal/Types.cc
@@ -157,16 +157,21 @@ void SnapEventBase::dump(Formatter *f) const {
 void SnapRenameEvent::encode(bufferlist& bl) const {
   SnapEventBase::encode(bl);
   ::encode(snap_id, bl);
+  ::encode(src_snap_name, bl);
 }
 
 void SnapRenameEvent::decode(__u8 version, bufferlist::iterator& it) {
   SnapEventBase::decode(version, it);
   ::decode(snap_id, it);
+  if (version >= 2) {
+    ::decode(src_snap_name, it);
+  }
 }
 
 void SnapRenameEvent::dump(Formatter *f) const {
   SnapEventBase::dump(f);
   f->dump_unsigned("src_snap_id", snap_id);
+  f->dump_string("src_snap_name", src_snap_name);
   f->dump_string("dest_snap_name", snap_name);
 }
 
@@ -224,7 +229,7 @@ EventType EventEntry::get_event_type() const {
 }
 
 void EventEntry::encode(bufferlist& bl) const {
-  ENCODE_START(1, 1, bl);
+  ENCODE_START(2, 1, bl);
   boost::apply_visitor(EncodeVisitor(bl), event);
   ENCODE_FINISH(bl);
 }
@@ -312,7 +317,7 @@ void EventEntry::generate_test_instances(std::list<EventEntry *> &o) {
   o.push_back(new EventEntry(SnapRemoveEvent(345, "snap")));
 
   o.push_back(new EventEntry(SnapRenameEvent()));
-  o.push_back(new EventEntry(SnapRenameEvent(456, 1, "snap")));
+  o.push_back(new EventEntry(SnapRenameEvent(456, 1, "src snap", "dest snap")));
 
   o.push_back(new EventEntry(SnapProtectEvent()));
   o.push_back(new EventEntry(SnapProtectEvent(567, "snap")));
diff --git a/src/librbd/journal/Types.h b/src/librbd/journal/Types.h
index 4008a0f..8584532 100644
--- a/src/librbd/journal/Types.h
+++ b/src/librbd/journal/Types.h
@@ -161,12 +161,15 @@ struct SnapRenameEvent : public SnapEventBase {
   static const EventType TYPE = EVENT_TYPE_SNAP_RENAME;
 
   uint64_t snap_id;
+  std::string src_snap_name;
 
   SnapRenameEvent() : snap_id(CEPH_NOSNAP) {
   }
   SnapRenameEvent(uint64_t op_tid, uint64_t src_snap_id,
+                  const std::string &src_snap_name,
                   const std::string &dest_snap_name)
-    : SnapEventBase(op_tid, dest_snap_name), snap_id(src_snap_id) {
+    : SnapEventBase(op_tid, dest_snap_name), snap_id(src_snap_id),
+      src_snap_name(src_snap_name) {
   }
 
   void encode(bufferlist& bl) const;
@@ -483,6 +486,18 @@ std::ostream &operator<<(std::ostream &out, const MirrorPeerState &meta);
 std::ostream &operator<<(std::ostream &out, const MirrorPeerClientMeta &meta);
 std::ostream &operator<<(std::ostream &out, const TagData &tag_data);
 
+enum class ListenerType : int8_t {
+  RESYNC
+};
+
+struct ResyncListener {
+  virtual ~ResyncListener() {}
+  virtual void handle_resync() = 0;
+};
+
+typedef boost::variant<ResyncListener *> JournalListenerPtr;
+
+
 } // namespace journal
 } // namespace librbd
 
diff --git a/src/librbd/librbd.cc b/src/librbd/librbd.cc
index c31bd64..b71d5e8 100644
--- a/src/librbd/librbd.cc
+++ b/src/librbd/librbd.cc
@@ -15,14 +15,10 @@
 
 #include <errno.h>
 
-#include "common/Cond.h"
 #include "common/dout.h"
 #include "common/errno.h"
-#include "common/snap_types.h"
-#include "common/perf_counters.h"
 #include "common/TracepointProvider.h"
 #include "include/Context.h"
-#include "osdc/ObjectCacher.h"
 
 #include "librbd/AioCompletion.h"
 #include "librbd/AioImageRequestWQ.h"
@@ -132,6 +128,19 @@ struct C_CloseComplete : public Context {
   }
 };
 
+struct C_UpdateWatchCB : public librbd::UpdateWatchCtx {
+  rbd_update_callback_t watch_cb;
+  void *arg;
+  uint64_t handle = 0;
+
+  C_UpdateWatchCB(rbd_update_callback_t watch_cb, void *arg) :
+    watch_cb(watch_cb), arg(arg) {
+  }
+  void handle_notify() {
+    watch_cb(arg);
+  }
+};
+
 void mirror_image_info_cpp_to_c(const librbd::mirror_image_info_t &cpp_info,
 				rbd_mirror_image_info_t *c_info) {
   c_info->global_id = strdup(cpp_info.global_id.c_str());
@@ -1297,6 +1306,22 @@ namespace librbd {
 					   status_size);
   }
 
+  int Image::update_watch(UpdateWatchCtx *wctx, uint64_t *handle) {
+    ImageCtx *ictx = (ImageCtx *)ctx;
+    tracepoint(librbd, update_watch_enter, ictx, wctx);
+    int r = ictx->state->register_update_watcher(wctx, handle);
+    tracepoint(librbd, update_watch_exit, r, *handle);
+    return r;
+  }
+
+  int Image::update_unwatch(uint64_t handle) {
+    ImageCtx *ictx = (ImageCtx *)ctx;
+    tracepoint(librbd, update_unwatch_enter, ictx, handle);
+    int r = ictx->state->unregister_update_watcher(handle);
+    tracepoint(librbd, update_unwatch_exit, r);
+    return r;
+  }
+
 } // namespace librbd
 
 extern "C" void rbd_version(int *major, int *minor, int *extra)
@@ -2753,6 +2778,29 @@ extern "C" int rbd_mirror_image_get_status(rbd_image_t image,
   return 0;
 }
 
+extern "C" int rbd_update_watch(rbd_image_t image, uint64_t *handle,
+				rbd_update_callback_t watch_cb, void *arg)
+{
+  librbd::ImageCtx *ictx = (librbd::ImageCtx *)image;
+  C_UpdateWatchCB *wctx = new C_UpdateWatchCB(watch_cb, arg);
+  tracepoint(librbd, update_watch_enter, ictx, wctx);
+  int r = ictx->state->register_update_watcher(wctx, &wctx->handle);
+  tracepoint(librbd, update_watch_exit, r, wctx->handle);
+  *handle = reinterpret_cast<uint64_t>(wctx);
+  return r;
+}
+
+extern "C" int rbd_update_unwatch(rbd_image_t image, uint64_t handle)
+{
+  librbd::ImageCtx *ictx = (librbd::ImageCtx *)image;
+  C_UpdateWatchCB *wctx = reinterpret_cast<C_UpdateWatchCB *>(handle);
+  tracepoint(librbd, update_unwatch_enter, ictx, wctx->handle);
+  int r = ictx->state->unregister_update_watcher(wctx->handle);
+  delete wctx;
+  tracepoint(librbd, update_unwatch_exit, r);
+  return r;
+}
+
 extern "C" int rbd_aio_is_complete(rbd_completion_t c)
 {
   librbd::RBD::AioCompletion *comp = (librbd::RBD::AioCompletion *)c;
diff --git a/src/librbd/object_map/InvalidateRequest.cc b/src/librbd/object_map/InvalidateRequest.cc
index 41ef46c..e60d04f 100644
--- a/src/librbd/object_map/InvalidateRequest.cc
+++ b/src/librbd/object_map/InvalidateRequest.cc
@@ -3,10 +3,8 @@
 
 #include "librbd/object_map/InvalidateRequest.h"
 #include "common/dout.h"
-#include "common/errno.h"
 #include "librbd/ExclusiveLock.h"
 #include "librbd/ImageCtx.h"
-#include "librbd/ImageWatcher.h"
 
 #define dout_subsys ceph_subsys_rbd
 #undef dout_prefix
diff --git a/src/librbd/object_map/LockRequest.h b/src/librbd/object_map/LockRequest.h
index 04c9a09..0333548 100644
--- a/src/librbd/object_map/LockRequest.h
+++ b/src/librbd/object_map/LockRequest.h
@@ -4,12 +4,12 @@
 #ifndef CEPH_LIBRBD_OBJECT_MAP_LOCK_REQUEST_H
 #define CEPH_LIBRBD_OBJECT_MAP_LOCK_REQUEST_H
 
-#include "include/int_types.h"
 #include "include/buffer.h"
-#include "include/Context.h"
 #include "cls/lock/cls_lock_types.h"
 #include <map>
 
+class Context;
+
 namespace librbd {
 
 class ImageCtx;
diff --git a/src/librbd/object_map/RefreshRequest.cc b/src/librbd/object_map/RefreshRequest.cc
index 1bd465e..9421c12 100644
--- a/src/librbd/object_map/RefreshRequest.cc
+++ b/src/librbd/object_map/RefreshRequest.cc
@@ -2,12 +2,9 @@
 // vim: ts=8 sw=2 smarttab
 
 #include "librbd/object_map/RefreshRequest.h"
-#include "cls/rbd/cls_rbd_client.h"
-#include "cls/rbd/cls_rbd_types.h"
 #include "cls/lock/cls_lock_client.h"
 #include "common/dout.h"
 #include "common/errno.h"
-#include "common/WorkQueue.h"
 #include "librbd/ImageCtx.h"
 #include "librbd/ObjectMap.h"
 #include "librbd/object_map/InvalidateRequest.h"
diff --git a/src/librbd/object_map/RefreshRequest.h b/src/librbd/object_map/RefreshRequest.h
index 9ae1f27..4c2b059 100644
--- a/src/librbd/object_map/RefreshRequest.h
+++ b/src/librbd/object_map/RefreshRequest.h
@@ -6,9 +6,10 @@
 
 #include "include/int_types.h"
 #include "include/buffer.h"
-#include "include/Context.h"
 #include "common/bit_vector.hpp"
 
+class Context;
+
 namespace librbd {
 
 class ImageCtx;
diff --git a/src/librbd/object_map/Request.cc b/src/librbd/object_map/Request.cc
index 48cd99f..1725cbf 100644
--- a/src/librbd/object_map/Request.cc
+++ b/src/librbd/object_map/Request.cc
@@ -2,13 +2,10 @@
 // vim: ts=8 sw=2 smarttab
 
 #include "librbd/object_map/Request.h"
-#include "include/rados/librados.hpp"
-#include "include/rbd/librbd.hpp"
 #include "common/dout.h"
 #include "common/errno.h"
 #include "common/RWLock.h"
 #include "librbd/ImageCtx.h"
-#include "librbd/ImageWatcher.h"
 #include "librbd/object_map/InvalidateRequest.h"
 
 #define dout_subsys ceph_subsys_rbd
diff --git a/src/librbd/object_map/SnapshotCreateRequest.cc b/src/librbd/object_map/SnapshotCreateRequest.cc
index abca0e2..6408973 100644
--- a/src/librbd/object_map/SnapshotCreateRequest.cc
+++ b/src/librbd/object_map/SnapshotCreateRequest.cc
@@ -3,7 +3,6 @@
 
 #include "librbd/object_map/SnapshotCreateRequest.h"
 #include "common/dout.h"
-#include "common/errno.h"
 #include "librbd/ImageCtx.h"
 #include "librbd/ObjectMap.h"
 #include "cls/lock/cls_lock_client.h"
diff --git a/src/librbd/object_map/SnapshotRemoveRequest.cc b/src/librbd/object_map/SnapshotRemoveRequest.cc
index c718af1..94c0952 100644
--- a/src/librbd/object_map/SnapshotRemoveRequest.cc
+++ b/src/librbd/object_map/SnapshotRemoveRequest.cc
@@ -72,6 +72,11 @@ bool SnapshotRemoveRequest::should_complete(int r) {
   bool finished = false;
   switch (m_state) {
   case STATE_LOAD_MAP:
+    if (r == -ENOENT) {
+      finished = true;
+      break;
+    }
+
     if (r == 0) {
       bufferlist::iterator it = m_out_bl.begin();
       r = cls_client::object_map_load_finish(&it, &m_snap_object_map);
diff --git a/src/librbd/object_map/SnapshotRemoveRequest.h b/src/librbd/object_map/SnapshotRemoveRequest.h
index 6469678..3b196b1 100644
--- a/src/librbd/object_map/SnapshotRemoveRequest.h
+++ b/src/librbd/object_map/SnapshotRemoveRequest.h
@@ -58,7 +58,8 @@ protected:
   virtual bool should_complete(int r);
 
   virtual int filter_return_code(int r) const {
-    if (m_state == STATE_REMOVE_MAP && r == -ENOENT) {
+    if ((m_state == STATE_LOAD_MAP || m_state == STATE_REMOVE_MAP) &&
+        r == -ENOENT) {
       return 0;
     }
     return r;
diff --git a/src/librbd/object_map/SnapshotRollbackRequest.cc b/src/librbd/object_map/SnapshotRollbackRequest.cc
index 9d4fc4a..10eb591 100644
--- a/src/librbd/object_map/SnapshotRollbackRequest.cc
+++ b/src/librbd/object_map/SnapshotRollbackRequest.cc
@@ -3,7 +3,6 @@
 
 #include "librbd/object_map/SnapshotRollbackRequest.h"
 #include "common/dout.h"
-#include "common/errno.h"
 #include "librbd/ImageCtx.h"
 #include "librbd/ObjectMap.h"
 #include "librbd/object_map/InvalidateRequest.h"
diff --git a/src/librbd/object_map/UnlockRequest.h b/src/librbd/object_map/UnlockRequest.h
index 1453540..b52a3d0 100644
--- a/src/librbd/object_map/UnlockRequest.h
+++ b/src/librbd/object_map/UnlockRequest.h
@@ -4,10 +4,7 @@
 #ifndef CEPH_LIBRBD_OBJECT_MAP_UNLOCK_REQUEST_H
 #define CEPH_LIBRBD_OBJECT_MAP_UNLOCK_REQUEST_H
 
-#include "include/int_types.h"
-#include "include/buffer.h"
-#include "include/Context.h"
-#include <map>
+class Context;
 
 namespace librbd {
 
diff --git a/src/librbd/object_map/UpdateRequest.cc b/src/librbd/object_map/UpdateRequest.cc
index 5dd1e53..51dbc48 100644
--- a/src/librbd/object_map/UpdateRequest.cc
+++ b/src/librbd/object_map/UpdateRequest.cc
@@ -5,7 +5,6 @@
 #include "include/rbd/object_map_types.h"
 #include "include/stringify.h"
 #include "common/dout.h"
-#include "common/errno.h"
 #include "librbd/ImageCtx.h"
 #include "librbd/ObjectMap.h"
 #include "cls/lock/cls_lock_client.h"
diff --git a/src/librbd/operation/FlattenRequest.cc b/src/librbd/operation/FlattenRequest.cc
index 59bdb29..8cfddbe 100644
--- a/src/librbd/operation/FlattenRequest.cc
+++ b/src/librbd/operation/FlattenRequest.cc
@@ -6,8 +6,6 @@
 #include "librbd/AsyncObjectThrottle.h"
 #include "librbd/ExclusiveLock.h"
 #include "librbd/ImageCtx.h"
-#include "librbd/ImageWatcher.h"
-#include "librbd/ObjectMap.h"
 #include "common/dout.h"
 #include "common/errno.h"
 #include <boost/lambda/bind.hpp>
@@ -44,7 +42,7 @@ public:
     bufferlist bl;
     string oid = image_ctx.get_object_name(m_object_no);
     AioObjectWrite *req = new AioObjectWrite(&image_ctx, oid, m_object_no, 0,
-                                             bl, m_snapc, this);
+                                             bl, m_snapc, this, 0);
     if (!req->has_parent()) {
       // stop early if the parent went away - it just means
       // another flatten finished first or the image was resized
diff --git a/src/librbd/operation/RebuildObjectMapRequest.cc b/src/librbd/operation/RebuildObjectMapRequest.cc
index da2e744..352a439 100644
--- a/src/librbd/operation/RebuildObjectMapRequest.cc
+++ b/src/librbd/operation/RebuildObjectMapRequest.cc
@@ -7,7 +7,6 @@
 #include "librbd/AsyncObjectThrottle.h"
 #include "librbd/ExclusiveLock.h"
 #include "librbd/ImageCtx.h"
-#include "librbd/ImageWatcher.h"
 #include "librbd/internal.h"
 #include "librbd/ObjectMap.h"
 #include "librbd/operation/ResizeRequest.h"
diff --git a/src/librbd/operation/RenameRequest.h b/src/librbd/operation/RenameRequest.h
index c2fa14e..95f6658 100644
--- a/src/librbd/operation/RenameRequest.h
+++ b/src/librbd/operation/RenameRequest.h
@@ -5,7 +5,6 @@
 #define CEPH_LIBRBD_RENAME_REQUEST_H
 
 #include "librbd/operation/Request.h"
-#include <iosfwd>
 #include <string>
 
 class Context;
diff --git a/src/librbd/operation/Request.cc b/src/librbd/operation/Request.cc
index 216da1a..7534004 100644
--- a/src/librbd/operation/Request.cc
+++ b/src/librbd/operation/Request.cc
@@ -6,8 +6,6 @@
 #include "common/errno.h"
 #include "common/WorkQueue.h"
 #include "librbd/ImageCtx.h"
-#include "librbd/Journal.h"
-#include "librbd/Utils.h"
 
 #define dout_subsys ceph_subsys_rbd
 #undef dout_prefix
@@ -34,13 +32,40 @@ void Request<I>::send() {
 }
 
 template <typename I>
-void Request<I>::finish(int r) {
-  // automatically commit the event if we don't need to worry
-  // about affecting concurrent IO ops
-  if (r < 0 || !can_affect_io()) {
-    commit_op_event(r);
+Context *Request<I>::create_context_finisher(int r) {
+  // automatically commit the event if required (delete after commit)
+  if (m_appended_op_event && !m_committed_op_event &&
+      commit_op_event(r)) {
+    return nullptr;
   }
 
+  I &image_ctx = this->m_image_ctx;
+  CephContext *cct = image_ctx.cct;
+  ldout(cct, 10) << this << " " << __func__ << dendl;
+  return util::create_context_callback<Request<I>, &Request<I>::finish>(this);
+}
+
+template <typename I>
+void Request<I>::finish_and_destroy(int r) {
+  I &image_ctx = this->m_image_ctx;
+  CephContext *cct = image_ctx.cct;
+  ldout(cct, 10) << this << " " << __func__ << ": r=" << r << dendl;
+
+  // automatically commit the event if required (delete after commit)
+  if (m_appended_op_event && !m_committed_op_event &&
+      commit_op_event(r)) {
+    return;
+  }
+
+  AsyncRequest<I>::finish_and_destroy(r);
+}
+
+template <typename I>
+void Request<I>::finish(int r) {
+  I &image_ctx = this->m_image_ctx;
+  CephContext *cct = image_ctx.cct;
+  ldout(cct, 10) << this << " " << __func__ << ": r=" << r << dendl;
+
   assert(!m_appended_op_event || m_committed_op_event);
   AsyncRequest<I>::finish(r);
 }
@@ -51,8 +76,8 @@ bool Request<I>::append_op_event() {
 
   assert(image_ctx.owner_lock.is_locked());
   RWLock::RLocker snap_locker(image_ctx.snap_lock);
-  if (image_ctx.journal != NULL &&
-      !image_ctx.journal->is_journal_replaying()) {
+  if (image_ctx.journal != nullptr &&
+      image_ctx.journal->is_journal_appending()) {
     append_op_event(util::create_context_callback<
       Request<I>, &Request<I>::handle_op_event_safe>(this));
     return true;
@@ -61,27 +86,46 @@ bool Request<I>::append_op_event() {
 }
 
 template <typename I>
-void Request<I>::commit_op_event(int r) {
+bool Request<I>::commit_op_event(int r) {
   I &image_ctx = this->m_image_ctx;
   RWLock::RLocker snap_locker(image_ctx.snap_lock);
 
   if (!m_appended_op_event) {
-    return;
+    return false;
   }
 
   assert(m_op_tid != 0);
   assert(!m_committed_op_event);
   m_committed_op_event = true;
 
-  if (image_ctx.journal != NULL &&
-      !image_ctx.journal->is_journal_replaying()) {
+  if (image_ctx.journal != nullptr &&
+      image_ctx.journal->is_journal_appending()) {
     CephContext *cct = image_ctx.cct;
     ldout(cct, 10) << this << " " << __func__ << ": r=" << r << dendl;
 
     // ops will be canceled / completed before closing journal
     assert(image_ctx.journal->is_journal_ready());
-    image_ctx.journal->commit_op_event(m_op_tid, r);
+    image_ctx.journal->commit_op_event(m_op_tid, r,
+                                       new C_CommitOpEvent(this, r));
+    return true;
+  }
+  return false;
+}
+
+template <typename I>
+void Request<I>::handle_commit_op_event(int r, int original_ret_val) {
+  I &image_ctx = this->m_image_ctx;
+  CephContext *cct = image_ctx.cct;
+  ldout(cct, 10) << this << " " << __func__ << ": r=" << r << dendl;
+
+  if (r < 0) {
+    lderr(cct) << "failed to commit op event to journal: " << cpp_strerror(r)
+               << dendl;
+  }
+  if (original_ret_val < 0) {
+    r = original_ret_val;
   }
+  finish(r);
 }
 
 template <typename I>
@@ -108,7 +152,7 @@ void Request<I>::append_op_event(Context *on_safe) {
   m_op_tid = image_ctx.journal->allocate_op_tid();
   image_ctx.journal->append_op_event(
     m_op_tid, journal::EventEntry{create_event(m_op_tid)},
-    new C_OpEventSafe(this, on_safe));
+    new C_AppendOpEvent(this, on_safe));
 }
 
 template <typename I>
diff --git a/src/librbd/operation/Request.h b/src/librbd/operation/Request.h
index be4d174..7899316 100644
--- a/src/librbd/operation/Request.h
+++ b/src/librbd/operation/Request.h
@@ -9,7 +9,6 @@
 #include "common/RWLock.h"
 #include "librbd/Utils.h"
 #include "librbd/Journal.h"
-#include "librbd/journal/Types.h"
 
 namespace librbd {
 
@@ -41,32 +40,31 @@ protected:
     assert(can_affect_io());
     RWLock::RLocker owner_locker(image_ctx.owner_lock);
     RWLock::RLocker snap_locker(image_ctx.snap_lock);
-    if (image_ctx.journal != NULL) {
-      Context *ctx = util::create_context_callback<T, MF>(request);
+    if (image_ctx.journal != nullptr) {
       if (image_ctx.journal->is_journal_replaying()) {
+        Context *ctx = util::create_context_callback<T, MF>(request);
         replay_op_ready(ctx);
-      } else {
+        return true;
+      } else if (image_ctx.journal->is_journal_appending()) {
+        Context *ctx = util::create_context_callback<T, MF>(request);
         append_op_event(ctx);
+        return true;
       }
-      return true;
     }
     return false;
   }
 
   bool append_op_event();
-  void commit_op_event(int r);
 
   // NOTE: temporary until converted to new state machine format
-  Context *create_context_finisher() {
-    return util::create_context_callback<
-      Request<ImageCtxT>, &Request<ImageCtxT>::finish>(this);
-  }
+  Context *create_context_finisher(int r);
+  virtual void finish_and_destroy(int r) override;
 
 private:
-  struct C_OpEventSafe : public Context {
+  struct C_AppendOpEvent : public Context {
     Request *request;
     Context *on_safe;
-    C_OpEventSafe(Request *request, Context *on_safe)
+    C_AppendOpEvent(Request *request, Context *on_safe)
       : request(request), on_safe(on_safe) {
     }
     virtual void finish(int r) override {
@@ -77,6 +75,18 @@ private:
     }
   };
 
+  struct C_CommitOpEvent : public Context {
+    Request *request;
+    int ret_val;
+    C_CommitOpEvent(Request *request, int ret_val)
+      : request(request), ret_val(ret_val) {
+    }
+    virtual void finish(int r) override {
+      request->handle_commit_op_event(r, ret_val);
+      delete request;
+    }
+  };
+
   uint64_t m_op_tid = 0;
   bool m_appended_op_event = false;
   bool m_committed_op_event = false;
@@ -85,6 +95,9 @@ private:
   void append_op_event(Context *on_safe);
   void handle_op_event_safe(int r);
 
+  bool commit_op_event(int r);
+  void handle_commit_op_event(int r, int original_ret_val);
+
 };
 
 } // namespace operation
diff --git a/src/librbd/operation/ResizeRequest.cc b/src/librbd/operation/ResizeRequest.cc
index a2ee7b0..1c3dee7 100644
--- a/src/librbd/operation/ResizeRequest.cc
+++ b/src/librbd/operation/ResizeRequest.cc
@@ -5,7 +5,6 @@
 #include "librbd/AioImageRequestWQ.h"
 #include "librbd/ExclusiveLock.h"
 #include "librbd/ImageCtx.h"
-#include "librbd/ImageWatcher.h"
 #include "librbd/internal.h"
 #include "librbd/ObjectMap.h"
 #include "librbd/Utils.h"
@@ -106,7 +105,7 @@ Context *ResizeRequest<I>::handle_pre_block_writes(int *result) {
   if (*result < 0) {
     lderr(cct) << "failed to block writes: " << cpp_strerror(*result) << dendl;
     image_ctx.aio_work_queue->unblock_writes();
-    return this->create_context_finisher();
+    return this->create_context_finisher(*result);
   }
 
   return send_append_op_event();
@@ -135,7 +134,7 @@ Context *ResizeRequest<I>::handle_append_op_event(int *result) {
     lderr(cct) << "failed to commit journal entry: " << cpp_strerror(*result)
                << dendl;
     image_ctx.aio_work_queue->unblock_writes();
-    return this->create_context_finisher();
+    return this->create_context_finisher(*result);
   }
 
   return send_grow_object_map();
@@ -163,10 +162,10 @@ Context *ResizeRequest<I>::handle_trim_image(int *result) {
 
   if (*result == -ERESTART) {
     ldout(cct, 5) << "resize operation interrupted" << dendl;
-    return this->create_context_finisher();
+    return this->create_context_finisher(*result);
   } else if (*result < 0) {
     lderr(cct) << "failed to trim image: " << cpp_strerror(*result) << dendl;
-    return this->create_context_finisher();
+    return this->create_context_finisher(*result);
   }
 
   send_invalidate_cache();
@@ -196,7 +195,7 @@ Context *ResizeRequest<I>::handle_invalidate_cache(int *result) {
   if (*result < 0) {
     lderr(cct) << "failed to invalidate cache: " << cpp_strerror(*result)
                << dendl;
-    return this->create_context_finisher();
+    return this->create_context_finisher(*result);
   }
 
   send_post_block_writes();
@@ -214,10 +213,7 @@ Context *ResizeRequest<I>::send_grow_object_map() {
   image_ctx.aio_work_queue->unblock_writes();
 
   if (m_original_size == m_new_size) {
-    if (!m_disable_journal) {
-      this->commit_op_event(0);
-    }
-    return this->create_context_finisher();
+    return this->create_context_finisher(0);
   } else if (m_new_size < m_original_size) {
     send_trim_image();
     return nullptr;
@@ -270,7 +266,7 @@ Context *ResizeRequest<I>::send_shrink_object_map() {
     image_ctx.owner_lock.put_read();
 
     update_size_and_overlap();
-    return this->create_context_finisher();
+    return this->create_context_finisher(0);
   }
 
   CephContext *cct = image_ctx.cct;
@@ -298,7 +294,7 @@ Context *ResizeRequest<I>::handle_shrink_object_map(int *result) {
 
   update_size_and_overlap();
   assert(*result == 0);
-  return this->create_context_finisher();
+  return this->create_context_finisher(0);
 }
 
 template <typename I>
@@ -322,7 +318,7 @@ Context *ResizeRequest<I>::handle_post_block_writes(int *result) {
     image_ctx.aio_work_queue->unblock_writes();
     lderr(cct) << "failed to block writes prior to header update: "
                << cpp_strerror(*result) << dendl;
-    return this->create_context_finisher();
+    return this->create_context_finisher(*result);
   }
 
   send_update_header();
@@ -374,12 +370,9 @@ Context *ResizeRequest<I>::handle_update_header(int *result) {
     lderr(cct) << "failed to update image header: " << cpp_strerror(*result)
                << dendl;
     image_ctx.aio_work_queue->unblock_writes();
-    return this->create_context_finisher();
+    return this->create_context_finisher(*result);
   }
 
-  if (!m_disable_journal) {
-    this->commit_op_event(0);
-  }
   return send_shrink_object_map();
 }
 
diff --git a/src/librbd/operation/SnapshotCreateRequest.cc b/src/librbd/operation/SnapshotCreateRequest.cc
index cb92c6c..4f20ec4 100644
--- a/src/librbd/operation/SnapshotCreateRequest.cc
+++ b/src/librbd/operation/SnapshotCreateRequest.cc
@@ -7,7 +7,6 @@
 #include "librbd/AioImageRequestWQ.h"
 #include "librbd/ExclusiveLock.h"
 #include "librbd/ImageCtx.h"
-#include "librbd/ImageWatcher.h"
 #include "librbd/ObjectMap.h"
 #include "librbd/Utils.h"
 
@@ -116,7 +115,7 @@ Context *SnapshotCreateRequest<I>::handle_suspend_aio(int *result) {
   if (*result < 0) {
     lderr(cct) << "failed to block writes: " << cpp_strerror(*result) << dendl;
     image_ctx.aio_work_queue->unblock_writes();
-    return this->create_context_finisher();
+    return this->create_context_finisher(*result);
   }
 
   send_append_op_event();
@@ -147,7 +146,7 @@ Context *SnapshotCreateRequest<I>::handle_append_op_event(int *result) {
     image_ctx.aio_work_queue->unblock_writes();
     lderr(cct) << "failed to commit journal entry: " << cpp_strerror(*result)
                << dendl;
-    return this->create_context_finisher();
+    return this->create_context_finisher(*result);
   }
 
   send_allocate_snap_id();
@@ -176,10 +175,10 @@ Context *SnapshotCreateRequest<I>::handle_allocate_snap_id(int *result) {
 
   if (*result < 0) {
     save_result(result);
-    finalize(*result);
+    image_ctx.aio_work_queue->unblock_writes();
     lderr(cct) << "failed to allocate snapshot id: " << cpp_strerror(*result)
                << dendl;
-    return this->create_context_finisher();
+    return this->create_context_finisher(*result);
   }
 
   send_create_snap();
@@ -251,8 +250,8 @@ Context *SnapshotCreateRequest<I>::send_create_object_map() {
   if (image_ctx.object_map == nullptr || m_skip_object_map) {
     image_ctx.snap_lock.put_read();
 
-    finalize(0);
-    return this->create_context_finisher();
+    image_ctx.aio_work_queue->unblock_writes();
+    return this->create_context_finisher(0);
   }
 
   CephContext *cct = image_ctx.cct;
@@ -277,8 +276,8 @@ Context *SnapshotCreateRequest<I>::handle_create_object_map(int *result) {
 
   assert(*result == 0);
 
-  finalize(0);
-  return this->create_context_finisher();
+  image_ctx.aio_work_queue->unblock_writes();
+  return this->create_context_finisher(0);
 }
 
 template <typename I>
@@ -305,20 +304,8 @@ Context *SnapshotCreateRequest<I>::handle_release_snap_id(int *result) {
   assert(m_ret_val < 0);
   *result = m_ret_val;
 
-  finalize(m_ret_val);
-  return this->create_context_finisher();
-}
-
-template <typename I>
-void SnapshotCreateRequest<I>::finalize(int r) {
-  I &image_ctx = this->m_image_ctx;
-  CephContext *cct = image_ctx.cct;
-  ldout(cct, 5) << this << " " << __func__ << ": r=" << r << dendl;
-
-  if (r == 0) {
-    this->commit_op_event(0);
-  }
   image_ctx.aio_work_queue->unblock_writes();
+  return this->create_context_finisher(m_ret_val);
 }
 
 template <typename I>
diff --git a/src/librbd/operation/SnapshotCreateRequest.h b/src/librbd/operation/SnapshotCreateRequest.h
index 35f8b53..c239d60 100644
--- a/src/librbd/operation/SnapshotCreateRequest.h
+++ b/src/librbd/operation/SnapshotCreateRequest.h
@@ -6,7 +6,6 @@
 
 #include "librbd/operation/Request.h"
 #include "librbd/parent_types.h"
-#include <iosfwd>
 #include <string>
 
 class Context;
@@ -106,7 +105,6 @@ private:
   void send_release_snap_id();
   Context *handle_release_snap_id(int *result);
 
-  void finalize(int r);
   void update_snap_context();
 
   void save_result(int *result) {
diff --git a/src/librbd/operation/SnapshotProtectRequest.h b/src/librbd/operation/SnapshotProtectRequest.h
index e8abfb2..b905ff5 100644
--- a/src/librbd/operation/SnapshotProtectRequest.h
+++ b/src/librbd/operation/SnapshotProtectRequest.h
@@ -5,7 +5,6 @@
 #define CEPH_LIBRBD_OPERATION_SNAPSHOT_PROTECT_REQUEST_H
 
 #include "librbd/operation/Request.h"
-#include <iosfwd>
 #include <string>
 
 class Context;
diff --git a/src/librbd/operation/SnapshotRemoveRequest.cc b/src/librbd/operation/SnapshotRemoveRequest.cc
index 8ad123b..9bc4567 100644
--- a/src/librbd/operation/SnapshotRemoveRequest.cc
+++ b/src/librbd/operation/SnapshotRemoveRequest.cc
@@ -6,7 +6,6 @@
 #include "common/errno.h"
 #include "librbd/ExclusiveLock.h"
 #include "librbd/ImageCtx.h"
-#include "librbd/ImageWatcher.h"
 #include "librbd/ObjectMap.h"
 
 #define dout_subsys ceph_subsys_rbd
diff --git a/src/librbd/operation/SnapshotRenameRequest.cc b/src/librbd/operation/SnapshotRenameRequest.cc
index ec7eb65..c7148c8 100644
--- a/src/librbd/operation/SnapshotRenameRequest.cc
+++ b/src/librbd/operation/SnapshotRenameRequest.cc
@@ -6,7 +6,6 @@
 #include "common/errno.h"
 #include "librbd/ExclusiveLock.h"
 #include "librbd/ImageCtx.h"
-#include "librbd/ImageWatcher.h"
 
 #define dout_subsys ceph_subsys_rbd
 #undef dout_prefix
@@ -39,6 +38,21 @@ SnapshotRenameRequest<I>::SnapshotRenameRequest(I &image_ctx,
 }
 
 template <typename I>
+journal::Event SnapshotRenameRequest<I>::create_event(uint64_t op_tid) const {
+  I &image_ctx = this->m_image_ctx;
+  assert(image_ctx.snap_lock.is_locked());
+
+  std::string src_snap_name;
+  auto snap_info_it = image_ctx.snap_info.find(m_snap_id);
+  if (snap_info_it != image_ctx.snap_info.end()) {
+    src_snap_name = snap_info_it->second.name;
+  }
+
+  return journal::SnapRenameEvent(op_tid, m_snap_id, src_snap_name,
+                                  m_snap_name);
+}
+
+template <typename I>
 void SnapshotRenameRequest<I>::send_op() {
   send_rename_snap();
 }
diff --git a/src/librbd/operation/SnapshotRenameRequest.h b/src/librbd/operation/SnapshotRenameRequest.h
index 503058d..0b512ec 100644
--- a/src/librbd/operation/SnapshotRenameRequest.h
+++ b/src/librbd/operation/SnapshotRenameRequest.h
@@ -5,7 +5,6 @@
 #define CEPH_LIBRBD_OPERATION_SNAPSHOT_RENAME_REQUEST_H
 
 #include "librbd/operation/Request.h"
-#include <iosfwd>
 #include <string>
 
 class Context;
@@ -42,9 +41,7 @@ public:
   SnapshotRenameRequest(ImageCtxT &image_ctx, Context *on_finish,
                         uint64_t snap_id, const std::string &snap_name);
 
-  virtual journal::Event create_event(uint64_t op_tid) const {
-    return journal::SnapRenameEvent(op_tid, m_snap_id, m_snap_name);
-  }
+  virtual journal::Event create_event(uint64_t op_tid) const;
 
 protected:
   virtual void send_op();
diff --git a/src/librbd/operation/SnapshotRollbackRequest.cc b/src/librbd/operation/SnapshotRollbackRequest.cc
index 6dcf3a7..3335b36 100644
--- a/src/librbd/operation/SnapshotRollbackRequest.cc
+++ b/src/librbd/operation/SnapshotRollbackRequest.cc
@@ -107,7 +107,7 @@ Context *SnapshotRollbackRequest<I>::handle_block_writes(int *result) {
 
   if (*result < 0) {
     lderr(cct) << "failed to block writes: " << cpp_strerror(*result) << dendl;
-    return this->create_context_finisher();
+    return this->create_context_finisher(*result);
   }
 
   send_resize_image();
@@ -150,7 +150,7 @@ Context *SnapshotRollbackRequest<I>::handle_resize_image(int *result) {
   if (*result < 0) {
     lderr(cct) << "failed to resize image for rollback: "
                << cpp_strerror(*result) << dendl;
-    return this->create_context_finisher();
+    return this->create_context_finisher(*result);
   }
 
   send_rollback_object_map();
@@ -224,11 +224,11 @@ Context *SnapshotRollbackRequest<I>::handle_rollback_objects(int *result) {
 
   if (*result == -ERESTART) {
     ldout(cct, 5) << "snapshot rollback operation interrupted" << dendl;
-    return this->create_context_finisher();
+    return this->create_context_finisher(*result);
   } else if (*result < 0) {
     lderr(cct) << "failed to rollback objects: " << cpp_strerror(*result)
                << dendl;
-    return this->create_context_finisher();
+    return this->create_context_finisher(*result);
   }
 
   return send_refresh_object_map();
@@ -276,7 +276,7 @@ Context *SnapshotRollbackRequest<I>::send_invalidate_cache() {
 
   apply();
   if (image_ctx.object_cacher == NULL) {
-    return this->create_context_finisher();
+    return this->create_context_finisher(0);
   }
 
   CephContext *cct = image_ctx.cct;
@@ -300,7 +300,7 @@ Context *SnapshotRollbackRequest<I>::handle_invalidate_cache(int *result) {
     lderr(cct) << "failed to invalidate cache: " << cpp_strerror(*result)
                << dendl;
   }
-  return this->create_context_finisher();
+  return this->create_context_finisher(*result);
 }
 
 template <typename I>
diff --git a/src/librbd/operation/SnapshotRollbackRequest.h b/src/librbd/operation/SnapshotRollbackRequest.h
index 53fb85b..135ba97 100644
--- a/src/librbd/operation/SnapshotRollbackRequest.h
+++ b/src/librbd/operation/SnapshotRollbackRequest.h
@@ -7,13 +7,13 @@
 #include "librbd/operation/Request.h"
 #include "librbd/ImageCtx.h"
 #include "librbd/internal.h"
+#include "librbd/journal/Types.h"
 #include <string>
 
 class Context;
 
 namespace librbd {
 
-class ImageCtx;
 class ProgressContext;
 
 namespace operation {
diff --git a/src/librbd/operation/SnapshotUnprotectRequest.h b/src/librbd/operation/SnapshotUnprotectRequest.h
index 62467f2..c9f5b33 100644
--- a/src/librbd/operation/SnapshotUnprotectRequest.h
+++ b/src/librbd/operation/SnapshotUnprotectRequest.h
@@ -5,7 +5,6 @@
 #define CEPH_LIBRBD_OPERATION_SNAPSHOT_UNPROTECT_REQUEST_H
 
 #include "librbd/operation/Request.h"
-#include <iosfwd>
 #include <string>
 
 class Context;
diff --git a/src/librbd/operation/TrimRequest.cc b/src/librbd/operation/TrimRequest.cc
index 3ed96f7..3992fb7 100644
--- a/src/librbd/operation/TrimRequest.cc
+++ b/src/librbd/operation/TrimRequest.cc
@@ -6,7 +6,6 @@
 #include "librbd/AioObjectRequest.h"
 #include "librbd/ExclusiveLock.h"
 #include "librbd/ImageCtx.h"
-#include "librbd/ImageWatcher.h"
 #include "librbd/internal.h"
 #include "librbd/ObjectMap.h"
 #include "librbd/Utils.h"
@@ -46,8 +45,8 @@ public:
     string oid = image_ctx.get_object_name(m_object_no);
     ldout(image_ctx.cct, 10) << "removing (with copyup) " << oid << dendl;
 
-    AioObjectRequest *req = new AioObjectTrim(&image_ctx, oid, m_object_no,
-                                              m_snapc, this);
+    AioObjectRequest<> *req = new AioObjectTrim(&image_ctx, oid, m_object_no,
+                                                m_snapc, this);
     req->send();
     return 0;
   }
@@ -195,8 +194,7 @@ void TrimRequest<I>::send_copyup_objects() {
 
     snapc = image_ctx.snapc;
     has_snapshots = !image_ctx.snaps.empty();
-    int r = image_ctx.get_parent_overlap(image_ctx.get_copyup_snap_id(),
-                                           &parent_overlap);
+    int r = image_ctx.get_parent_overlap(CEPH_NOSNAP, &parent_overlap);
     assert(r == 0);
   }
 
@@ -363,7 +361,7 @@ void TrimRequest<I>::send_clean_boundary() {
     ldout(cct, 20) << " ex " << *p << dendl;
     Context *req_comp = new C_ContextCompletion(*completion);
 
-    AioObjectRequest *req;
+    AioObjectRequest<> *req;
     if (p->offset == 0) {
       req = new AioObjectTrim(&image_ctx, p->oid.name, p->objectno, snapc,
                               req_comp);
diff --git a/src/librbd/parent_types.h b/src/librbd/parent_types.h
index 5e26794..69598b7 100644
--- a/src/librbd/parent_types.h
+++ b/src/librbd/parent_types.h
@@ -16,7 +16,7 @@ namespace librbd {
     std::string image_id;
     snapid_t snap_id;
     parent_spec() : pool_id(-1), snap_id(CEPH_NOSNAP) {}
-    parent_spec(uint64_t pool_id, std::string image_id, snapid_t snap_id) :
+    parent_spec(int64_t pool_id, std::string image_id, snapid_t snap_id) :
       pool_id(pool_id), image_id(image_id), snap_id(snap_id) {}
     bool operator==(const parent_spec &other) {
       return ((this->pool_id == other.pool_id) &&
diff --git a/src/mds/CDir.cc b/src/mds/CDir.cc
index d99c896..fcae684 100644
--- a/src/mds/CDir.cc
+++ b/src/mds/CDir.cc
@@ -2229,11 +2229,19 @@ void CDir::_commit(version_t want, int op_prio)
 void CDir::_committed(int r, version_t v)
 {
   if (r < 0) {
-    dout(1) << "commit error " << r << " v " << v << dendl;
-    cache->mds->clog->error() << "failed to commit dir " << dirfrag() << " object,"
-			      << " errno " << r << "\n";
-    cache->mds->handle_write_error(r);
-    return;
+    // the directory could be partly purged during MDS failover
+    if (r == -ENOENT && committed_version == 0 &&
+	inode->inode.nlink == 0 && inode->snaprealm) {
+      inode->state_set(CInode::STATE_MISSINGOBJS);
+      r = 0;
+    }
+    if (r < 0) {
+      dout(1) << "commit error " << r << " v " << v << dendl;
+      cache->mds->clog->error() << "failed to commit dir " << dirfrag() << " object,"
+				<< " errno " << r << "\n";
+      cache->mds->handle_write_error(r);
+      return;
+    }
   }
 
   dout(10) << "_committed v " << v << " on " << *this << dendl;
diff --git a/src/mds/CInode.cc b/src/mds/CInode.cc
index 718c0bc..c4001a2 100644
--- a/src/mds/CInode.cc
+++ b/src/mds/CInode.cc
@@ -92,9 +92,8 @@ struct cinode_lock_info_t cinode_lock_info[] = {
   { CEPH_LOCK_IAUTH, CEPH_CAP_AUTH_EXCL },
   { CEPH_LOCK_ILINK, CEPH_CAP_LINK_EXCL },
   { CEPH_LOCK_IXATTR, CEPH_CAP_XATTR_EXCL },
-  { CEPH_LOCK_IFLOCK, CEPH_CAP_FLOCK_EXCL }  
 };
-int num_cinode_locks = 5;
+int num_cinode_locks = sizeof(cinode_lock_info) / sizeof(cinode_lock_info[0]);
 
 
 
@@ -144,6 +143,7 @@ ostream& operator<<(ostream& out, const CInode& in)
   if (in.state_test(CInode::STATE_NEEDSRECOVER)) out << " needsrecover";
   if (in.state_test(CInode::STATE_RECOVERING)) out << " recovering";
   if (in.state_test(CInode::STATE_DIRTYPARENT)) out << " dirtyparent";
+  if (in.state_test(CInode::STATE_MISSINGOBJS)) out << " missingobjs";
   if (in.is_freezing_inode()) out << " FREEZING=" << in.auth_pin_freeze_allowance;
   if (in.is_frozen_inode()) out << " FROZEN";
   if (in.is_frozen_auth_pin()) out << " FROZEN_AUTHPIN";
@@ -311,20 +311,23 @@ void CInode::remove_need_snapflush(CInode *snapin, snapid_t snapid, client_t cli
   }
 }
 
-void CInode::split_need_snapflush(CInode *cowin, CInode *in)
+bool CInode::split_need_snapflush(CInode *cowin, CInode *in)
 {
   dout(10) << "split_need_snapflush [" << cowin->first << "," << cowin->last << "] for " << *cowin << dendl;
+  bool need_flush = false;
   for (compact_map<snapid_t, set<client_t> >::iterator p = client_need_snapflush.lower_bound(cowin->first);
        p != client_need_snapflush.end() && p->first < in->first; ) {
     compact_map<snapid_t, set<client_t> >::iterator q = p;
     ++p;
     assert(!q->second.empty());
-    if (cowin->last >= q->first)
+    if (cowin->last >= q->first) {
       cowin->auth_pin(this);
-    else
+      need_flush = true;
+    } else
       client_need_snapflush.erase(q);
     in->auth_unpin(this);
   }
+  return need_flush;
 }
 
 void CInode::mark_dirty_rstat()
@@ -807,9 +810,14 @@ bool CInode::is_projected_ancestor_of(CInode *other)
   return false;
 }
 
-void CInode::make_path_string(string& s, bool force, CDentry *use_parent) const
+/*
+ * If use_parent is NULL (it should be one of inode's projected parents),
+ * we use it to make path string. Otherwise, we use inode's parent dentry
+ * to make path string
+ */
+void CInode::make_path_string(string& s, CDentry *use_parent) const
 {
-  if (!force)
+  if (!use_parent)
     use_parent = parent;
 
   if (use_parent) {
@@ -844,7 +852,7 @@ void CInode::make_path_string_projected(string& s) const
 	 p != projected_parent.end();
 	 ++p) {
       string q;
-      make_path_string(q, true, *p);
+      make_path_string(q, *p);
       s += " ";
       s += q;
     }
@@ -2814,18 +2822,18 @@ void CInode::move_to_realm(SnapRealm *realm)
   containing_realm = realm;
 }
 
-Capability *CInode::reconnect_cap(client_t client, ceph_mds_cap_reconnect& icr, Session *session)
+Capability *CInode::reconnect_cap(client_t client, const cap_reconnect_t& icr, Session *session)
 {
   Capability *cap = get_client_cap(client);
   if (cap) {
     // FIXME?
-    cap->merge(icr.wanted, icr.issued);
+    cap->merge(icr.capinfo.wanted, icr.capinfo.issued);
   } else {
     cap = add_client_cap(client, session);
-    cap->set_wanted(icr.wanted);
-    cap->issue_norevoke(icr.issued);
+    cap->set_cap_id(icr.capinfo.cap_id);
+    cap->set_wanted(icr.capinfo.wanted);
+    cap->issue_norevoke(icr.capinfo.issued);
     cap->reset_seq();
-    cap->set_cap_id(icr.cap_id);
   }
   cap->set_last_issue_stamp(ceph_clock_now(g_ceph_context));
   return cap;
@@ -4093,6 +4101,8 @@ void CInode::dump(Formatter *f) const
     f->dump_string("state", "dirtypool");
   if (state_test(STATE_ORPHAN))
     f->dump_string("state", "orphan");
+  if (state_test(STATE_MISSINGOBJS))
+    f->dump_string("state", "missingobjs");
   f->close_section();
 
   f->open_array_section("client_caps");
diff --git a/src/mds/CInode.h b/src/mds/CInode.h
index 8f27bf0..eb70325 100644
--- a/src/mds/CInode.h
+++ b/src/mds/CInode.h
@@ -217,6 +217,7 @@ public:
   static const int STATE_FROZENAUTHPIN = (1<<17);
   static const int STATE_DIRTYPOOL =   (1<<18);
   static const int STATE_REPAIRSTATS = (1<<19);
+  static const int STATE_MISSINGOBJS = (1<<20);
   // orphan inode needs notification of releasing reference
   static const int STATE_ORPHAN =	STATE_NOTIFYREF;
 
@@ -560,13 +561,13 @@ protected:
   compact_map<int32_t, int32_t>      mds_caps_wanted;     // [auth] mds -> caps wanted
   int                   replica_caps_wanted; // [replica] what i've requested from auth
 
-  compact_map<int, std::set<client_t> > client_snap_caps;     // [auth] [snap] dirty metadata we still need from the head
 public:
+  compact_map<int, std::set<client_t> > client_snap_caps;     // [auth] [snap] dirty metadata we still need from the head
   compact_map<snapid_t, std::set<client_t> > client_need_snapflush;
 
   void add_need_snapflush(CInode *snapin, snapid_t snapid, client_t client);
   void remove_need_snapflush(CInode *snapin, snapid_t snapid, client_t client);
-  void split_need_snapflush(CInode *cowin, CInode *in);
+  bool split_need_snapflush(CInode *cowin, CInode *in);
 
 protected:
 
@@ -738,7 +739,7 @@ public:
 
   // -- misc -- 
   bool is_projected_ancestor_of(CInode *other);
-  void make_path_string(std::string& s, bool force=false, CDentry *use_parent=NULL) const;
+  void make_path_string(std::string& s, CDentry *use_parent=NULL) const;
   void make_path_string_projected(std::string& s) const;
   void make_path(filepath& s) const;
   void name_stray_dentry(std::string& dname);
@@ -987,7 +988,7 @@ public:
   void remove_client_cap(client_t client);
   void move_to_realm(SnapRealm *realm);
 
-  Capability *reconnect_cap(client_t client, ceph_mds_cap_reconnect& icr, Session *session);
+  Capability *reconnect_cap(client_t client, const cap_reconnect_t& icr, Session *session);
   void clear_client_caps_after_export();
   void export_client_caps(std::map<client_t,Capability::Export>& cl);
 
diff --git a/src/mds/FSMap.cc b/src/mds/FSMap.cc
index 32b04db..eb3dd74 100644
--- a/src/mds/FSMap.cc
+++ b/src/mds/FSMap.cc
@@ -151,7 +151,8 @@ void FSMap::print_summary(Formatter *f, ostream *out)
 
     const fs_cluster_id_t fscid = mds_roles.at(info.global_id);
 
-    if (info.rank != MDS_RANK_NONE) {
+    if (info.rank != MDS_RANK_NONE &&
+        info.state != MDSMap::STATE_STANDBY_REPLAY) {
       if (f) {
         f->open_object_section("mds");
         f->dump_unsigned("filesystem_id", fscid);
diff --git a/src/mds/Locker.cc b/src/mds/Locker.cc
index 84e0e48..10fff98 100644
--- a/src/mds/Locker.cc
+++ b/src/mds/Locker.cc
@@ -1750,8 +1750,11 @@ void Locker::file_update_finish(CInode *in, MutationRef& mut, bool share, client
       } else
 	++p;
     }
-    if (gather)
+    if (gather) {
+      if (in->client_snap_caps.empty())
+	in->item_open_file.remove_myself();
       eval_cap_gather(in, &need_issue);
+    }
   } else {
     if (cap && (cap->wanted() & ~cap->pending()) &&
 	need_issue.count(in) == 0) {  // if we won't issue below anyway
@@ -2467,7 +2470,7 @@ void Locker::handle_client_caps(MClientCaps *m)
     if (mds->is_reconnect() &&
 	m->get_dirty() && m->get_client_tid() > 0 &&
 	!session->have_completed_flush(m->get_client_tid())) {
-      mdcache->set_reconnect_dirty_caps(m->get_ino(), m->get_dirty());
+      mdcache->set_reconnected_dirty_caps(client, m->get_ino(), m->get_dirty());
     }
     mds->wait_for_replay(new C_MDS_RetryMessage(mds, m));
     return;
diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc
index ddf4684..df126d6 100644
--- a/src/mds/MDCache.cc
+++ b/src/mds/MDCache.cc
@@ -720,7 +720,7 @@ void MDCache::open_foreign_mdsdir(inodeno_t ino, MDSInternalContextBase *fin)
   discover_base_ino(ino, fin, mds_rank_t(ino & (MAX_MDS-1)));
 }
 
-CDentry *MDCache::get_or_create_stray_dentry(CInode *in)
+CDir *MDCache::get_stray_dir(CInode *in)
 {
   string straydname;
   in->name_stray_dentry(straydname);
@@ -730,6 +730,14 @@ CDentry *MDCache::get_or_create_stray_dentry(CInode *in)
   frag_t fg = strayi->pick_dirfrag(straydname);
   CDir *straydir = strayi->get_dirfrag(fg);
   assert(straydir);
+  return straydir;
+}
+
+CDentry *MDCache::get_or_create_stray_dentry(CInode *in)
+{
+  CDir *straydir = get_stray_dir(in);
+  string straydname;
+  in->name_stray_dentry(straydname);
   CDentry *straydn = straydir->lookup(straydname);
   if (!straydn) {
     straydn = straydir->add_null_dentry(straydname);
@@ -1540,7 +1548,21 @@ CInode *MDCache::cow_inode(CInode *in, snapid_t last)
   if (in->last != CEPH_NOSNAP) {
     CInode *head_in = get_inode(in->ino());
     assert(head_in);
-    head_in->split_need_snapflush(oldin, in);
+    if (head_in->split_need_snapflush(oldin, in)) {
+      oldin->client_snap_caps = in->client_snap_caps;
+      for (compact_map<int,set<client_t> >::iterator p = in->client_snap_caps.begin();
+	   p != in->client_snap_caps.end();
+	   ++p) {
+	SimpleLock *lock = oldin->get_lock(p->first);
+	assert(lock);
+	for (auto q = p->second.begin(); q != p->second.end(); ++q) {
+	  oldin->auth_pin(lock);
+	  lock->set_state(LOCK_SNAP_SYNC);  // gathering
+	  lock->get_wrlock(true);
+	}
+      }
+    }
+    return oldin;
   }
 
   // clone caps?
@@ -1684,7 +1706,10 @@ void MDCache::journal_cow_dentry(MutationImpl *mut, EMetaBlob *metablob,
       CDentry *olddn = dn->dir->add_primary_dentry(dn->name, oldin, oldfirst, follows);
       oldin->inode.version = olddn->pre_dirty();
       dout(10) << " olddn " << *olddn << dendl;
-      metablob->add_primary_dentry(olddn, 0, true);
+      bool need_snapflush = !oldin->client_snap_caps.empty();
+      if (need_snapflush)
+	mut->ls->open_files.push_back(&oldin->item_open_file);
+      metablob->add_primary_dentry(olddn, 0, true, false, false, need_snapflush);
       mut->add_cow_dentry(olddn);
     } else {
       assert(dnl->is_remote());
@@ -3920,17 +3945,13 @@ void MDCache::rejoin_send_rejoins()
 
   if (mds->is_rejoin()) {
     map<client_t, set<mds_rank_t> > client_exports;
-    for (map<inodeno_t,map<client_t,ceph_mds_cap_reconnect> >::iterator p = cap_exports.begin();
-         p != cap_exports.end();
-	 ++p) {
+    for (auto p = cap_exports.begin(); p != cap_exports.end(); ++p) {
       assert(cap_export_targets.count(p->first));
       mds_rank_t target = cap_export_targets[p->first];
       if (rejoins.count(target) == 0)
 	continue;
       rejoins[target]->cap_exports[p->first] = p->second;
-      for (map<client_t,ceph_mds_cap_reconnect>::iterator q = p->second.begin();
-	   q != p->second.end();
-	   ++q)
+      for (auto q = p->second.begin(); q != p->second.end(); ++q)
 	client_exports[q->first].insert(target);
     }
     for (map<client_t, set<mds_rank_t> >::iterator p = client_exports.begin();
@@ -4257,14 +4278,10 @@ void MDCache::handle_cache_rejoin_weak(MMDSCacheRejoin *weak)
     map<inodeno_t,map<client_t,Capability::Import> > imported_caps;
 
     // check cap exports
-    for (map<inodeno_t,map<client_t,ceph_mds_cap_reconnect> >::iterator p = weak->cap_exports.begin();
-	 p != weak->cap_exports.end();
-	 ++p) {
+    for (auto p = weak->cap_exports.begin(); p != weak->cap_exports.end(); ++p) {
       CInode *in = get_inode(p->first);
       assert(!in || in->is_auth());
-      for (map<client_t,ceph_mds_cap_reconnect>::iterator q = p->second.begin();
-	   q != p->second.end();
-	   ++q) {
+      for (auto q = p->second.begin(); q != p->second.end(); ++q) {
 	dout(10) << " claiming cap import " << p->first << " client." << q->first << " on " << *in << dendl;
 	Capability *cap = rejoin_import_cap(in, q->first, q->second, from);
 	Capability::Import& im = imported_caps[p->first][q->first];
@@ -4286,15 +4303,11 @@ void MDCache::handle_cache_rejoin_weak(MMDSCacheRejoin *weak)
     // check cap exports.
     rejoin_client_map.insert(weak->client_map.begin(), weak->client_map.end());
 
-    for (map<inodeno_t,map<client_t,ceph_mds_cap_reconnect> >::iterator p = weak->cap_exports.begin();
-	 p != weak->cap_exports.end();
-	 ++p) {
+    for (auto p = weak->cap_exports.begin(); p != weak->cap_exports.end(); ++p) {
       CInode *in = get_inode(p->first);
       assert(in && in->is_auth());
       // note
-      for (map<client_t,ceph_mds_cap_reconnect>::iterator q = p->second.begin();
-	   q != p->second.end();
-	   ++q) {
+      for (auto q = p->second.begin(); q != p->second.end(); ++q) {
 	dout(10) << " claiming cap import " << p->first << " client." << q->first << dendl;
 	cap_imports[p->first][q->first][from] = q->second;
       }
@@ -5049,7 +5062,7 @@ void MDCache::handle_cache_rejoin_ack(MMDSCacheRejoin *ack)
 
       // mark client caps stale.
       MClientCaps *m = new MClientCaps(CEPH_CAP_OP_EXPORT, p->first, 0,
-				       cap_exports[p->first][q->first].cap_id, 0,
+				       cap_exports[p->first][q->first].capinfo.cap_id, 0,
                                        mds->get_osd_epoch_barrier());
       m->set_cap_peer(q->second.cap_id, q->second.issue_seq, q->second.mseq, from, 0);
       mds->send_message_client_counted(m, session);
@@ -5157,7 +5170,7 @@ void MDCache::rejoin_gather_finish()
 
   choose_lock_states_and_reconnect_caps();
 
-  identify_files_to_recover(rejoin_recover_q, rejoin_check_q);
+  identify_files_to_recover();
   rejoin_send_acks();
   
   // signal completion of fetches, rejoin_gather_finish, etc.
@@ -5189,12 +5202,9 @@ void MDCache::rejoin_open_ino_finish(inodeno_t ino, int ret)
   } else if (ret == mds->get_nodeid()) {
     assert(get_inode(ino));
   } else {
-    map<inodeno_t,map<client_t,map<mds_rank_t,ceph_mds_cap_reconnect> > >::iterator p;
-    p = cap_imports.find(ino);
+    auto p = cap_imports.find(ino);
     assert(p != cap_imports.end());
-    for (map<client_t,map<mds_rank_t,ceph_mds_cap_reconnect> >::iterator q = p->second.begin();
-	q != p->second.end();
-	++q) {
+    for (auto q = p->second.begin(); q != p->second.end(); ++q) {
       assert(q->second.count(MDS_RANK_NONE));
       assert(q->second.size() == 1);
       rejoin_export_caps(p->first, q->first, q->second[MDS_RANK_NONE], ret);
@@ -5239,9 +5249,7 @@ bool MDCache::process_imported_caps()
 {
   dout(10) << "process_imported_caps" << dendl;
 
-  for (map<inodeno_t,map<client_t, map<mds_rank_t,ceph_mds_cap_reconnect> > >::iterator p = cap_imports.begin();
-       p != cap_imports.end();
-       ++p) {
+  for (auto p = cap_imports.begin(); p != cap_imports.end(); ++p) {
     CInode *in = get_inode(p->first);
     if (in) {
       assert(in->is_auth());
@@ -5308,8 +5316,7 @@ bool MDCache::process_imported_caps()
 
     // process cap imports
     //  ino -> client -> frommds -> capex
-    for (map<inodeno_t,map<client_t, map<mds_rank_t,ceph_mds_cap_reconnect> > >::iterator p = cap_imports.begin();
-	 p != cap_imports.end(); ) {
+    for (auto p = cap_imports.begin(); p != cap_imports.end(); ) {
       CInode *in = get_inode(p->first);
       if (!in) {
 	dout(10) << " still missing ino " << p->first
@@ -5318,20 +5325,16 @@ bool MDCache::process_imported_caps()
 	continue;
       }
       assert(in->is_auth());
-      for (map<client_t,map<mds_rank_t,ceph_mds_cap_reconnect> >::iterator q = p->second.begin();
-	   q != p->second.end();
-	   ++q) {
+      for (auto q = p->second.begin(); q != p->second.end(); ++q) {
 	Session *session = mds->sessionmap.get_session(entity_name_t::CLIENT(q->first.v));
 	assert(session);
-	for (map<mds_rank_t,ceph_mds_cap_reconnect>::iterator r = q->second.begin();
-	     r != q->second.end();
-	     ++r) {
-	  add_reconnected_cap(in, q->first, inodeno_t(r->second.snaprealm));
+	for (auto r = q->second.begin(); r != q->second.end(); ++r) {
 	  Capability *cap = in->reconnect_cap(q->first, r->second, session);
+	  add_reconnected_cap(q->first, in->ino(), r->second);
 	  if (r->first >= 0) {
 	    if (cap->get_last_seq() == 0) // don't increase mseq if cap already exists
 	      cap->inc_mseq();
-	    do_cap_import(session, in, cap, r->second.cap_id, 0, 0, r->first, 0);
+	    do_cap_import(session, in, cap, r->second.capinfo.cap_id, 0, 0, r->first, 0);
 
 	    Capability::Import& im = rejoin_imported_caps[r->first][p->first][q->first];
 	    im.cap_id = cap->get_cap_id();
@@ -5354,12 +5357,21 @@ bool MDCache::process_imported_caps()
   return false;
 }
 
-void MDCache::check_realm_past_parents(SnapRealm *realm)
+void MDCache::check_realm_past_parents(SnapRealm *realm, bool reconnect)
 {
   // are this realm's parents fully open?
   if (realm->have_past_parents_open()) {
     dout(10) << " have past snap parents for realm " << *realm 
 	     << " on " << *realm->inode << dendl;
+    if (reconnect) {
+      // finish off client snaprealm reconnects?
+      auto p = reconnected_snaprealms.find(realm->inode->ino());
+      if (p != reconnected_snaprealms.end()) {
+	for (auto q = p->second.begin(); q != p->second.end(); ++q)
+	  finish_snaprealm_reconnect(q->first, realm, q->second);
+	reconnected_snaprealms.erase(p);
+      }
+    }
   } else {
     if (!missing_snap_parents.count(realm->inode)) {
       dout(10) << " MISSING past snap parents for realm " << *realm
@@ -5373,6 +5385,42 @@ void MDCache::check_realm_past_parents(SnapRealm *realm)
   }
 }
 
+void MDCache::rebuild_need_snapflush(CInode *head_in, SnapRealm *realm,
+				     client_t client, snapid_t snap_follows)
+{
+  dout(10) << "rebuild_need_snapflush " << snap_follows << " on " << *head_in << dendl;
+
+  const set<snapid_t>& snaps = realm->get_snaps();
+  snapid_t follows = snap_follows;
+
+  while (true) {
+    CInode *in = pick_inode_snap(head_in, follows);
+    if (in == head_in)
+      break;
+    dout(10) << " need snapflush from client." << client << " on " << *in << dendl;
+
+    /* TODO: we can check the reconnected/flushing caps to find 
+     *       which locks need gathering */
+    for (int i = 0; i < num_cinode_locks; i++) {
+      int lockid = cinode_lock_info[i].lock;
+      SimpleLock *lock = in->get_lock(lockid);
+      assert(lock);
+      in->client_snap_caps[lockid].insert(client);
+      in->auth_pin(lock);
+      lock->set_state(LOCK_SNAP_SYNC);
+      lock->get_wrlock(true);
+    }
+
+    for (auto p = snaps.lower_bound(in->first);
+	 p != snaps.end() && *p <= in->last;
+	 ++p) {
+      head_in->add_need_snapflush(in, *p, client);
+    }
+
+    follows = in->last;
+  }
+}
+
 /*
  * choose lock states based on reconnected caps
  */
@@ -5386,45 +5434,57 @@ void MDCache::choose_lock_states_and_reconnect_caps()
        i != inode_map.end();
        ++i) {
     CInode *in = i->second;
+
+    if (in->last != CEPH_NOSNAP)
+      continue;
  
     if (in->is_auth() && !in->is_base() && in->inode.is_dirty_rstat())
       in->mark_dirty_rstat();
 
+    auto p = reconnected_caps.find(in->ino());
+
     int dirty_caps = 0;
-    map<inodeno_t, int>::iterator it = cap_imports_dirty.find(in->ino());
-    if (it != cap_imports_dirty.end())
-      dirty_caps = it->second;
+    if (p != reconnected_caps.end()) {
+      for (const auto &it : p->second)
+	dirty_caps |= it.second.dirty_caps;
+    }
     in->choose_lock_states(dirty_caps);
     dout(15) << " chose lock states on " << *in << dendl;
 
     SnapRealm *realm = in->find_snaprealm();
 
-    check_realm_past_parents(realm);
+    check_realm_past_parents(realm, realm == in->snaprealm);
 
-    map<CInode*,map<client_t,inodeno_t> >::iterator p = reconnected_caps.find(in);
     if (p != reconnected_caps.end()) {
-
+      bool missing_snap_parent = false;
       // also, make sure client's cap is in the correct snaprealm.
-      for (map<client_t,inodeno_t>::iterator q = p->second.begin();
-	   q != p->second.end();
-	   ++q) {
-	if (q->second == realm->inode->ino()) {
-	  dout(15) << "  client." << q->first << " has correct realm " << q->second << dendl;
+      for (auto q = p->second.begin(); q != p->second.end(); ++q) {
+	if (q->second.snap_follows > 0 && q->second.snap_follows < in->first - 1) {
+	  if (realm->have_past_parents_open()) {
+	    rebuild_need_snapflush(in, realm, q->first, q->second.snap_follows);
+	  } else {
+	    missing_snap_parent = true;
+	  }
+	}
+
+	if (q->second.realm_ino == realm->inode->ino()) {
+	  dout(15) << "  client." << q->first << " has correct realm " << q->second.realm_ino << dendl;
 	} else {
-	  dout(15) << "  client." << q->first << " has wrong realm " << q->second
+	  dout(15) << "  client." << q->first << " has wrong realm " << q->second.realm_ino
 		   << " != " << realm->inode->ino() << dendl;
 	  if (realm->have_past_parents_open()) {
 	    // ok, include in a split message _now_.
 	    prepare_realm_split(realm, q->first, in->ino(), splits);
 	  } else {
 	    // send the split later.
-	    missing_snap_parents[realm->inode][q->first].insert(in->ino());
+	    missing_snap_parent = true;
 	  }
 	}
       }
+      if (missing_snap_parent)
+	missing_snap_parents[realm->inode].insert(in);
     }
   }    
-  reconnected_caps.clear();
 
   send_snaps(splits);
 }
@@ -5483,14 +5543,21 @@ void MDCache::clean_open_file_lists()
        p != mds->mdlog->segments.end();
        ++p) {
     LogSegment *ls = p->second;
-    
+
     elist<CInode*>::iterator q = ls->open_files.begin(member_offset(CInode, item_open_file));
     while (!q.end()) {
       CInode *in = *q;
       ++q;
-      if (!in->is_any_caps_wanted()) {
-	dout(10) << " unlisting unwanted/capless inode " << *in << dendl;
-	in->item_open_file.remove_myself();
+      if (in->last == CEPH_NOSNAP) {
+	if (!in->is_any_caps_wanted()) {
+	  dout(10) << " unlisting unwanted/capless inode " << *in << dendl;
+	  in->item_open_file.remove_myself();
+	}
+      } else if (in->last != CEPH_NOSNAP) {
+	if (in->client_snap_caps.empty()) {
+	  dout(10) << " unlisting flushed snap inode " << *in << dendl;
+	  in->item_open_file.remove_myself();
+	}
       }
     }
   }
@@ -5498,7 +5565,7 @@ void MDCache::clean_open_file_lists()
 
 
 
-Capability* MDCache::rejoin_import_cap(CInode *in, client_t client, ceph_mds_cap_reconnect& icr, mds_rank_t frommds)
+Capability* MDCache::rejoin_import_cap(CInode *in, client_t client, const cap_reconnect_t& icr, mds_rank_t frommds)
 {
   dout(10) << "rejoin_import_cap for client." << client << " from mds." << frommds
 	   << " on " << *in << dendl;
@@ -5510,7 +5577,7 @@ Capability* MDCache::rejoin_import_cap(CInode *in, client_t client, ceph_mds_cap
   if (frommds >= 0) {
     if (cap->get_last_seq() == 0) // don't increase mseq if cap already exists
       cap->inc_mseq();
-    do_cap_import(session, in, cap, icr.cap_id, 0, 0, frommds, 0);
+    do_cap_import(session, in, cap, icr.capinfo.cap_id, 0, 0, frommds, 0);
   }
 
   return cap;
@@ -5522,13 +5589,9 @@ void MDCache::export_remaining_imported_caps()
 
   stringstream warn_str;
 
-  for (map<inodeno_t,map<client_t,map<mds_rank_t,ceph_mds_cap_reconnect> > >::iterator p = cap_imports.begin();
-       p != cap_imports.end();
-       ++p) {
+  for (auto p = cap_imports.begin(); p != cap_imports.end(); ++p) {
     warn_str << " ino " << p->first << "\n";
-    for (map<client_t,map<mds_rank_t,ceph_mds_cap_reconnect> >::iterator q = p->second.begin();
-	q != p->second.end();
-	++q) {
+    for (auto q = p->second.begin(); q != p->second.end(); ++q) {
       Session *session = mds->sessionmap.get_session(entity_name_t::CLIENT(q->first.v));
       if (session) {
 	// mark client caps stale.
@@ -5545,7 +5608,6 @@ void MDCache::export_remaining_imported_caps()
     mds->queue_waiters(p->second);
 
   cap_imports.clear();
-  cap_imports_dirty.clear();
   cap_reconnect_waiters.clear();
 
   if (warn_str.peek() != EOF) {
@@ -5557,12 +5619,12 @@ void MDCache::export_remaining_imported_caps()
 void MDCache::try_reconnect_cap(CInode *in, Session *session)
 {
   client_t client = session->info.get_client();
-  ceph_mds_cap_reconnect *rc = get_replay_cap_reconnect(in->ino(), client);
+  const cap_reconnect_t *rc = get_replay_cap_reconnect(in->ino(), client);
   if (rc) {
     in->reconnect_cap(client, *rc, session);
     dout(10) << "try_reconnect_cap client." << client
-	     << " reconnect wanted " << ccap_string(rc->wanted)
-	     << " issue " << ccap_string(rc->issued)
+	     << " reconnect wanted " << ccap_string(rc->capinfo.wanted)
+	     << " issue " << ccap_string(rc->capinfo.issued)
 	     << " on " << *in << dendl;
     remove_replay_cap_reconnect(in->ino(), client);
 
@@ -5570,9 +5632,12 @@ void MDCache::try_reconnect_cap(CInode *in, Session *session)
       mds->locker->try_eval(in, CEPH_CAP_LOCKS);
     } else {
       int dirty_caps = 0;
-      map<inodeno_t, int>::iterator it = cap_imports_dirty.find(in->ino());
-      if (it != cap_imports_dirty.end())
-	dirty_caps = it->second;
+      auto p = reconnected_caps.find(in->ino());
+      if (p != reconnected_caps.end()) {
+	auto q = p->second.find(client);
+	if (q != p->second.end())
+	  dirty_caps = q->second.dirty_caps;
+      }
       in->choose_lock_states(dirty_caps);
       dout(15) << " chose lock states on " << *in << dendl;
     }
@@ -5645,21 +5710,26 @@ void MDCache::open_snap_parents()
   map<client_t,MClientSnap*> splits;
   MDSGatherBuilder gather(g_ceph_context);
 
-  map<CInode*,map<client_t,set<inodeno_t> > >::iterator p = missing_snap_parents.begin();
+  auto p = missing_snap_parents.begin();
   while (p != missing_snap_parents.end()) {
     CInode *in = p->first;
     assert(in->snaprealm);
     if (in->snaprealm->open_parents(gather.new_sub())) {
       dout(10) << " past parents now open on " << *in << dendl;
-      
-      // include in a (now safe) snap split?
-      for (map<client_t,set<inodeno_t> >::iterator q = p->second.begin();
-	   q != p->second.end();
-	   ++q)
-	for (set<inodeno_t>::iterator r = q->second.begin();
-	     r != q->second.end();
-	     ++r) 
-	  prepare_realm_split(in->snaprealm, q->first, *r, splits);
+
+      for (CInode *child : p->second) {
+	auto q = reconnected_caps.find(child->ino());
+	assert(q != reconnected_caps.end());
+	for (auto r = q->second.begin(); r != q->second.end(); ++r) {
+	  if (r->second.snap_follows > 0 && r->second.snap_follows < in->first - 1) {
+	    rebuild_need_snapflush(child, in->snaprealm, r->first, r->second.snap_follows);
+	  }
+	  // make sure client's cap is in the correct snaprealm.
+	  if (r->second.realm_ino != in->ino()) {
+	    prepare_realm_split(in->snaprealm, r->first, child->ino(), splits);
+	  }
+	}
+      }
 
       missing_snap_parents.erase(p++);
 
@@ -5707,10 +5777,10 @@ void MDCache::open_snap_parents()
     dout(10) << "open_snap_parents - all open" << dendl;
     do_delayed_cap_imports();
 
-    start_files_to_recover(rejoin_recover_q, rejoin_check_q);
     assert(rejoin_done != NULL);
     rejoin_done->complete(0);
     rejoin_done = NULL;
+    reconnected_caps.clear();
   }
 }
 
@@ -6032,7 +6102,7 @@ void MDCache::_queued_file_recover_cow(CInode *in, MutationRef& mut)
  * called after recovery to recover file sizes for previously opened (for write)
  * files.  that is, those where max_size > size.
  */
-void MDCache::identify_files_to_recover(vector<CInode*>& recover_q, vector<CInode*>& check_q)
+void MDCache::identify_files_to_recover()
 {
   dout(10) << "identify_files_to_recover" << dendl;
   for (ceph::unordered_map<vinodeno_t,CInode*>::iterator p = inode_map.begin();
@@ -6042,6 +6112,9 @@ void MDCache::identify_files_to_recover(vector<CInode*>& recover_q, vector<CInod
     if (!in->is_auth())
       continue;
 
+    if (in->last != CEPH_NOSNAP)
+      continue;
+
     // Only normal files need file size recovery
     if (!in->is_file()) {
       continue;
@@ -6062,27 +6135,26 @@ void MDCache::identify_files_to_recover(vector<CInode*>& recover_q, vector<CInod
     if (recover) {
       in->auth_pin(&in->filelock);
       in->filelock.set_state(LOCK_PRE_SCAN);
-      recover_q.push_back(in);
-      
-      // make sure past parents are open/get opened
-      SnapRealm *realm = in->find_snaprealm();
-      check_realm_past_parents(realm);
+      rejoin_recover_q.push_back(in);
     } else {
-      check_q.push_back(in);
+      rejoin_check_q.push_back(in);
     }
   }
 }
 
-void MDCache::start_files_to_recover(vector<CInode*>& recover_q, vector<CInode*>& check_q)
+void MDCache::start_files_to_recover()
 {
-  for (vector<CInode*>::iterator p = check_q.begin(); p != check_q.end(); ++p) {
-    CInode *in = *p;
+  for (CInode *in : rejoin_check_q) {
     mds->locker->check_inode_max_size(in);
   }
-  for (vector<CInode*>::iterator p = recover_q.begin(); p != recover_q.end(); ++p) {
-    CInode *in = *p;
+  rejoin_check_q.clear();
+  for (CInode *in : rejoin_recover_q) {
     mds->locker->file_recover(&in->filelock);
   }
+  if (!rejoin_recover_q.empty()) {
+    rejoin_recover_q.clear();
+    do_file_recover();
+  }
 }
 
 void MDCache::do_file_recover()
diff --git a/src/mds/MDCache.h b/src/mds/MDCache.h
index 5aadc13..9152c90 100644
--- a/src/mds/MDCache.h
+++ b/src/mds/MDCache.h
@@ -490,11 +490,10 @@ protected:
   map<inodeno_t,pair<mds_rank_t,map<client_t,Capability::Export> > > rejoin_slave_exports;
   map<client_t,entity_inst_t> rejoin_client_map;
 
-  map<inodeno_t,map<client_t,ceph_mds_cap_reconnect> > cap_exports; // ino -> client -> capex
+  map<inodeno_t,map<client_t,cap_reconnect_t> > cap_exports; // ino -> client -> capex
   map<inodeno_t,mds_rank_t> cap_export_targets; // ino -> auth mds
 
-  map<inodeno_t,map<client_t,map<mds_rank_t,ceph_mds_cap_reconnect> > > cap_imports;  // ino -> client -> frommds -> capex
-  map<inodeno_t,int> cap_imports_dirty;
+  map<inodeno_t,map<client_t,map<mds_rank_t,cap_reconnect_t> > > cap_imports;  // ino -> client -> frommds -> capex
   set<inodeno_t> cap_imports_missing;
   map<inodeno_t, list<MDSInternalContextBase*> > cap_reconnect_waiters;
   int cap_imports_num_opening;
@@ -530,16 +529,16 @@ public:
   void rejoin_start(MDSInternalContext *rejoin_done_);
   void rejoin_gather_finish();
   void rejoin_send_rejoins();
-  void rejoin_export_caps(inodeno_t ino, client_t client, ceph_mds_cap_reconnect& capinfo,
+  void rejoin_export_caps(inodeno_t ino, client_t client, const cap_reconnect_t& icr,
 			  int target=-1) {
-    cap_exports[ino][client] = capinfo;
+    cap_exports[ino][client] = icr;
     cap_export_targets[ino] = target;
   }
-  void rejoin_recovered_caps(inodeno_t ino, client_t client, cap_reconnect_t& icr, 
+  void rejoin_recovered_caps(inodeno_t ino, client_t client, const cap_reconnect_t& icr, 
 			     mds_rank_t frommds=MDS_RANK_NONE) {
-    cap_imports[ino][client][frommds] = icr.capinfo;
+    cap_imports[ino][client][frommds] = icr;
   }
-  ceph_mds_cap_reconnect *get_replay_cap_reconnect(inodeno_t ino, client_t client) {
+  const cap_reconnect_t *get_replay_cap_reconnect(inodeno_t ino, client_t client) {
     if (cap_imports.count(ino) &&
 	cap_imports[ino].count(client) &&
 	cap_imports[ino][client].count(MDS_RANK_NONE)) {
@@ -552,19 +551,29 @@ public:
     assert(cap_imports[ino][client].size() == 1);
     cap_imports.erase(ino);
   }
-  void set_reconnect_dirty_caps(inodeno_t ino, int dirty) {
-    cap_imports_dirty[ino] |= dirty;
-  }
   void wait_replay_cap_reconnect(inodeno_t ino, MDSInternalContextBase *c) {
     cap_reconnect_waiters[ino].push_back(c);
   }
 
   // [reconnect/rejoin caps]
-  map<CInode*,map<client_t, inodeno_t> >  reconnected_caps;   // inode -> client -> realmino
+  struct reconnected_cap_info_t {
+    inodeno_t realm_ino;
+    snapid_t snap_follows;
+    int dirty_caps;
+    reconnected_cap_info_t() :
+      realm_ino(0), snap_follows(0), dirty_caps(0) {}
+  };
+  map<inodeno_t,map<client_t, reconnected_cap_info_t> >  reconnected_caps;   // inode -> client -> snap_follows,realmino
   map<inodeno_t,map<client_t, snapid_t> > reconnected_snaprealms;  // realmino -> client -> realmseq
 
-  void add_reconnected_cap(CInode *in, client_t client, inodeno_t realm) {
-    reconnected_caps[in][client] = realm;
+  void add_reconnected_cap(client_t client, inodeno_t ino, const cap_reconnect_t& icr) {
+    reconnected_cap_info_t &info = reconnected_caps[ino][client];
+    info.realm_ino = inodeno_t(icr.capinfo.snaprealm);
+    info.snap_follows = icr.snap_follows;
+  }
+  void set_reconnected_dirty_caps(client_t client, inodeno_t ino, int dirty) {
+    reconnected_cap_info_t &info = reconnected_caps[ino][client];
+    info.dirty_caps |= dirty;
   }
   void add_reconnected_snaprealm(client_t client, inodeno_t ino, snapid_t seq) {
     reconnected_snaprealms[ino][client] = seq;
@@ -581,21 +590,23 @@ public:
 			   map<client_t,MClientSnap*>& splits);
   void do_realm_invalidate_and_update_notify(CInode *in, int snapop, bool nosend=false);
   void send_snaps(map<client_t,MClientSnap*>& splits);
-  Capability* rejoin_import_cap(CInode *in, client_t client, ceph_mds_cap_reconnect& icr, mds_rank_t frommds);
+  Capability* rejoin_import_cap(CInode *in, client_t client, const cap_reconnect_t& icr, mds_rank_t frommds);
   void finish_snaprealm_reconnect(client_t client, SnapRealm *realm, snapid_t seq);
   void try_reconnect_cap(CInode *in, Session *session);
   void export_remaining_imported_caps();
 
   // cap imports.  delayed snap parent opens.
   //  realm inode -> client -> cap inodes needing to split to this realm
-  map<CInode*,map<client_t, set<inodeno_t> > > missing_snap_parents; 
+  map<CInode*,set<CInode*> > missing_snap_parents;
   map<client_t,set<CInode*> > delayed_imported_caps;
 
   void do_cap_import(Session *session, CInode *in, Capability *cap,
 		     uint64_t p_cap_id, ceph_seq_t p_seq, ceph_seq_t p_mseq,
 		     int peer, int p_flags);
   void do_delayed_cap_imports();
-  void check_realm_past_parents(SnapRealm *realm);
+  void rebuild_need_snapflush(CInode *head_in, SnapRealm *realm, client_t client,
+			      snapid_t snap_follows);
+  void check_realm_past_parents(SnapRealm *realm, bool reconnect);
   void open_snap_parents();
 
   bool open_undef_inodes_dirfrags();
@@ -618,9 +629,9 @@ public:
   // File size recovery
 private:
   RecoveryQueue recovery_queue;
-  void identify_files_to_recover(vector<CInode*>& recover_q, vector<CInode*>& check_q);
-  void start_files_to_recover(vector<CInode*>& recover_q, vector<CInode*>& check_q);
+  void identify_files_to_recover();
 public:
+  void start_files_to_recover();
   void do_file_recover();
   void queue_file_recover(CInode *in);
   void _queued_file_recover_cow(CInode *in, MutationRef& mut);
@@ -825,6 +836,7 @@ public:
                                   version_t dpv, MDSInternalContextBase *fin);
 
   void open_foreign_mdsdir(inodeno_t ino, MDSInternalContextBase *c);
+  CDir *get_stray_dir(CInode *in);
   CDentry *get_or_create_stray_dentry(CInode *in);
 
   MDSInternalContextBase *_get_waiter(MDRequestRef& mdr, Message *req, MDSInternalContextBase *fin);
diff --git a/src/mds/MDSDaemon.cc b/src/mds/MDSDaemon.cc
index e7d9fe5..0482477 100644
--- a/src/mds/MDSDaemon.cc
+++ b/src/mds/MDSDaemon.cc
@@ -577,6 +577,33 @@ void MDSDaemon::tick()
   }
 }
 
+void MDSDaemon::send_command_reply(MCommand *m, MDSRank *mds_rank,
+				   int r, bufferlist outbl,
+				   const std::string& outs)
+{
+  Session *session = static_cast<Session *>(m->get_connection()->get_priv());
+  assert(session != NULL);
+  // If someone is using a closed session for sending commands (e.g.
+  // the ceph CLI) then we should feel free to clean up this connection
+  // as soon as we've sent them a response.
+  const bool live_session = mds_rank &&
+    mds_rank->sessionmap.get_session(session->info.inst.name) != nullptr
+    && session->get_state_seq() > 0;
+
+  if (!live_session) {
+    // This session only existed to issue commands, so terminate it
+    // as soon as we can.
+    assert(session->is_closed());
+    session->connection->mark_disposable();
+    session->put();
+  }
+
+  MCommandReply *reply = new MCommandReply(r, outs);
+  reply->set_tid(m->get_tid());
+  reply->set_data(outbl);
+  m->get_connection()->send_message(reply);
+}
+
 /* This function DOES put the passed message before returning*/
 void MDSDaemon::handle_command(MCommand *m)
 {
@@ -589,7 +616,7 @@ void MDSDaemon::handle_command(MCommand *m)
   std::string outs;
   bufferlist outbl;
   Context *run_after = NULL;
-
+  bool need_reply = true;
 
   if (!session->auth_caps.allow_all()) {
     dout(1) << __func__
@@ -605,29 +632,13 @@ void MDSDaemon::handle_command(MCommand *m)
     r = -EINVAL;
     outs = ss.str();
   } else {
-    r = _handle_command(cmdmap, m->get_data(), &outbl, &outs, &run_after);
+    r = _handle_command(cmdmap, m, &outbl, &outs, &run_after, &need_reply);
   }
 
-  // If someone is using a closed session for sending commands (e.g.
-  // the ceph CLI) then we should feel free to clean up this connection
-  // as soon as we've sent them a response.
-  const bool live_session = mds_rank &&
-    mds_rank->sessionmap.get_session(session->info.inst.name) != nullptr
-    && session->get_state_seq() > 0;
-
-  if (!live_session) {
-    // This session only existed to issue commands, so terminate it
-    // as soon as we can.
-    assert(session->is_closed());
-    session->connection->mark_disposable();
-    session->put();
+  if (need_reply) {
+    send_command_reply(m, mds_rank, r, outbl, outs);
   }
 
-  MCommandReply *reply = new MCommandReply(r, outs);
-  reply->set_tid(m->get_tid());
-  reply->set_data(outbl);
-  m->get_connection()->send_message(reply);
-
   if (run_after) {
     run_after->complete(0);
   }
@@ -693,10 +704,11 @@ void MDSDaemon::handle_command(MMonCommand *m)
 
 int MDSDaemon::_handle_command(
     const cmdmap_t &cmdmap,
-    bufferlist const &inbl,
+    MCommand *m,
     bufferlist *outbl,
     std::string *outs,
-    Context **run_later)
+    Context **run_later,
+    bool *need_reply)
 {
   assert(outbl != NULL);
   assert(outs != NULL);
@@ -814,7 +826,8 @@ int MDSDaemon::_handle_command(
   } else {
     // Give MDSRank a shot at the command
     if (mds_rank) {
-      bool handled = mds_rank->handle_command(cmdmap, inbl, &r, &ds, &ss);
+      bool handled = mds_rank->handle_command(cmdmap, m, &r, &ds, &ss,
+					      need_reply);
       if (handled) {
         goto out;
       }
diff --git a/src/mds/MDSDaemon.h b/src/mds/MDSDaemon.h
index 4d6296c..d193dac 100644
--- a/src/mds/MDSDaemon.h
+++ b/src/mds/MDSDaemon.h
@@ -39,7 +39,7 @@
 #include "Beacon.h"
 
 
-#define CEPH_MDS_PROTOCOL    27 /* cluster internal */
+#define CEPH_MDS_PROTOCOL    28 /* cluster internal */
 
 class filepath;
 
@@ -187,13 +187,17 @@ protected:
   bool handle_core_message(Message *m);
   
   // special message types
+  friend class C_MDS_Send_Command_Reply;
+  static void send_command_reply(MCommand *m, MDSRank* mds_rank, int r,
+				 bufferlist outbl, const std::string& outs);
   int _handle_command_legacy(std::vector<std::string> args);
   int _handle_command(
       const cmdmap_t &cmdmap,
-      bufferlist const &inbl,
+      MCommand *m,
       bufferlist *outbl,
       std::string *outs,
-      Context **run_later);
+      Context **run_later,
+      bool *need_reply);
   void handle_command(class MMonCommand *m);
   void handle_command(class MCommand *m);
   void handle_mds_map(class MMDSMap *m);
diff --git a/src/mds/MDSMap.cc b/src/mds/MDSMap.cc
index f08e78e..488eaad 100644
--- a/src/mds/MDSMap.cc
+++ b/src/mds/MDSMap.cc
@@ -263,7 +263,7 @@ void MDSMap::print_summary(Formatter *f, ostream *out) const
     if (p.second.laggy())
       s += "(laggy or crashed)";
 
-    if (p.second.rank >= 0) {
+    if (p.second.rank >= 0 && p.second.state != MDSMap::STATE_STANDBY_REPLAY) {
       if (f) {
 	f->open_object_section("mds");
 	f->dump_unsigned("rank", p.second.rank);
diff --git a/src/mds/MDSRank.cc b/src/mds/MDSRank.cc
index 188750a..5c2bfe3 100644
--- a/src/mds/MDSRank.cc
+++ b/src/mds/MDSRank.cc
@@ -17,7 +17,10 @@
 
 #include "messages/MClientRequestForward.h"
 #include "messages/MMDSMap.h"
+#include "messages/MCommand.h"
+#include "messages/MCommandReply.h"
 
+#include "MDSDaemon.h"
 #include "MDSMap.h"
 #include "SnapClient.h"
 #include "SnapServer.h"
@@ -222,8 +225,6 @@ void MDSRankDispatcher::shutdown()
   // threads block on IOs that require finisher to complete.
   mdlog->shutdown();
 
-  finisher->stop(); // no flushing
-
   // shut down cache
   mdcache->shutdown();
 
@@ -236,9 +237,15 @@ void MDSRankDispatcher::shutdown()
 
   progress_thread.shutdown();
 
-  // shut down messenger
+  // release mds_lock for finisher/messenger threads (e.g.
+  // MDSDaemon::ms_handle_reset called from Messenger).
+  mds_lock.Unlock();
+
+  finisher->stop(); // no flushing
   messenger->shutdown();
 
+  mds_lock.Lock();
+
   // Workaround unclean shutdown: HeartbeatMap will assert if
   // worker is not removed (as we do in ~MDS), but ~MDS is not
   // always called after suicide.
@@ -251,13 +258,13 @@ void MDSRankDispatcher::shutdown()
 /**
  * Helper for simple callbacks that call a void fn with no args.
  */
-class C_VoidFn : public MDSInternalContext
+class C_MDS_VoidFn : public MDSInternalContext
 {
   typedef void (MDSRank::*fn_ptr)();
   protected:
    fn_ptr fn;
   public:
-  C_VoidFn(MDSRank *mds_, fn_ptr fn_)
+  C_MDS_VoidFn(MDSRank *mds_, fn_ptr fn_)
     : MDSInternalContext(mds_), fn(fn_)
   {
     assert(mds_);
@@ -1160,7 +1167,7 @@ void MDSRank::resolve_start()
 
   reopen_log();
 
-  mdcache->resolve_start(new C_VoidFn(this, &MDSRank::resolve_done));
+  mdcache->resolve_start(new C_MDS_VoidFn(this, &MDSRank::resolve_done));
   finish_contexts(g_ceph_context, waiting_for_resolve);
 }
 void MDSRank::resolve_done()
@@ -1177,7 +1184,7 @@ void MDSRank::reconnect_start()
     reopen_log();
   }
 
-  server->reconnect_clients(new C_VoidFn(this, &MDSRank::reconnect_done));
+  server->reconnect_clients(new C_MDS_VoidFn(this, &MDSRank::reconnect_done));
   finish_contexts(g_ceph_context, waiting_for_reconnect);
 }
 void MDSRank::reconnect_done()
@@ -1194,7 +1201,7 @@ void MDSRank::rejoin_joint_start()
 void MDSRank::rejoin_start()
 {
   dout(1) << "rejoin_start" << dendl;
-  mdcache->rejoin_start(new C_VoidFn(this, &MDSRank::rejoin_done));
+  mdcache->rejoin_start(new C_MDS_VoidFn(this, &MDSRank::rejoin_done));
 }
 void MDSRank::rejoin_done()
 {
@@ -1226,6 +1233,7 @@ void MDSRank::clientreplay_start()
 {
   dout(1) << "clientreplay_start" << dendl;
   finish_contexts(g_ceph_context, waiting_for_replay);  // kick waiters
+  mdcache->start_files_to_recover();
   queue_one_replay();
 }
 
@@ -1259,6 +1267,7 @@ void MDSRank::active_start()
   mdcache->clean_open_file_lists();
   mdcache->export_remaining_imported_caps();
   finish_contexts(g_ceph_context, waiting_for_replay);  // kick waiters
+  mdcache->start_files_to_recover();
 
   mdcache->reissue_all_caps();
 
@@ -1299,7 +1308,7 @@ void MDSRank::boot_create()
 {
   dout(3) << "boot_create" << dendl;
 
-  MDSGatherBuilder fin(g_ceph_context, new C_VoidFn(this, &MDSRank::creating_done));
+  MDSGatherBuilder fin(g_ceph_context, new C_MDS_VoidFn(this, &MDSRank::creating_done));
 
   mdcache->init_layouts();
 
@@ -1774,15 +1783,32 @@ bool MDSRankDispatcher::handle_asok_command(
   return true;
 }
 
+class C_MDS_Send_Command_Reply : public MDSInternalContext
+{
+protected:
+  MCommand *m;
+public:
+  C_MDS_Send_Command_Reply(MDSRank *_mds, MCommand *_m) :
+    MDSInternalContext(_mds), m(_m) { m->get(); }
+  void send (int r) {
+    bufferlist bl;
+    MDSDaemon::send_command_reply(m, mds, r, bl, "");
+    m->put();
+  }
+  void finish (int r) {
+    send(r);
+  }
+};
+
 /**
  * This function drops the mds_lock, so don't do anything with
  * MDSRank after calling it (we could have gone into shutdown): just
  * send your result back to the calling client and finish.
  */
-std::vector<entity_name_t> MDSRankDispatcher::evict_sessions(
-    const SessionFilter &filter)
+void MDSRankDispatcher::evict_sessions(const SessionFilter &filter, MCommand *m)
 {
   std::list<Session*> victims;
+  C_MDS_Send_Command_Reply *reply = new C_MDS_Send_Command_Reply(this, m);
 
   const auto sessions = sessionmap.get_sessions();
   for (const auto p : sessions)  {
@@ -1799,24 +1825,17 @@ std::vector<entity_name_t> MDSRankDispatcher::evict_sessions(
 
   dout(20) << __func__ << " matched " << victims.size() << " sessions" << dendl;
 
-  std::vector<entity_name_t> result;
-
   if (victims.empty()) {
-    return result;
+    reply->send(0);
+    delete reply;
+    return;
   }
 
-  C_SaferCond on_safe;
-  C_GatherBuilder gather(g_ceph_context, &on_safe);
+  C_GatherBuilder gather(g_ceph_context, reply);
   for (const auto s : victims) {
     server->kill_session(s, gather.new_sub());
-    result.push_back(s->info.inst.name);
   }
   gather.activate();
-  mds_lock.Unlock();
-  on_safe.wait();
-  mds_lock.Lock();
-
-  return result;
 }
 
 void MDSRankDispatcher::dump_sessions(const SessionFilter &filter, Formatter *f) const
@@ -2533,15 +2552,18 @@ MDSRankDispatcher::MDSRankDispatcher(
 
 bool MDSRankDispatcher::handle_command(
   const cmdmap_t &cmdmap,
-  bufferlist const &inbl,
+  MCommand *m,
   int *r,
   std::stringstream *ds,
-  std::stringstream *ss)
+  std::stringstream *ss,
+  bool *need_reply)
 {
   assert(r != nullptr);
   assert(ds != nullptr);
   assert(ss != nullptr);
 
+  *need_reply = true;
+
   std::string prefix;
   cmd_getval(g_ceph_context, cmdmap, "prefix", prefix);
 
@@ -2570,8 +2592,9 @@ bool MDSRankDispatcher::handle_command(
       return true;
     }
 
-    evict_sessions(filter);
+    evict_sessions(filter, m);
 
+    *need_reply = false;
     return true;
   } else if (prefix == "damage ls") {
     Formatter *f = new JSONFormatter();
diff --git a/src/mds/MDSRank.h b/src/mds/MDSRank.h
index fe65c56..17e259b 100644
--- a/src/mds/MDSRank.h
+++ b/src/mds/MDSRank.h
@@ -19,6 +19,8 @@
 #include "common/LogClient.h"
 #include "common/Timer.h"
 
+#include "messages/MCommand.h"
+
 #include "Beacon.h"
 #include "DamageTable.h"
 #include "MDSMap.h"
@@ -492,15 +494,14 @@ public:
 
   bool handle_command(
     const cmdmap_t &cmdmap,
-    bufferlist const &inbl,
+    MCommand *m,
     int *r,
     std::stringstream *ds,
-    std::stringstream *ss);
+    std::stringstream *ss,
+    bool *need_reply);
 
-  void dump_sessions(
-      const SessionFilter &filter, Formatter *f) const;
-  std::vector<entity_name_t> evict_sessions(
-      const SessionFilter &filter);
+  void dump_sessions(const SessionFilter &filter, Formatter *f) const;
+  void evict_sessions(const SessionFilter &filter, MCommand *m);
 
   // Call into me from MDS::ms_dispatch
   bool ms_dispatch(Message *m);
diff --git a/src/mds/Server.cc b/src/mds/Server.cc
index 3137447..7edb4b3 100644
--- a/src/mds/Server.cc
+++ b/src/mds/Server.cc
@@ -809,8 +809,8 @@ void Server::handle_client_reconnect(MClientReconnect *m)
       // we recovered it, and it's ours.  take note.
       dout(15) << "open cap realm " << inodeno_t(p->second.capinfo.snaprealm)
 	       << " on " << *in << dendl;
-      in->reconnect_cap(from, p->second.capinfo, session);
-      mdcache->add_reconnected_cap(in, from, inodeno_t(p->second.capinfo.snaprealm));
+      in->reconnect_cap(from, p->second, session);
+      mdcache->add_reconnected_cap(from, p->first, p->second);
       recover_filelocks(in, p->second.flockbl, m->get_orig_source().num());
       continue;
     }
@@ -819,11 +819,13 @@ void Server::handle_client_reconnect(MClientReconnect *m)
       // not mine.
       dout(10) << "non-auth " << *in << ", will pass off to authority" << dendl;
       // add to cap export list.
-      mdcache->rejoin_export_caps(p->first, from, p->second.capinfo,
+      p->second.path.clear(); // we don't need path
+      mdcache->rejoin_export_caps(p->first, from, p->second,
 				  in->authority().first);
     } else {
       // don't know if the inode is mine
       dout(10) << "missing ino " << p->first << ", will load later" << dendl;
+      p->second.path.clear(); // we don't need path
       mdcache->rejoin_recovered_caps(p->first, from, p->second, MDS_RANK_NONE);
     }
   }
@@ -2147,6 +2149,23 @@ bool Server::check_access(MDRequestRef& mdr, CInode *in, unsigned mask)
   return true;
 }
 
+/**
+ * check whether fragment has reached maximum size
+ *
+ */
+bool Server::check_fragment_space(MDRequestRef &mdr, CDir *in)
+{
+  const auto size = in->get_frag_size();
+  if (size >= g_conf->mds_bal_fragment_size_max) {
+    dout(10) << "fragment " << *in << " size exceeds " << g_conf->mds_bal_fragment_size_max << " (ENOSPC)" << dendl;
+    respond_to_request(mdr, -ENOSPC);
+    return false;
+  }
+
+  return true;
+}
+
+
 /** validate_dentry_dir
  *
  * verify that the dir exists and would own the dname.
@@ -2231,15 +2250,20 @@ CDentry* Server::prepare_stray_dentry(MDRequestRef& mdr, CInode *in)
 {
   CDentry *straydn = mdr->straydn;
   if (straydn) {
-    string name;
-    in->name_stray_dentry(name);
-    if (straydn->get_name() == name)
+    string straydname;
+    in->name_stray_dentry(straydname);
+    if (straydn->get_name() == straydname)
       return straydn;
 
     assert(!mdr->done_locking);
     mdr->unpin(straydn);
   }
 
+  CDir *straydir = mdcache->get_stray_dir(in);
+
+  if (!check_fragment_space(mdr, straydir))
+    return NULL;
+
   straydn = mdcache->get_or_create_stray_dentry(in);
   mdr->straydn = straydn;
   mdr->pin(straydn);
@@ -2893,9 +2917,13 @@ void Server::handle_client_open(MDRequestRef& mdr)
     return;
   }
 
-  // can only open a dir with mode FILE_MODE_PIN, at least for now.
-  if (cur->inode.is_dir())
+  if (!cur->inode.is_file()) {
+    // can only open non-regular inode with mode FILE_MODE_PIN, at least for now.
     cmode = CEPH_FILE_MODE_PIN;
+    // the inode is symlink and client wants to follow it, ignore the O_TRUNC flag.
+    if (cur->inode.is_symlink() && !(flags & O_NOFOLLOW))
+      flags &= ~O_TRUNC;
+  }
 
   dout(10) << "open flags = " << flags
 	   << ", filemode = " << cmode
@@ -2908,12 +2936,19 @@ void Server::handle_client_open(MDRequestRef& mdr)
     respond_to_request(mdr, -ENXIO);                 // FIXME what error do we want?
     return;
     }*/
-  if ((flags & O_DIRECTORY) && !cur->inode.is_dir()) {
+  if ((flags & O_DIRECTORY) && !cur->inode.is_dir() && !cur->inode.is_symlink()) {
     dout(7) << "specified O_DIRECTORY on non-directory " << *cur << dendl;
     respond_to_request(mdr, -EINVAL);
     return;
   }
 
+  if ((flags & O_TRUNC) && !cur->inode.is_file()) {
+    dout(7) << "specified O_TRUNC on !(file|symlink) " << *cur << dendl;
+    // we should return -EISDIR for directory, return -EINVAL for other non-regular
+    respond_to_request(mdr, cur->inode.is_dir() ? -EISDIR : -EINVAL);
+    return;
+  }
+
   if (cur->inode.inline_data.version != CEPH_INLINE_NONE &&
       !mdr->session->connection->has_feature(CEPH_FEATURE_MDS_INLINE_DATA)) {
     dout(7) << "old client cannot open inline data file " << *cur << dendl;
@@ -3168,7 +3203,8 @@ void Server::handle_client_openc(MDRequestRef& mdr)
     return;
   }
 
-  CInode *diri = dn->get_dir()->get_inode();
+  CDir *dir = dn->get_dir();
+  CInode *diri = dir->get_inode();
   rdlocks.insert(&diri->authlock);
   if (!mds->locker->acquire_locks(mdr, rdlocks, wrlocks, xlocks))
     return;
@@ -3176,6 +3212,9 @@ void Server::handle_client_openc(MDRequestRef& mdr)
   if (!check_access(mdr, diri, access))
     return;
 
+  if (!check_fragment_space(mdr, dir))
+    return;
+
   CDentry::linkage_t *dnl = dn->get_projected_linkage();
 
   if (!dnl->is_null()) {
@@ -4577,6 +4616,9 @@ void Server::handle_client_mknod(MDRequestRef& mdr)
   if (!check_access(mdr, diri, MAY_WRITE))
     return;
 
+  if (!check_fragment_space(mdr, dn->get_dir()))
+    return;
+
   unsigned mode = req->head.args.mknod.mode;
   if ((mode & S_IFMT) == 0)
     mode |= S_IFREG;
@@ -4660,7 +4702,8 @@ void Server::handle_client_mkdir(MDRequestRef& mdr)
     respond_to_request(mdr, -EROFS);
     return;
   }
-  CInode *diri = dn->get_dir()->get_inode();
+  CDir *dir = dn->get_dir();
+  CInode *diri = dir->get_inode();
   rdlocks.insert(&diri->authlock);
   if (!mds->locker->acquire_locks(mdr, rdlocks, wrlocks, xlocks))
     return;
@@ -4669,6 +4712,9 @@ void Server::handle_client_mkdir(MDRequestRef& mdr)
   if (!check_access(mdr, diri, MAY_WRITE))
     return;
 
+  if (!check_fragment_space(mdr, dir))
+    return;
+
   // new inode
   SnapRealm *realm = dn->get_dir()->inode->find_snaprealm();
   snapid_t follows = realm->get_newest_seq();
@@ -4740,7 +4786,8 @@ void Server::handle_client_symlink(MDRequestRef& mdr)
     respond_to_request(mdr, -EROFS);
     return;
   }
-  CInode *diri = dn->get_dir()->get_inode();
+  CDir *dir = dn->get_dir();
+  CInode *diri = dir->get_inode();
   rdlocks.insert(&diri->authlock);
   if (!mds->locker->acquire_locks(mdr, rdlocks, wrlocks, xlocks))
     return;
@@ -4748,6 +4795,9 @@ void Server::handle_client_symlink(MDRequestRef& mdr)
   if (!check_access(mdr, diri, MAY_WRITE))
    return;
 
+  if (!check_fragment_space(mdr, dir))
+    return;
+
   unsigned mode = S_IFLNK | 0777;
   CInode *newi = prepare_new_inode(mdr, dn->get_dir(), inodeno_t(req->head.ino), mode);
   assert(newi);
@@ -4821,6 +4871,9 @@ void Server::handle_client_link(MDRequestRef& mdr)
   if (!check_access(mdr, dir->get_inode(), MAY_WRITE))
     return;
 
+  if (!check_fragment_space(mdr, dir))
+    return;
+
   // go!
   assert(g_conf->mds_kill_link_at != 1);
 
@@ -5392,6 +5445,8 @@ void Server::handle_client_unlink(MDRequestRef& mdr)
   CDentry *straydn = NULL;
   if (dnl->is_primary()) {
     straydn = prepare_stray_dentry(mdr, dnl->get_inode());
+    if (!straydn)
+      return;
     dout(10) << " straydn is " << *straydn << dendl;
   } else if (mdr->straydn) {
     mdr->unpin(mdr->straydn);
@@ -5598,8 +5653,9 @@ void Server::_unlink_local_finish(MDRequestRef& mdr,
   // removing a new dn?
   dn->get_dir()->try_remove_unlinked_dn(dn);
 
-  // clean up?
-  if (straydn) {
+  // clean up ?
+  // respond_to_request() drops locks. So stray reintegration can race with us.
+  if (straydn && !straydn->get_projected_linkage()->is_null()) {
     // Tip off the MDCache that this dentry is a stray that
     // might be elegible for purge.
     mdcache->notify_stray(straydn);
@@ -6171,6 +6227,8 @@ void Server::handle_client_rename(MDRequestRef& mdr)
   CDentry *straydn = NULL;
   if (destdnl->is_primary() && !linkmerge) {
     straydn = prepare_stray_dentry(mdr, destdnl->get_inode());
+    if (!straydn)
+      return;
     dout(10) << " straydn is " << *straydn << dendl;
   } else if (mdr->straydn) {
     mdr->unpin(mdr->straydn);
@@ -6281,6 +6339,9 @@ void Server::handle_client_rename(MDRequestRef& mdr)
   if (!check_access(mdr, destdn->get_dir()->get_inode(), MAY_WRITE))
     return;
 
+  if (!check_fragment_space(mdr, destdn->get_dir()))
+    return;
+
   if (!check_access(mdr, srci, MAY_WRITE))
     return;
 
@@ -6446,7 +6507,8 @@ void Server::_rename_finish(MDRequestRef& mdr, CDentry *srcdn, CDentry *destdn,
     mds->locker->eval(in, CEPH_CAP_LOCKS, true);
 
   // clean up?
-  if (straydn) {
+  // respond_to_request() drops locks. So stray reintegration can race with us.
+  if (straydn && !straydn->get_projected_linkage()->is_null()) {
     mdcache->notify_stray(straydn);
   }
 }
diff --git a/src/mds/Server.h b/src/mds/Server.h
index 66aa6b9..0e87103 100644
--- a/src/mds/Server.h
+++ b/src/mds/Server.h
@@ -134,6 +134,7 @@ public:
   void handle_slave_auth_pin_ack(MDRequestRef& mdr, MMDSSlaveRequest *ack);
 
   // some helpers
+  bool check_fragment_space(MDRequestRef& mdr, CDir *in);
   bool check_access(MDRequestRef& mdr, CInode *in, unsigned mask);
   bool _check_access(Session *session, CInode *in, unsigned mask, int caller_uid, int caller_gid, int setattr_uid, int setattr_gid);
   CDir *validate_dentry_dir(MDRequestRef& mdr, CInode *diri, const string& dname);
diff --git a/src/mds/SessionMap.cc b/src/mds/SessionMap.cc
index 1e858e3..43053fe 100644
--- a/src/mds/SessionMap.cc
+++ b/src/mds/SessionMap.cc
@@ -854,7 +854,7 @@ int Session::check_access(CInode *in, unsigned mask,
     path = in->get_projected_inode()->stray_prior_path;
     dout(20) << __func__ << " stray_prior_path " << path << dendl;
   } else {
-    in->make_path_string(path, false, in->get_projected_parent_dn());
+    in->make_path_string(path, in->get_projected_parent_dn());
     dout(20) << __func__ << " path " << path << dendl;
   }
   if (path.length())
diff --git a/src/mds/SimpleLock.h b/src/mds/SimpleLock.h
index faec26a..6d1d7fa 100644
--- a/src/mds/SimpleLock.h
+++ b/src/mds/SimpleLock.h
@@ -306,7 +306,7 @@ public:
     parent->take_waiting(mask << get_wait_shift(), ls);
   }
   void add_waiter(uint64_t mask, MDSInternalContextBase *c) {
-    parent->add_waiter(mask << get_wait_shift(), c);
+    parent->add_waiter((mask << get_wait_shift()) | MDSCacheObject::WAIT_ORDERED, c);
   }
   bool is_waiter_for(uint64_t mask) const {
     return parent->is_waiter_for(mask << get_wait_shift());
diff --git a/src/mds/SnapRealm.cc b/src/mds/SnapRealm.cc
index b44ca22..d8a860e 100644
--- a/src/mds/SnapRealm.cc
+++ b/src/mds/SnapRealm.cc
@@ -189,8 +189,8 @@ bool SnapRealm::have_past_parents_open(snapid_t first, snapid_t last)
       return false;
     }
     SnapRealm *parent_realm = open_past_parents[p->second.ino].first;
-    if (parent_realm->have_past_parents_open(MAX(first, p->second.first),
-					     MIN(last, p->first)))
+    if (!parent_realm->have_past_parents_open(MAX(first, p->second.first),
+					      MIN(last, p->first)))
       return false;
   }
 
diff --git a/src/mds/StrayManager.cc b/src/mds/StrayManager.cc
index e6f4959..38e7f11 100644
--- a/src/mds/StrayManager.cc
+++ b/src/mds/StrayManager.cc
@@ -564,7 +564,12 @@ bool StrayManager::__eval_stray(CDentry *dn, bool delay)
     if (in->is_dir()) {
       if (in->snaprealm && in->snaprealm->has_past_parents()) {
 	dout(20) << "  directory has past parents "
-          << in->snaprealm->srnode.past_parents << dendl;
+		 << in->snaprealm->srnode.past_parents << dendl;
+	if (in->state_test(CInode::STATE_MISSINGOBJS)) {
+	  mds->clog->error() << "previous attempt at committing dirfrag of ino "
+			     << in->ino() << " has failed, missing object\n";
+	  mds->handle_write_error(-ENOENT);
+	}
 	return false;  // not until some snaps are deleted.
       }
 
diff --git a/src/mds/events/EMetaBlob.h b/src/mds/events/EMetaBlob.h
index 77f8c3c..777152e 100644
--- a/src/mds/events/EMetaBlob.h
+++ b/src/mds/events/EMetaBlob.h
@@ -62,6 +62,7 @@ public:
     static const int STATE_DIRTY =	 (1<<0);
     static const int STATE_DIRTYPARENT = (1<<1);
     static const int STATE_DIRTYPOOL   = (1<<2);
+    static const int STATE_NEED_SNAPFLUSH = (1<<3);
     typedef compact_map<snapid_t, old_inode_t> old_inodes_t;
     string  dn;         // dentry
     snapid_t dnfirst, dnlast;
@@ -109,6 +110,7 @@ public:
     bool is_dirty() const { return (state & STATE_DIRTY); }
     bool is_dirty_parent() const { return (state & STATE_DIRTYPARENT); }
     bool is_dirty_pool() const { return (state & STATE_DIRTYPOOL); }
+    bool need_snapflush() const { return (state & STATE_NEED_SNAPFLUSH); }
 
     void print(ostream& out) const {
       out << " fullbit dn " << dn << " [" << dnfirst << "," << dnlast << "] dnv " << dnv
@@ -420,11 +422,13 @@ private:
 
   // return remote pointer to to-be-journaled inode
   void add_primary_dentry(CDentry *dn, CInode *in, bool dirty,
-			  bool dirty_parent=false, bool dirty_pool=false) {
+			  bool dirty_parent=false, bool dirty_pool=false,
+			  bool need_snapflush=false) {
     __u8 state = 0;
     if (dirty) state |= fullbit::STATE_DIRTY;
     if (dirty_parent) state |= fullbit::STATE_DIRTYPARENT;
     if (dirty_pool) state |= fullbit::STATE_DIRTYPOOL;
+    if (need_snapflush) state |= fullbit::STATE_NEED_SNAPFLUSH;
     add_primary_dentry(add_dir(dn->get_dir(), false), dn, in, state);
   }
   void add_primary_dentry(dirlump& lump, CDentry *dn, CInode *in, __u8 state) {
diff --git a/src/mds/events/EOpen.h b/src/mds/events/EOpen.h
index c48d735..601652a 100644
--- a/src/mds/events/EOpen.h
+++ b/src/mds/events/EOpen.h
@@ -22,6 +22,7 @@ class EOpen : public LogEvent {
 public:
   EMetaBlob metablob;
   vector<inodeno_t> inos;
+  vector<vinodeno_t> snap_inos;
 
   EOpen() : LogEvent(EVENT_OPEN) { }
   explicit EOpen(MDLog *mdlog) :
@@ -37,7 +38,10 @@ public:
     if (!in->is_base()) {
       metablob.add_dir_context(in->get_projected_parent_dn()->get_dir());
       metablob.add_primary_dentry(in->get_projected_parent_dn(), 0, false);
-      inos.push_back(in->ino());
+      if (in->last == CEPH_NOSNAP)
+	inos.push_back(in->ino());
+      else
+	snap_inos.push_back(in->vino());
     }
   }
   void add_ino(inodeno_t ino) {
diff --git a/src/mds/journal.cc b/src/mds/journal.cc
index 0f27971..fd7aaf8 100644
--- a/src/mds/journal.cc
+++ b/src/mds/journal.cc
@@ -147,7 +147,7 @@ void LogSegment::try_to_expire(MDSRank *mds, MDSGatherBuilder &gather_bld, int o
 
   assert(g_conf->mds_kill_journal_expire_at != 2);
 
-  // open files
+  // open files and snap inodes
   if (!open_files.empty()) {
     assert(!mds->mdlog->is_capped()); // hmm FIXME
     EOpen *le = 0;
@@ -156,9 +156,9 @@ void LogSegment::try_to_expire(MDSRank *mds, MDSGatherBuilder &gather_bld, int o
     elist<CInode*>::iterator p = open_files.begin(member_offset(CInode, item_open_file));
     while (!p.end()) {
       CInode *in = *p;
-      assert(in->last == CEPH_NOSNAP);
       ++p;
-      if (in->is_auth() && !in->is_ambiguous_auth() && in->is_any_caps()) {
+      if (in->last == CEPH_NOSNAP && in->is_auth() &&
+	  !in->is_ambiguous_auth() && in->is_any_caps()) {
 	if (in->is_any_caps_wanted()) {
 	  dout(20) << "try_to_expire requeueing open file " << *in << dendl;
 	  if (!le) {
@@ -172,6 +172,15 @@ void LogSegment::try_to_expire(MDSRank *mds, MDSGatherBuilder &gather_bld, int o
 	  dout(20) << "try_to_expire not requeueing and delisting unwanted file " << *in << dendl;
 	  in->item_open_file.remove_myself();
 	}
+      } else if (in->last != CEPH_NOSNAP && !in->client_snap_caps.empty()) {
+	// journal snap inodes that need flush. This simplifies the mds failover handling
+	dout(20) << "try_to_expire requeueing snap needflush inode " << *in << dendl;
+	if (!le) {
+	  le = new EOpen(mds->mdlog);
+	  mds->mdlog->start_entry(le);
+	}
+	le->add_clean_inode(in);
+	ls->open_files.push_back(&in->item_open_file);
       } else {
 	/*
 	 * we can get a capless inode here if we replay an open file, the client fails to
@@ -1332,6 +1341,8 @@ void EMetaBlob::replay(MDSRank *mds, LogSegment *logseg, MDSlaveUpdate *slaveup)
 	in->_mark_dirty(logseg);
       if (p->is_dirty_parent())
 	in->_mark_dirty_parent(logseg, p->is_dirty_pool());
+      if (p->need_snapflush())
+	logseg->open_files.push_back(&in->item_open_file);
       if (dn->is_auth())
 	in->state_set(CInode::STATE_AUTH);
       else
@@ -2120,10 +2131,11 @@ void EUpdate::replay(MDSRank *mds)
 // EOpen
 
 void EOpen::encode(bufferlist &bl, uint64_t features) const {
-  ENCODE_START(3, 3, bl);
+  ENCODE_START(4, 3, bl);
   ::encode(stamp, bl);
   ::encode(metablob, bl, features);
   ::encode(inos, bl);
+  ::encode(snap_inos, bl);
   ENCODE_FINISH(bl);
 } 
 
@@ -2133,6 +2145,8 @@ void EOpen::decode(bufferlist::iterator &bl) {
     ::decode(stamp, bl);
   ::decode(metablob, bl);
   ::decode(inos, bl);
+  if (struct_v >= 4)
+    ::decode(snap_inos, bl);
   DECODE_FINISH(bl);
 }
 
@@ -2167,12 +2181,18 @@ void EOpen::replay(MDSRank *mds)
   metablob.replay(mds, _segment);
 
   // note which segments inodes belong to, so we don't have to start rejournaling them
-  for (vector<inodeno_t>::iterator p = inos.begin();
-       p != inos.end();
-       ++p) {
-    CInode *in = mds->mdcache->get_inode(*p);
+  for (const auto &ino : inos) {
+    CInode *in = mds->mdcache->get_inode(ino);
+    if (!in) {
+      dout(0) << "EOpen.replay ino " << ino << " not in metablob" << dendl;
+      assert(in);
+    }
+    _segment->open_files.push_back(&in->item_open_file);
+  }
+  for (const auto &vino : snap_inos) {
+    CInode *in = mds->mdcache->get_inode(vino);
     if (!in) {
-      dout(0) << "EOpen.replay ino " << *p << " not in metablob" << dendl;
+      dout(0) << "EOpen.replay ino " << vino << " not in metablob" << dendl;
       assert(in);
     }
     _segment->open_files.push_back(&in->item_open_file);
diff --git a/src/mds/mdstypes.cc b/src/mds/mdstypes.cc
index 066b26b..186c4a1 100644
--- a/src/mds/mdstypes.cc
+++ b/src/mds/mdstypes.cc
@@ -996,8 +996,9 @@ void mds_load_t::generate_test_instances(list<mds_load_t*>& ls)
  * cap_reconnect_t
  */
 void cap_reconnect_t::encode(bufferlist& bl) const {
-  ENCODE_START(1, 1, bl);
+  ENCODE_START(2, 1, bl);
   encode_old(bl); // extract out when something changes
+  ::encode(snap_follows, bl);
   ENCODE_FINISH(bl);
 }
 
@@ -1011,6 +1012,8 @@ void cap_reconnect_t::encode_old(bufferlist& bl) const {
 void cap_reconnect_t::decode(bufferlist::iterator& bl) {
   DECODE_START(1, bl);
   decode_old(bl); // extract out when something changes
+  if (struct_v >= 2)
+    ::decode(snap_follows, bl);
   DECODE_FINISH(bl);
 }
 
@@ -1038,6 +1041,8 @@ void cap_reconnect_t::generate_test_instances(list<cap_reconnect_t*>& ls)
   ls.back()->capinfo.cap_id = 1;
 }
 
+uint64_t MDSCacheObject::last_wait_seq = 0;
+
 void MDSCacheObject::dump(Formatter *f) const
 {
   f->dump_bool("is_auth", is_auth());
diff --git a/src/mds/mdstypes.h b/src/mds/mdstypes.h
index 991712d..b4195eb 100644
--- a/src/mds/mdstypes.h
+++ b/src/mds/mdstypes.h
@@ -888,13 +888,15 @@ namespace std {
 struct cap_reconnect_t {
   string path;
   mutable ceph_mds_cap_reconnect capinfo;
+  snapid_t snap_follows;
   bufferlist flockbl;
 
   cap_reconnect_t() {
     memset(&capinfo, 0, sizeof(capinfo));
+    snap_follows = 0;
   }
   cap_reconnect_t(uint64_t cap_id, inodeno_t pino, const string& p, int w, int i,
-		  inodeno_t sr, bufferlist& lb) :
+		  inodeno_t sr, snapid_t sf, bufferlist& lb) :
     path(p) {
     capinfo.cap_id = cap_id;
     capinfo.wanted = w;
@@ -902,6 +904,7 @@ struct cap_reconnect_t {
     capinfo.snaprealm = sr;
     capinfo.pathbase = pino;
     capinfo.flock_len = 0;
+    snap_follows = sf;
     flockbl.claim(lb);
   }
   void encode(bufferlist& bl) const;
@@ -1342,6 +1345,7 @@ class MDSCacheObject {
 
 
   // -- wait --
+  const static uint64_t WAIT_ORDERED	 = (1ull<<61);
   const static uint64_t WAIT_SINGLEAUTH  = (1ull<<60);
   const static uint64_t WAIT_UNFREEZE    = (1ull<<59); // pka AUTHPINNABLE
 
@@ -1550,7 +1554,8 @@ protected:
   // ---------------------------------------------
   // waiting
  protected:
-  compact_multimap<uint64_t, MDSInternalContextBase*>  waiting;
+  compact_multimap<uint64_t, pair<uint64_t, MDSInternalContextBase*> > waiting;
+  static uint64_t last_wait_seq;
 
  public:
   bool is_waiter_for(uint64_t mask, uint64_t min=0) {
@@ -1559,7 +1564,7 @@ protected:
       while (min & (min-1))  // if more than one bit is set
 	min &= min-1;        //  clear LSB
     }
-    for (compact_multimap<uint64_t,MDSInternalContextBase*>::iterator p = waiting.lower_bound(min);
+    for (auto p = waiting.lower_bound(min);
 	 p != waiting.end();
 	 ++p) {
       if (p->first & mask) return true;
@@ -1570,7 +1575,15 @@ protected:
   virtual void add_waiter(uint64_t mask, MDSInternalContextBase *c) {
     if (waiting.empty())
       get(PIN_WAITER);
-    waiting.insert(pair<uint64_t,MDSInternalContextBase*>(mask, c));
+
+    uint64_t seq = 0;
+    if (mask & WAIT_ORDERED) {
+      seq = ++last_wait_seq;
+      mask &= ~WAIT_ORDERED;
+    }
+    waiting.insert(pair<uint64_t, pair<uint64_t, MDSInternalContextBase*> >(
+			    mask,
+			    pair<uint64_t, MDSInternalContextBase*>(seq, c)));
 //    pdout(10,g_conf->debug_mds) << (mdsco_db_line_prefix(this)) 
 //			       << "add_waiter " << hex << mask << dec << " " << c
 //			       << " on " << *this
@@ -1579,10 +1592,18 @@ protected:
   }
   virtual void take_waiting(uint64_t mask, list<MDSInternalContextBase*>& ls) {
     if (waiting.empty()) return;
-    compact_multimap<uint64_t,MDSInternalContextBase*>::iterator it = waiting.begin();
-    while (it != waiting.end()) {
+
+    // process ordered waiters in the same order that they were added.
+    std::map<uint64_t, MDSInternalContextBase*> ordered_waiters;
+
+    for (auto it = waiting.begin();
+	 it != waiting.end(); ) {
       if (it->first & mask) {
-	ls.push_back(it->second);
+
+	if (it->second.first > 0)
+	  ordered_waiters.insert(it->second);
+	else
+	  ls.push_back(it->second.second);
 //	pdout(10,g_conf->debug_mds) << (mdsco_db_line_prefix(this))
 //				   << "take_waiting mask " << hex << mask << dec << " took " << it->second
 //				   << " tag " << hex << it->first << dec
@@ -1597,6 +1618,11 @@ protected:
 	++it;
       }
     }
+    for (auto it = ordered_waiters.begin();
+	 it != ordered_waiters.end();
+	 ++it) {
+      ls.push_back(it->second);
+    }
     if (waiting.empty())
       put(PIN_WAITER);
   }
diff --git a/src/messages/MClientReconnect.h b/src/messages/MClientReconnect.h
index 8a9ff42..bfb8069 100644
--- a/src/messages/MClientReconnect.h
+++ b/src/messages/MClientReconnect.h
@@ -40,9 +40,9 @@ public:
   }
 
   void add_cap(inodeno_t ino, uint64_t cap_id, inodeno_t pathbase, const string& path,
-	       int wanted, int issued, inodeno_t sr, bufferlist& lb)
+	       int wanted, int issued, inodeno_t sr, snapid_t sf, bufferlist& lb)
   {
-    caps[ino] = cap_reconnect_t(cap_id, pathbase, path, wanted, issued, sr, lb);
+    caps[ino] = cap_reconnect_t(cap_id, pathbase, path, wanted, issued, sr, sf, lb);
   }
   void add_snaprealm(inodeno_t ino, snapid_t seq, inodeno_t parent) {
     ceph_mds_snaprealm_reconnect r;
diff --git a/src/messages/MMDSCacheRejoin.h b/src/messages/MMDSCacheRejoin.h
index 111a244..4bb47eb 100644
--- a/src/messages/MMDSCacheRejoin.h
+++ b/src/messages/MMDSCacheRejoin.h
@@ -21,6 +21,7 @@
 
 #include "mds/CInode.h"
 #include "mds/CDir.h"
+#include "mds/mdstypes.h"
 
 // sent from replica to auth
 
@@ -163,7 +164,7 @@ class MMDSCacheRejoin : public Message {
   map<vinodeno_t, inode_strong> strong_inodes;
 
   // open
-  map<inodeno_t,map<client_t, ceph_mds_cap_reconnect> > cap_exports;
+  map<inodeno_t,map<client_t, cap_reconnect_t> > cap_exports;
   map<client_t, entity_inst_t> client_map;
   bufferlist imported_caps;
 
diff --git a/src/messages/MNop.h b/src/messages/MNop.h
new file mode 100644
index 0000000..f820abb
--- /dev/null
+++ b/src/messages/MNop.h
@@ -0,0 +1,54 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2004-2006 Sage Weil <sage at newdream.net>
+ * Portions Copyright (C) 2014 CohortFS, LLC
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation.  See file COPYING.
+ *
+ */
+
+#ifndef CEPH_MSG_NOP_H
+#define CEPH_MSG_NOP_H
+
+#include "msg/Message.h"
+#include "msg/msg_types.h"
+
+/*
+ * A message with no (remote) effect.
+ */
+class MNop : public Message {
+public:
+  static const int HEAD_VERSION = 1;
+  static const int COMPAT_VERSION = 1;
+
+  __u32 tag; // ignored tag value
+
+  MNop()
+    : Message(MSG_NOP, HEAD_VERSION, COMPAT_VERSION)
+    {}
+
+  ~MNop() {}
+
+  void encode_payload(uint64_t _features) {
+    ::encode(tag, payload);
+  }
+
+  void decode_payload() {
+    bufferlist::iterator p = payload.begin();
+    ::decode(tag, p);
+  }
+
+  const char *get_type_name() const { return "MNop"; }
+
+  void print(ostream& out) const {
+    out << get_type_name() << " ";
+  }
+}; /* MNop */
+
+#endif /* CEPH_MSG_NOP_H */
diff --git a/src/messages/Makefile.am b/src/messages/Makefile.am
index 0a19dad..0f4257b 100644
--- a/src/messages/Makefile.am
+++ b/src/messages/Makefile.am
@@ -123,5 +123,6 @@ noinst_HEADERS += \
 	messages/MStatfsReply.h \
 	messages/MTimeCheck.h \
 	messages/MWatchNotify.h \
-	messages/PaxosServiceMessage.h
+	messages/PaxosServiceMessage.h \
+	messages/MNop.h
 
diff --git a/src/mon/MDSMonitor.cc b/src/mon/MDSMonitor.cc
index 9d72a0f..6d6fb9d 100644
--- a/src/mon/MDSMonitor.cc
+++ b/src/mon/MDSMonitor.cc
@@ -616,7 +616,8 @@ bool MDSMonitor::prepare_beacon(MonOpRequestRef op)
       dout(4) << __func__ << ": marking rank "
               << info.rank << " damaged" << dendl;
 
-      const utime_t until = ceph_clock_now(g_ceph_context);
+      utime_t until = ceph_clock_now(g_ceph_context);
+      until += g_conf->mds_blacklist_interval;
       const auto blacklist_epoch = mon->osdmon()->blacklist(info.addr, until);
       request_proposal(mon->osdmon());
       pending_fsmap.damaged(gid, blacklist_epoch);
@@ -1472,7 +1473,7 @@ class FlagSetHandler : public FileSystemCommandHandler
         return r;
       }
 
-      bool jewel = mon->get_quorum_features() && CEPH_FEATURE_SERVER_JEWEL;
+      bool jewel = mon->get_quorum_features() & CEPH_FEATURE_SERVER_JEWEL;
       if (flag_bool && !jewel) {
         ss << "Multiple-filesystems are forbidden until all mons are updated";
         return -EINVAL;
@@ -2414,22 +2415,22 @@ void MDSMonitor::check_subs()
 {
   std::list<std::string> types;
 
-  // Subscriptions may be to "fsmap" (MDS and legacy clients),
-  // "fsmap.<namespace>", or to "fsmap" for the full state of all
+  // Subscriptions may be to "mdsmap" (MDS and legacy clients),
+  // "mdsmap.<namespace>", or to "fsmap" for the full state of all
   // filesystems.  Build a list of all the types we service
   // subscriptions for.
-  types.push_back("mdsmap");
   types.push_back("fsmap");
+  types.push_back("mdsmap");
   for (const auto &i : fsmap.filesystems) {
     auto fscid = i.first;
     std::ostringstream oss;
-    oss << "fsmap." << fscid;
+    oss << "mdsmap." << fscid;
     types.push_back(oss.str());
   }
 
   for (const auto &type : types) {
     if (mon->session_map.subs.count(type) == 0)
-      return;
+      continue;
     xlist<Subscription*>::iterator p = mon->session_map.subs[type]->begin();
     while (!p.end()) {
       Subscription *sub = *p;
diff --git a/src/mon/MonClient.cc b/src/mon/MonClient.cc
index f906406..6395918 100644
--- a/src/mon/MonClient.cc
+++ b/src/mon/MonClient.cc
@@ -779,7 +779,9 @@ void MonClient::_renew_subs()
     m->what = sub_new;
     _send_mon_message(m);
 
-    sub_sent.insert(sub_new.begin(), sub_new.end());
+    // update sub_sent with sub_new
+    sub_new.insert(sub_sent.begin(), sub_sent.end());
+    std::swap(sub_new, sub_sent);
     sub_new.clear();
   }
 }
diff --git a/src/mon/Monitor.cc b/src/mon/Monitor.cc
index 719d5aa..4047351 100644
--- a/src/mon/Monitor.cc
+++ b/src/mon/Monitor.cc
@@ -1916,7 +1916,10 @@ void Monitor::win_election(epoch_t epoch, set<int>& active, uint64_t features,
     do_health_to_clog_interval();
     scrub_event_start();
   }
-  collect_sys_info(&metadata[rank], g_ceph_context);
+
+  Metadata my_meta;
+  collect_sys_info(&my_meta, g_ceph_context);
+  update_mon_metadata(rank, std::move(my_meta));
 }
 
 void Monitor::lose_election(epoch_t epoch, set<int> &q, int l, uint64_t features) 
@@ -2634,7 +2637,19 @@ void Monitor::handle_command(MonOpRequestRef op)
     return;
   }
 
-  cmd_getval(g_ceph_context, cmdmap, "prefix", prefix);
+  // check return value. If no prefix parameter provided,
+  // return value will be false, then return error info.
+  if(!cmd_getval(g_ceph_context, cmdmap, "prefix", prefix)) {
+    reply_command(op, -EINVAL, "command prefix not found", 0);
+    return;
+  }
+
+  // check prefix is empty
+  if (prefix.empty()) {
+    reply_command(op, -EINVAL, "command prefix must not be empty", 0);
+    return;
+  }
+
   if (prefix == "get_command_descriptions") {
     bufferlist rdata;
     Formatter *f = Formatter::create("json");
@@ -2655,6 +2670,15 @@ void Monitor::handle_command(MonOpRequestRef op)
   boost::scoped_ptr<Formatter> f(Formatter::create(format));
 
   get_str_vec(prefix, fullcmd);
+
+  // make sure fullcmd is not empty.
+  // invalid prefix will cause empty vector fullcmd.
+  // such as, prefix=";,,;"
+  if (fullcmd.empty()) {
+    reply_command(op, -EINVAL, "command requires a prefix to be valid", 0);
+    return;
+  }
+
   module = fullcmd[0];
 
   // validate command is in leader map
@@ -4404,13 +4428,13 @@ void Monitor::handle_mon_metadata(MonOpRequestRef op)
   MMonMetadata *m = static_cast<MMonMetadata*>(op->get_req());
   if (is_leader()) {
     dout(10) << __func__ << dendl;
-    update_mon_metadata(m->get_source().num(), m->data);
+    update_mon_metadata(m->get_source().num(), std::move(m->data));
   }
 }
 
-void Monitor::update_mon_metadata(int from, const Metadata& m)
+void Monitor::update_mon_metadata(int from, Metadata&& m)
 {
-  metadata[from] = m;
+  pending_metadata.insert(make_pair(from, std::move(m)));
 
   bufferlist bl;
   int err = store->get(MONITOR_STORE_PREFIX, "last_metadata", bl);
@@ -4418,12 +4442,12 @@ void Monitor::update_mon_metadata(int from, const Metadata& m)
   if (!err) {
     bufferlist::iterator iter = bl.begin();
     ::decode(last_metadata, iter);
-    metadata.insert(last_metadata.begin(), last_metadata.end());
+    pending_metadata.insert(last_metadata.begin(), last_metadata.end());
   }
 
   MonitorDBStore::TransactionRef t = paxos->get_pending_transaction();
   bl.clear();
-  ::encode(metadata, bl);
+  ::encode(pending_metadata, bl);
   t->put(MONITOR_STORE_PREFIX, "last_metadata", bl);
   paxos->trigger_propose();
 }
diff --git a/src/mon/Monitor.h b/src/mon/Monitor.h
index add1c3e..bf94dea 100644
--- a/src/mon/Monitor.h
+++ b/src/mon/Monitor.h
@@ -717,7 +717,9 @@ public:
   void handle_mon_metadata(MonOpRequestRef op);
   int get_mon_metadata(int mon, Formatter *f, ostream& err);
   int print_nodes(Formatter *f, ostream& err);
-  map<int, Metadata> metadata;
+
+  // Accumulate metadata across calls to update_mon_metadata
+  map<int, Metadata> pending_metadata;
 
   /**
    *
@@ -917,7 +919,7 @@ public:
   int write_default_keyring(bufferlist& bl);
   void extract_save_mon_key(KeyRing& keyring);
 
-  void update_mon_metadata(int from, const Metadata& m);
+  void update_mon_metadata(int from, Metadata&& m);
   int load_metadata(map<int, Metadata>& m);
 
   // features
diff --git a/src/mon/OSDMonitor.cc b/src/mon/OSDMonitor.cc
index 68bcc97..da9cd67 100644
--- a/src/mon/OSDMonitor.cc
+++ b/src/mon/OSDMonitor.cc
@@ -2247,7 +2247,8 @@ bool OSDMonitor::prepare_alive(MonOpRequestRef op)
 
   dout(7) << "prepare_alive want up_thru " << m->want << " have " << m->version
 	  << " from " << m->get_orig_source_inst() << dendl;
-  pending_inc.new_up_thru[from] = m->version;  // set to the latest map the OSD has
+
+  update_up_thru(from, m->version); // set to the latest map the OSD has
   wait_for_finished_proposal(op, new C_ReplyMap(this, op, m->version));
   return true;
 }
@@ -2310,6 +2311,21 @@ bool OSDMonitor::preprocess_pgtemp(MonOpRequestRef op)
       continue;
     }
 
+    int acting_primary = -1;
+    osdmap.pg_to_up_acting_osds(
+      p->first, nullptr, nullptr, nullptr, &acting_primary);
+    if (acting_primary != from) {
+      /* If the source isn't the primary based on the current osdmap, we know
+       * that the interval changed and that we can discard this message.
+       * Indeed, we must do so to avoid 16127 since we can't otherwise determine
+       * which of two pg temp mappings on the same pg is more recent.
+       */
+      dout(10) << __func__ << " ignore " << p->first << " -> " << p->second
+	       << ": primary has changed" << dendl;
+      ignore_cnt++;
+      continue;
+    }
+
     // removal?
     if (p->second.empty() && (osdmap.pg_temp->count(p->first) ||
 			      osdmap.primary_temp->count(p->first)))
@@ -2335,6 +2351,19 @@ bool OSDMonitor::preprocess_pgtemp(MonOpRequestRef op)
   return true;
 }
 
+void OSDMonitor::update_up_thru(int from, epoch_t up_thru)
+{
+  epoch_t old_up_thru = osdmap.get_up_thru(from);
+  auto ut = pending_inc.new_up_thru.find(from);
+  if (ut != pending_inc.new_up_thru.end()) {
+    old_up_thru = ut->second;
+  }
+  if (up_thru > old_up_thru) {
+    // set up_thru too, so the osd doesn't have to ask again
+    pending_inc.new_up_thru[from] = up_thru;
+  }
+}
+
 bool OSDMonitor::prepare_pgtemp(MonOpRequestRef op)
 {
   op->mark_osdmon_event(__func__);
@@ -2362,7 +2391,10 @@ bool OSDMonitor::prepare_pgtemp(MonOpRequestRef op)
 	pending_inc.new_primary_temp.count(p->first))
       pending_inc.new_primary_temp[p->first] = -1;
   }
-  pending_inc.new_up_thru[from] = m->map_epoch;   // set up_thru too, so the osd doesn't have to ask again
+
+  // set up_thru too, so the osd doesn't have to ask again
+  update_up_thru(from, m->map_epoch);
+
   wait_for_finished_proposal(op, new C_ReplyMap(this, op, m->map_epoch));
   return true;
 }
@@ -3272,14 +3304,22 @@ bool OSDMonitor::preprocess_command(MonOpRequestRef op)
         goto reply;
       f->close_section();
     } else {
+      r = 0;
       f->open_array_section("osd_metadata");
       for (int i=0; i<osdmap.get_max_osd(); ++i) {
         if (osdmap.exists(i)) {
           f->open_object_section("osd");
           f->dump_unsigned("id", i);
           r = dump_osd_metadata(i, f.get(), NULL);
-          if (r < 0)
+          if (r == -EINVAL || r == -ENOENT) {
+            // Drop error, continue to get other daemons' metadata
+            dout(4) << "No metadata for osd." << i << dendl;
+            r = 0;
+            continue;
+          } else if (r < 0) {
+            // Unexpected error
             goto reply;
+          }
           f->close_section();
         }
       }
diff --git a/src/mon/OSDMonitor.h b/src/mon/OSDMonitor.h
index 9d4d33d..4fb1ba4 100644
--- a/src/mon/OSDMonitor.h
+++ b/src/mon/OSDMonitor.h
@@ -257,6 +257,7 @@ private:
   bool prepare_boot(MonOpRequestRef op);
   void _booted(MonOpRequestRef op, bool logit);
 
+  void update_up_thru(int from, epoch_t up_thru);
   bool preprocess_alive(MonOpRequestRef op);
   bool prepare_alive(MonOpRequestRef op);
   void _reply_map(MonOpRequestRef op, epoch_t e);
diff --git a/src/msg/async/AsyncConnection.cc b/src/msg/async/AsyncConnection.cc
index 7ae8187..cbdd692 100644
--- a/src/msg/async/AsyncConnection.cc
+++ b/src/msg/async/AsyncConnection.cc
@@ -176,9 +176,9 @@ static void alloc_aligned_buffer(bufferlist& data, unsigned len, unsigned off)
 }
 
 AsyncConnection::AsyncConnection(CephContext *cct, AsyncMessenger *m, EventCenter *c, PerfCounters *p)
-  : Connection(cct, m), async_msgr(m), logger(p), global_seq(0), connect_seq(0), peer_global_seq(0),
-    out_seq(0), ack_left(0), in_seq(0), state(STATE_NONE), state_after_send(0), sd(-1), port(-1),
-    write_lock("AsyncConnection::write_lock"), can_write(WriteStatus::NOWRITE),
+  : Connection(cct, m), delay_state(NULL), async_msgr(m), logger(p), global_seq(0), connect_seq(0), 
+    peer_global_seq(0), out_seq(0), ack_left(0), in_seq(0), state(STATE_NONE), state_after_send(0), sd(-1),
+    port(-1), write_lock("AsyncConnection::write_lock"), can_write(WriteStatus::NOWRITE),
     open_write(false), keepalive(false), lock("AsyncConnection::lock"), recv_buf(NULL),
     recv_max_prefetch(MIN(msgr->cct->_conf->ms_tcp_prefetch_max_size, TCP_PREFETCH_MIN_SIZE)),
     recv_start(0), recv_end(0), got_bad_auth(false), authorizer(NULL), replacing(false),
@@ -207,6 +207,16 @@ AsyncConnection::~AsyncConnection()
     delete[] recv_buf;
   if (state_buffer)
     delete[] state_buffer;
+  assert(!delay_state);
+}
+
+void AsyncConnection::maybe_start_delay_thread()
+{
+  if (!delay_state &&
+      async_msgr->cct->_conf->ms_inject_delay_type.find(ceph_entity_type_name(peer_type)) != string::npos) {
+    ldout(msgr->cct, 1) << __func__ << " setting up a delay queue" << dendl;
+    delay_state = new DelayedDelivery(async_msgr, center);
+  }
 }
 
 /* return -1 means `fd` occurs error or closed, it should be closed
@@ -345,11 +355,8 @@ ssize_t AsyncConnection::do_sendmsg(struct msghdr &msg, unsigned len, bool more)
 
 // return the remaining bytes, it may larger than the length of ptr
 // else return < 0 means error
-ssize_t AsyncConnection::_try_send(bool send, bool more)
+ssize_t AsyncConnection::_try_send(bool more)
 {
-  if (!send)
-    return 0;
-
   if (async_msgr->cct->_conf->ms_inject_socket_failures && sd >= 0) {
     if (rand() % async_msgr->cct->_conf->ms_inject_socket_failures == 0) {
       ldout(async_msgr->cct, 0) << __func__ << " injecting socket failure" << dendl;
@@ -498,6 +505,16 @@ ssize_t AsyncConnection::read_until(unsigned len, char *p)
   return len - state_offset;
 }
 
+void AsyncConnection::inject_delay() {
+  if (async_msgr->cct->_conf->ms_inject_internal_delays) {
+    ldout(async_msgr->cct, 10) << __func__ << " sleep for " << 
+      async_msgr->cct->_conf->ms_inject_internal_delays << dendl;
+    utime_t t;
+    t.set_from_double(async_msgr->cct->_conf->ms_inject_internal_delays);
+    t.sleep();
+  }
+}
+
 void AsyncConnection::process()
 {
   ssize_t r = 0;
@@ -646,7 +663,7 @@ void AsyncConnection::process()
 
           // verify header crc
           if (msgr->crcflags & MSG_CRC_HEADER && header_crc != header.crc) {
-            ldout(async_msgr->cct,0) << __func__ << " reader got bad header crc "
+            ldout(async_msgr->cct,0) << __func__ << " got bad header crc "
                                      << header_crc << " != " << header.crc << dendl;
             goto fail;
           }
@@ -729,7 +746,6 @@ void AsyncConnection::process()
             ldout(async_msgr->cct, 20) << __func__ << " got front " << front.length() << dendl;
           }
           state = STATE_OPEN_MESSAGE_READ_MIDDLE;
-          break;
         }
 
       case STATE_OPEN_MESSAGE_READ_MIDDLE:
@@ -751,7 +767,6 @@ void AsyncConnection::process()
           }
 
           state = STATE_OPEN_MESSAGE_READ_DATA_PREPARE;
-          break;
         }
 
       case STATE_OPEN_MESSAGE_READ_DATA_PREPARE:
@@ -780,7 +795,6 @@ void AsyncConnection::process()
 
           msg_left = data_len;
           state = STATE_OPEN_MESSAGE_READ_DATA;
-          break;
         }
 
       case STATE_OPEN_MESSAGE_READ_DATA:
@@ -801,10 +815,10 @@ void AsyncConnection::process()
             msg_left -= read;
           }
 
-          if (msg_left == 0)
-            state = STATE_OPEN_MESSAGE_READ_FOOTER_AND_DISPATCH;
+          if (msg_left > 0)
+            break;
 
-          break;
+          state = STATE_OPEN_MESSAGE_READ_FOOTER_AND_DISPATCH;
         }
 
       case STATE_OPEN_MESSAGE_READ_FOOTER_AND_DISPATCH:
@@ -916,17 +930,27 @@ void AsyncConnection::process()
 
           state = STATE_OPEN;
 
+          logger->inc(l_msgr_recv_messages);
+          logger->inc(l_msgr_recv_bytes, message_size + sizeof(ceph_msg_header) + sizeof(ceph_msg_footer));
+
           async_msgr->ms_fast_preprocess(message);
-          if (async_msgr->ms_can_fast_dispatch(message)) {
+          if (delay_state) {
+            utime_t release = message->get_recv_stamp();
+            double delay_period = 0;
+            if (rand() % 10000 < async_msgr->cct->_conf->ms_inject_delay_probability * 10000.0) {
+              delay_period = async_msgr->cct->_conf->ms_inject_delay_max * (double)(rand() % 10000) / 10000.0;
+              release += delay_period;
+              ldout(async_msgr->cct, 1) << "queue_received will delay until " << release << " on "
+                                        << message << " " << *message << dendl;
+            }
+            delay_state->queue(delay_period, release, message);
+          } else if (async_msgr->ms_can_fast_dispatch(message)) {
             lock.Unlock();
             async_msgr->ms_fast_dispatch(message);
             lock.Lock();
           } else {
             center->dispatch_event_external(EventCallbackRef(new C_handle_dispatch(async_msgr, message)));
           }
-          logger->inc(l_msgr_recv_messages);
-          logger->inc(l_msgr_recv_bytes, message_size + sizeof(ceph_msg_header) + sizeof(ceph_msg_footer));
-
           break;
         }
 
@@ -954,8 +978,8 @@ void AsyncConnection::process()
 
       case STATE_WAIT:
         {
-          ldout(async_msgr->cct, 20) << __func__ << " enter wait state" << dendl;
-          break;
+          ldout(async_msgr->cct, 1) << __func__ << " enter wait state, failing" << dendl;
+          goto fail;
         }
 
       default:
@@ -1052,9 +1076,12 @@ ssize_t AsyncConnection::_process_connection()
           ldout(async_msgr->cct, 1) << __func__ << " reconnect failed " << dendl;
           goto fail;
         } else if (r > 0) {
+          ldout(async_msgr->cct, 10) << __func__ << " nonblock connect inprogress" << dendl;
+          center->create_file_event(sd, EVENT_WRITABLE, read_handler);
           break;
         }
 
+        center->delete_file_event(sd, EVENT_WRITABLE);
         state = STATE_CONNECTING_WAIT_BANNER;
         break;
       }
@@ -1353,7 +1380,7 @@ ssize_t AsyncConnection::_process_connection()
         if (is_queued())
           center->dispatch_event_external(write_handler);
         write_lock.Unlock();
-
+        maybe_start_delay_thread();
         break;
       }
 
@@ -1493,7 +1520,7 @@ ssize_t AsyncConnection::_process_connection()
         r = read_until(sizeof(newly_acked_seq), state_buffer);
         if (r < 0) {
           ldout(async_msgr->cct, 1) << __func__ << " read ack seq failed" << dendl;
-          goto fail;
+          goto fail_registered;
         } else if (r > 0) {
           break;
         }
@@ -1515,6 +1542,7 @@ ssize_t AsyncConnection::_process_connection()
         if (is_queued())
           center->dispatch_event_external(write_handler);
         write_lock.Unlock();
+        maybe_start_delay_thread();
         break;
       }
 
@@ -1527,6 +1555,10 @@ ssize_t AsyncConnection::_process_connection()
 
   return 0;
 
+fail_registered:
+  ldout(async_msgr->cct, 10) << "accept fault after register" << dendl;
+  inject_delay();
+
 fail:
   return -1;
 }
@@ -1566,21 +1598,21 @@ int AsyncConnection::handle_connect_reply(ceph_msg_connect &connect, ceph_msg_co
   }
   if (reply.tag == CEPH_MSGR_TAG_RETRY_GLOBAL) {
     global_seq = async_msgr->get_global_seq(reply.global_seq);
-    ldout(async_msgr->cct, 10) << __func__ << " connect got RETRY_GLOBAL "
-                         << reply.global_seq << " chose new "
-                         << global_seq << dendl;
+    ldout(async_msgr->cct, 5) << __func__ << " connect got RETRY_GLOBAL "
+                              << reply.global_seq << " chose new "
+                              << global_seq << dendl;
     state = STATE_CONNECTING_SEND_CONNECT_MSG;
   }
   if (reply.tag == CEPH_MSGR_TAG_RETRY_SESSION) {
     assert(reply.connect_seq > connect_seq);
-    ldout(async_msgr->cct, 10) << __func__ << " connect got RETRY_SESSION "
-                               << connect_seq << " -> "
-                               << reply.connect_seq << dendl;
+    ldout(async_msgr->cct, 5) << __func__ << " connect got RETRY_SESSION "
+                              << connect_seq << " -> "
+                              << reply.connect_seq << dendl;
     connect_seq = reply.connect_seq;
     state = STATE_CONNECTING_SEND_CONNECT_MSG;
   }
   if (reply.tag == CEPH_MSGR_TAG_WAIT) {
-    ldout(async_msgr->cct, 3) << __func__ << " connect got WAIT (connection race)" << dendl;
+    ldout(async_msgr->cct, 1) << __func__ << " connect got WAIT (connection race)" << dendl;
     state = STATE_WAIT;
   }
 
@@ -1646,6 +1678,8 @@ ssize_t AsyncConnection::handle_connect_msg(ceph_msg_connect &connect, bufferlis
     return _reply_accept(CEPH_MSGR_TAG_FEATURES, connect, reply, authorizer_reply);
   }
 
+  lock.Unlock();
+
   bool authorizer_valid;
   if (!async_msgr->verify_authorizer(this, peer_type, connect.authorizer_protocol, authorizer_bl,
                                authorizer_reply, authorizer_valid, session_key) || !authorizer_valid) {
@@ -1658,16 +1692,9 @@ ssize_t AsyncConnection::handle_connect_msg(ceph_msg_connect &connect, bufferlis
   ldout(async_msgr->cct, 10) << __func__ << " accept setting up session_security." << dendl;
 
   // existing?
-  lock.Unlock();
   AsyncConnectionRef existing = async_msgr->lookup_conn(peer_addr);
 
-  if (async_msgr->cct->_conf->ms_inject_internal_delays) {
-    ldout(msgr->cct, 10) << __func__ << " sleep for "
-                         << async_msgr->cct->_conf->ms_inject_internal_delays << dendl;
-    utime_t t;
-    t.set_from_double(async_msgr->cct->_conf->ms_inject_internal_delays);
-    t.sleep();
-  }
+  inject_delay();
 
   lock.Lock();
   if (state != STATE_ACCEPTING_WAIT_CONNECT_MSG_AUTH) {
@@ -1802,14 +1829,7 @@ ssize_t AsyncConnection::handle_connect_msg(ceph_msg_connect &connect, bufferlis
  replace:
   ldout(async_msgr->cct, 10) << __func__ << " accept replacing " << existing << dendl;
 
-  if (async_msgr->cct->_conf->ms_inject_internal_delays) {
-    ldout(msgr->cct, 10) << __func__ << " sleep for "
-                         << async_msgr->cct->_conf->ms_inject_internal_delays << dendl;
-    utime_t t;
-    t.set_from_double(async_msgr->cct->_conf->ms_inject_internal_delays);
-    t.sleep();
-  }
-
+  inject_delay();
   if (existing->policy.lossy) {
     // disconnect from the Connection
     existing->center->dispatch_event_external(existing->reset_handler);
@@ -1839,6 +1859,10 @@ ssize_t AsyncConnection::handle_connect_msg(ceph_msg_connect &connect, bufferlis
 
     // Clean up output buffer
     existing->outcoming_bl.clear();
+    if (existing->delay_state) {
+      existing->delay_state->flush();
+      assert(!delay_state);
+    }
     existing->requeue_sent();
 
     swap(existing->sd, sd);
@@ -1917,14 +1941,8 @@ ssize_t AsyncConnection::handle_connect_msg(ceph_msg_connect &connect, bufferlis
   // it's safe that here we don't acquire Connection's lock
   r = async_msgr->accept_conn(this);
 
-  if (async_msgr->cct->_conf->ms_inject_internal_delays) {
-    ldout(msgr->cct, 10) << __func__ << " sleep for "
-                         << async_msgr->cct->_conf->ms_inject_internal_delays << dendl;
-    utime_t t;
-    t.set_from_double(async_msgr->cct->_conf->ms_inject_internal_delays);
-    t.sleep();
-  }
-
+  inject_delay();
+  
   lock.Lock();
   replacing = false;
   if (r < 0) {
@@ -1959,15 +1977,7 @@ ssize_t AsyncConnection::handle_connect_msg(ceph_msg_connect &connect, bufferlis
 
  fail_registered:
   ldout(async_msgr->cct, 10) << __func__ << " accept fault after register" << dendl;
-
-  if (async_msgr->cct->_conf->ms_inject_internal_delays) {
-    ldout(async_msgr->cct, 10) << __func__ << " sleep for "
-                               << async_msgr->cct->_conf->ms_inject_internal_delays
-                               << dendl;
-    utime_t t;
-    t.set_from_double(async_msgr->cct->_conf->ms_inject_internal_delays);
-    t.sleep();
-  }
+  inject_delay();
 
  fail:
   ldout(async_msgr->cct, 10) << __func__ << " failed to accept." << dendl;
@@ -2166,6 +2176,9 @@ void AsyncConnection::fault()
   can_write = WriteStatus::NOWRITE;
   open_write = false;
 
+  // queue delayed items immediately
+  if (delay_state)
+    delay_state->flush();
   // requeue sent items
   requeue_sent();
   recv_start = recv_end = 0;
@@ -2191,7 +2204,8 @@ void AsyncConnection::fault()
   }
 
   write_lock.Unlock();
-  if (!(state >= STATE_CONNECTING && state < STATE_CONNECTING_READY)) {
+  if (!(state >= STATE_CONNECTING && state < STATE_CONNECTING_READY) &&
+      state != STATE_WAIT) { // STATE_WAIT is coming from STATE_CONNECTING_*
     // policy maybe empty when state is in accept
     if (policy.server) {
       ldout(async_msgr->cct, 0) << __func__ << " server, going to standby" << dendl;
@@ -2202,8 +2216,11 @@ void AsyncConnection::fault()
       state = STATE_CONNECTING;
     }
     backoff = utime_t();
+    center->dispatch_event_external(read_handler);
   } else {
-    if (backoff == utime_t()) {
+    if (state == STATE_WAIT) {
+      backoff.set_from_double(async_msgr->cct->_conf->ms_max_backoff);
+    } else if (backoff == utime_t()) {
       backoff.set_from_double(async_msgr->cct->_conf->ms_initial_backoff);
     } else {
       backoff += backoff;
@@ -2213,11 +2230,10 @@ void AsyncConnection::fault()
 
     state = STATE_CONNECTING;
     ldout(async_msgr->cct, 10) << __func__ << " waiting " << backoff << dendl;
+    // woke up again;
+    register_time_events.insert(center->create_time_event(
+            backoff.to_nsec()/1000, wakeup_handler));
   }
-
-  // woke up again;
-  register_time_events.insert(center->create_time_event(
-          backoff.to_nsec()/1000, wakeup_handler));
 }
 
 void AsyncConnection::was_session_reset()
@@ -2225,6 +2241,8 @@ void AsyncConnection::was_session_reset()
   ldout(async_msgr->cct,10) << __func__ << " started" << dendl;
   assert(lock.is_locked());
   Mutex::Locker l(write_lock);
+  if (delay_state)
+    delay_state->discard();
   discard_out_queue();
 
   center->dispatch_event_external(remote_reset_handler);
@@ -2247,6 +2265,9 @@ void AsyncConnection::_stop()
   if (state == STATE_CLOSED)
     return ;
 
+  if (delay_state)
+    delay_state->flush();
+
   ldout(async_msgr->cct, 1) << __func__ << dendl;
   Mutex::Locker l(write_lock);
   if (sd >= 0)
@@ -2269,6 +2290,7 @@ void AsyncConnection::_stop()
     center->delete_time_event(*it);
   // Make sure in-queue events will been processed
   center->dispatch_event_external(EventCallbackRef(new C_clean_handler(this)));
+
 }
 
 void AsyncConnection::prepare_send_message(uint64_t features, Message *m, bufferlist &bl)
@@ -2379,7 +2401,7 @@ ssize_t AsyncConnection::write_message(Message *m, bufferlist& bl, bool more)
   logger->inc(l_msgr_send_bytes, outcoming_bl.length() - original_bl_len);
   ldout(async_msgr->cct, 20) << __func__ << " sending " << m->get_seq()
                              << " " << m << dendl;
-  ssize_t rc = _try_send(true, more);
+  ssize_t rc = _try_send(more);
   if (rc < 0) {
     ldout(async_msgr->cct, 1) << __func__ << " error sending " << m << ", "
                               << cpp_strerror(errno) << dendl;
@@ -2408,6 +2430,59 @@ void AsyncConnection::handle_ack(uint64_t seq)
   }
 }
 
+void AsyncConnection::DelayedDelivery::do_request(int id)
+{
+  Message *m = nullptr;
+  {
+    Mutex::Locker l(delay_lock);
+    register_time_events.erase(id);
+    if (delay_queue.empty())
+      return ;
+    utime_t release = delay_queue.front().first;
+    m = delay_queue.front().second;
+    string delay_msg_type = msgr->cct->_conf->ms_inject_delay_msg_type;
+    utime_t now = ceph_clock_now(msgr->cct);
+    if ((release > now &&
+        (delay_msg_type.empty() || m->get_type_name() == delay_msg_type))) {
+      utime_t t = release - now;
+      t.sleep();
+    }
+    delay_queue.pop_front();
+  }
+  if (msgr->ms_can_fast_dispatch(m)) {
+    msgr->ms_fast_dispatch(m);
+  } else {
+    msgr->ms_deliver_dispatch(m);
+  }
+}
+
+class C_flush_messages : public EventCallback {
+  std::deque<std::pair<utime_t, Message*> > delay_queue;
+  AsyncMessenger *msgr;
+ public:
+  C_flush_messages(std::deque<std::pair<utime_t, Message*> > &&q, AsyncMessenger *m): delay_queue(std::move(q)), msgr(m) {}
+  void do_request(int id) {
+    while (!delay_queue.empty()) {
+      Message *m = delay_queue.front().second;
+      if (msgr->ms_can_fast_dispatch(m)) {
+        msgr->ms_fast_dispatch(m);
+      } else {
+        msgr->ms_deliver_dispatch(m);
+      }
+      delay_queue.pop_front();
+    }
+    delete this;
+  }
+};
+
+void AsyncConnection::DelayedDelivery::flush() {
+  Mutex::Locker l(delay_lock);
+  center->dispatch_event_external(new C_flush_messages(std::move(delay_queue), msgr));
+  for (auto i : register_time_events)
+    center->delete_time_event(i);
+  register_time_events.clear();
+}
+
 void AsyncConnection::send_keepalive()
 {
   ldout(async_msgr->cct, 10) << __func__ << " started." << dendl;
@@ -2425,6 +2500,12 @@ void AsyncConnection::mark_down()
   _stop();
 }
 
+void AsyncConnection::release_worker()
+{
+  if (msgr)
+    reinterpret_cast<AsyncMessenger*>(msgr)->release_worker(center);
+}
+
 void AsyncConnection::_send_keepalive_or_ack(bool ack, utime_t *tp)
 {
   assert(write_lock.is_locked());
@@ -2489,7 +2570,7 @@ void AsyncConnection::handle_write()
       ldout(async_msgr->cct, 10) << __func__ << " try send msg ack, acked " << left << " messages" << dendl;
       ack_left.sub(left);
       left = ack_left.read();
-      r = _try_send(true, left);
+      r = _try_send(left);
     } else if (is_queued()) {
       r = _try_send();
     }
diff --git a/src/msg/async/AsyncConnection.h b/src/msg/async/AsyncConnection.h
index 608ccce..416bccb 100644
--- a/src/msg/async/AsyncConnection.h
+++ b/src/msg/async/AsyncConnection.h
@@ -52,14 +52,14 @@ class AsyncConnection : public Connection {
   void suppress_sigpipe();
   void restore_sigpipe();
   ssize_t do_sendmsg(struct msghdr &msg, unsigned len, bool more);
-  ssize_t try_send(bufferlist &bl, bool send=true, bool more=false) {
+  ssize_t try_send(bufferlist &bl, bool more=false) {
     Mutex::Locker l(write_lock);
     outcoming_bl.claim_append(bl);
-    return _try_send(send, more);
+    return _try_send(more);
   }
   // if "send" is false, it will only append bl to send buffer
   // the main usage is avoid error happen outside messenger threads
-  ssize_t _try_send(bool send=true, bool more=false);
+  ssize_t _try_send(bool more=false);
   ssize_t _send(Message *m);
   void prepare_send_message(uint64_t features, Message *m, bufferlist &bl);
   ssize_t read_until(unsigned needed, char *p);
@@ -77,6 +77,7 @@ class AsyncConnection : public Connection {
   void handle_ack(uint64_t seq);
   void _send_keepalive_or_ack(bool ack=false, utime_t *t=NULL);
   ssize_t write_message(Message *m, bufferlist& bl, bool more);
+  void inject_delay();
   ssize_t _reply_accept(char tag, ceph_msg_connect &connect, ceph_msg_connect_reply &reply,
                     bufferlist &authorizer_reply) {
     bufferlist reply_bl;
@@ -88,8 +89,10 @@ class AsyncConnection : public Connection {
       reply_bl.append(authorizer_reply.c_str(), authorizer_reply.length());
     }
     ssize_t r = try_send(reply_bl);
-    if (r < 0)
+    if (r < 0) {
+      inject_delay();
       return -1;
+    }
 
     state = STATE_ACCEPTING_WAIT_CONNECT_MSG;
     return 0;
@@ -124,9 +127,51 @@ class AsyncConnection : public Connection {
     return !out_q.empty();
   }
 
+   /**
+   * The DelayedDelivery is for injecting delays into Message delivery off
+   * the socket. It is only enabled if delays are requested, and if they
+   * are then it pulls Messages off the DelayQueue and puts them into the
+   * AsyncMessenger event queue.
+   */
+  class DelayedDelivery : public EventCallback {
+    std::set<uint64_t> register_time_events; // need to delete it if stop
+    std::deque<std::pair<utime_t, Message*> > delay_queue;
+    Mutex delay_lock;
+    AsyncMessenger *msgr;
+    EventCenter *center;
+
+   public:
+    explicit DelayedDelivery(AsyncMessenger *omsgr, EventCenter *c)
+      : delay_lock("AsyncConnection::DelayedDelivery::delay_lock"),
+        msgr(omsgr), center(c) { }
+    ~DelayedDelivery() {
+      assert(register_time_events.empty());
+      assert(delay_queue.empty());
+    }
+    void do_request(int id) override;
+    void queue(double delay_period, utime_t release, Message *m) {
+      Mutex::Locker l(delay_lock);
+      delay_queue.push_back(std::make_pair(release, m));
+      register_time_events.insert(center->create_time_event(delay_period*1000000, this));
+    }
+    void discard() {
+      Mutex::Locker l(delay_lock);
+      while (!delay_queue.empty()) {
+        Message *m = delay_queue.front().second;
+        m->put();
+        delay_queue.pop_front();
+      }
+      for (auto i : register_time_events)
+        center->delete_time_event(i);
+      register_time_events.clear();
+    }
+    void flush();
+  } *delay_state;
+
  public:
   AsyncConnection(CephContext *cct, AsyncMessenger *m, EventCenter *c, PerfCounters *p);
   ~AsyncConnection();
+  void maybe_start_delay_thread();
 
   ostream& _conn_prefix(std::ostream *_dout);
 
@@ -151,6 +196,8 @@ class AsyncConnection : public Connection {
     Mutex::Locker l(lock);
     policy.lossy = true;
   }
+  
+  void release_worker();
 
  private:
   enum {
@@ -331,6 +378,10 @@ class AsyncConnection : public Connection {
     delete connect_handler;
     delete local_deliver_handler;
     delete wakeup_handler;
+    if (delay_state) {
+      delete delay_state;
+      delay_state = NULL;
+    }
   }
   PerfCounters *get_perf_counter() {
     return logger;
diff --git a/src/msg/async/AsyncMessenger.cc b/src/msg/async/AsyncMessenger.cc
index c12ee8e..8ae2cff 100644
--- a/src/msg/async/AsyncMessenger.cc
+++ b/src/msg/async/AsyncMessenger.cc
@@ -307,11 +307,14 @@ void *Worker::entry()
  *******************/
 const string WorkerPool::name = "AsyncMessenger::WorkerPool";
 
-WorkerPool::WorkerPool(CephContext *c): cct(c), seq(0), started(false),
+WorkerPool::WorkerPool(CephContext *c): cct(c), started(false),
                                         barrier_lock("WorkerPool::WorkerPool::barrier_lock"),
                                         barrier_count(0)
 {
   assert(cct->_conf->ms_async_op_threads > 0);
+  // make sure user won't try to force some crazy number of worker threads
+  assert(cct->_conf->ms_async_max_op_threads >= cct->_conf->ms_async_op_threads && 
+         cct->_conf->ms_async_op_threads <= 32);
   for (int i = 0; i < cct->_conf->ms_async_op_threads; ++i) {
     Worker *w = new Worker(cct, this, i);
     workers.push_back(w);
@@ -351,6 +354,70 @@ void WorkerPool::start()
   }
 }
 
+Worker* WorkerPool::get_worker()
+{
+  ldout(cct, 10) << __func__ << dendl;
+
+   // start with some reasonably large number
+  unsigned min_load = std::numeric_limits<int>::max();
+  Worker* current_best = nullptr;
+
+  simple_spin_lock(&pool_spin);
+  // find worker with least references
+  // tempting case is returning on references == 0, but in reality
+  // this will happen so rarely that there's no need for special case.
+  for (auto p = workers.begin(); p != workers.end(); ++p) {
+    unsigned worker_load = (*p)->references.load();
+    ldout(cct, 20) << __func__ << " Worker " << *p << " load: " << worker_load << dendl;
+    if (worker_load < min_load) {
+      current_best = *p;
+      min_load = worker_load;
+    }
+  }
+
+  // if minimum load exceeds amount of workers, make a new worker
+  // logic behind this is that we're not going to create new worker
+  // just because others have *some* load, we'll defer worker creation
+  // until others have *plenty* of load. This will cause new worker
+  // to get assigned to all new connections *unless* one or more
+  // of workers get their load reduced - in that case, this worker
+  // will be assigned to new connection.
+  // TODO: add more logic and heuristics, so connections known to be
+  // of light workload (heartbeat service, etc.) won't overshadow
+  // heavy workload (clients, etc).
+  if (!current_best || ((workers.size() < (unsigned)cct->_conf->ms_async_max_op_threads)
+      && (min_load > workers.size()))) {
+     ldout(cct, 20) << __func__ << " creating worker" << dendl;
+     current_best = new Worker(cct, this, workers.size());
+     workers.push_back(current_best);
+     current_best->create("ms_async_worker");
+  } else {
+    ldout(cct, 20) << __func__ << " picked " << current_best 
+                   << " as best worker with load " << min_load << dendl;
+  }
+
+  ++current_best->references;
+  simple_spin_unlock(&pool_spin);
+
+  assert(current_best);
+  return current_best;
+}
+
+void WorkerPool::release_worker(EventCenter* c)
+{
+  ldout(cct, 10) << __func__ << dendl;
+  simple_spin_lock(&pool_spin);
+  for (auto p = workers.begin(); p != workers.end(); ++p) {
+    if (&((*p)->center) == c) {
+      ldout(cct, 10) << __func__ << " found worker, releasing" << dendl;
+      int oldref = (*p)->references.fetch_sub(1);
+      assert(oldref > 0);
+      break;
+    }
+  }
+  simple_spin_unlock(&pool_spin);
+}
+
 void WorkerPool::barrier()
 {
   ldout(cct, 10) << __func__ << " started." << dendl;
diff --git a/src/msg/async/AsyncMessenger.h b/src/msg/async/AsyncMessenger.h
index 3c7aa0a..52d93d7 100644
--- a/src/msg/async/AsyncMessenger.h
+++ b/src/msg/async/AsyncMessenger.h
@@ -36,6 +36,7 @@ using namespace std;
 #include "include/assert.h"
 #include "AsyncConnection.h"
 #include "Event.h"
+#include "common/simple_spin.h"
 
 
 class AsyncMessenger;
@@ -65,8 +66,9 @@ class Worker : public Thread {
 
  public:
   EventCenter center;
+  std::atomic_uint references;
   Worker(CephContext *c, WorkerPool *p, int i)
-    : cct(c), pool(p), done(false), id(i), perf_logger(NULL), center(c) {
+    : cct(c), pool(p), done(false), id(i), perf_logger(NULL), center(c), references(0) {
     center.init(InitEventNumber);
     char name[128];
     sprintf(name, "AsyncMessenger::Worker-%d", id);
@@ -133,7 +135,6 @@ class WorkerPool {
   WorkerPool(const WorkerPool &);
   WorkerPool& operator=(const WorkerPool &);
   CephContext *cct;
-  uint64_t seq;
   vector<Worker*> workers;
   vector<int> coreids;
   // Used to indicate whether thread started
@@ -141,6 +142,7 @@ class WorkerPool {
   Mutex barrier_lock;
   Cond barrier_cond;
   atomic_t barrier_count;
+  simple_spinlock_t pool_spin = SIMPLE_SPINLOCK_INITIALIZER;
 
   class C_barrier : public EventCallback {
     WorkerPool *pool;
@@ -158,9 +160,8 @@ class WorkerPool {
   explicit WorkerPool(CephContext *c);
   virtual ~WorkerPool();
   void start();
-  Worker *get_worker() {
-    return workers[(seq++)%workers.size()];
-  }
+  Worker *get_worker();
+  void release_worker(EventCenter* c);
   int get_cpuid(int id) {
     if (coreids.empty())
       return -1;
@@ -525,6 +526,7 @@ public:
    */
   void unregister_conn(AsyncConnectionRef conn) {
     Mutex::Locker l(deleted_lock);
+    conn->release_worker();
     deleted_conns.insert(conn);
 
     if (deleted_conns.size() >= ReapDeadConnectionThreshold) {
@@ -540,6 +542,10 @@ public:
    * See "deleted_conns"
    */
   int reap_dead();
+  
+  void release_worker(EventCenter* c) {
+    pool->release_worker(c);
+  }
 
   /**
    * @} // AsyncMessenger Internals
diff --git a/src/msg/async/Event.cc b/src/msg/async/Event.cc
index f242fe7..eb20406 100644
--- a/src/msg/async/Event.cc
+++ b/src/msg/async/Event.cc
@@ -229,20 +229,7 @@ uint64_t EventCenter::create_time_event(uint64_t microseconds, EventCallbackRef
 
   ldout(cct, 10) << __func__ << " id=" << id << " trigger after " << microseconds << "us"<< dendl;
   EventCenter::TimeEvent event;
-  utime_t expire;
-  struct timeval tv;
-
-  if (microseconds < 5) {
-    tv.tv_sec = 0;
-    tv.tv_usec = microseconds;
-  } else {
-    expire = ceph_clock_now(cct);
-    expire.copy_to_timeval(&tv);
-    tv.tv_sec += microseconds / 1000000;
-    tv.tv_usec += microseconds % 1000000;
-  }
-  expire.set_from_timeval(&tv);
-
+  clock_type::time_point expire = clock_type::now() + std::chrono::microseconds(microseconds);
   event.id = id;
   event.time_cb = ctxt;
   time_events[expire].push_back(event);
@@ -260,8 +247,7 @@ void EventCenter::delete_time_event(uint64_t id)
   if (id >= time_event_next_id)
     return ;
 
-  for (map<utime_t, list<TimeEvent> >::iterator it = time_events.begin();
-       it != time_events.end(); ++it) {
+  for (auto it = time_events.begin(); it != time_events.end(); ++it) {
     for (list<TimeEvent>::iterator j = it->second.begin();
          j != it->second.end(); ++j) {
       if (j->id == id) {
@@ -276,25 +262,24 @@ void EventCenter::delete_time_event(uint64_t id)
 
 void EventCenter::wakeup()
 {
-  if (already_wakeup.compare_and_swap(0, 1)) {
     ldout(cct, 1) << __func__ << dendl;
+    already_wakeup.compare_and_swap(0, 1);
+
     char buf[1];
     buf[0] = 'c';
     // wake up "event_wait"
     int n = write(notify_send_fd, buf, 1);
     // FIXME ?
     assert(n == 1);
-  }
 }
 
 int EventCenter::process_time_events()
 {
   int processed = 0;
-  time_t now = time(NULL);
-  utime_t cur = ceph_clock_now(cct);
-  ldout(cct, 10) << __func__ << " cur time is " << cur << dendl;
+  clock_type::time_point now = clock_type::now();
+  ldout(cct, 10) << __func__ << " cur time is " << now << dendl;
 
-  time_lock.Lock();
+  Mutex::Locker l(time_lock);
   /* If the system clock is moved to the future, and then set back to the
    * right value, time events may be delayed in a random way. Often this
    * means that scheduled operations will not be performed soon enough.
@@ -303,33 +288,29 @@ int EventCenter::process_time_events()
    * events to be processed ASAP when this happens: the idea is that
    * processing events earlier is less dangerous than delaying them
    * indefinitely, and practice suggests it is. */
-  bool clock_skewed = false;
-  if (now < last_time) {
-    clock_skewed = true;
-  }
+  bool clock_skewed = now < last_time;
   last_time = now;
 
-  map<utime_t, list<TimeEvent> >::iterator prev;
-  list<TimeEvent> need_process;
-  for (map<utime_t, list<TimeEvent> >::iterator it = time_events.begin();
-       it != time_events.end(); ) {
-    prev = it;
-    if (cur >= it->first || clock_skewed) {
-      need_process.splice(need_process.end(), it->second);
-      ++it;
-      time_events.erase(prev);
+  while (!time_events.empty()) {
+    auto it = time_events.begin();
+    if (now >= it->first || clock_skewed) {
+      if (it->second.empty()) {
+        time_events.erase(it);
+      } else {
+        TimeEvent &e = it->second.front();
+        EventCallbackRef cb = e.time_cb;
+        uint64_t id = e.id;
+        it->second.pop_front();
+        ldout(cct, 10) << __func__ << " process time event: id=" << id << dendl;
+        processed++;
+        time_lock.Unlock();
+        cb->do_request(id);
+        time_lock.Lock();
+      }
     } else {
       break;
     }
   }
-  time_lock.Unlock();
-
-  for (list<TimeEvent>::iterator it = need_process.begin();
-       it != need_process.end(); ++it) {
-    ldout(cct, 10) << __func__ << " process time event: id=" << it->id << dendl;
-    it->time_cb->do_request(it->id);
-    processed++;
-  }
 
   return processed;
 }
@@ -341,39 +322,33 @@ int EventCenter::process_events(int timeout_microseconds)
   struct timeval tv;
   int numevents;
   bool trigger_time = false;
+  auto now = clock_type::now();
 
-  utime_t now = ceph_clock_now(cct);;
   // If exists external events, don't block
   if (external_num_events.read()) {
     tv.tv_sec = 0;
     tv.tv_usec = 0;
     next_time = now;
   } else {
-    utime_t period, shortest;
-    now.copy_to_timeval(&tv);
-    if (timeout_microseconds > 0) {
-      tv.tv_sec += timeout_microseconds / 1000000;
-      tv.tv_usec += timeout_microseconds % 1000000;
-    }
-    shortest.set_from_timeval(&tv);
+    clock_type::time_point shortest;
+    shortest = now + std::chrono::microseconds(timeout_microseconds); 
 
     Mutex::Locker l(time_lock);
-    map<utime_t, list<TimeEvent> >::iterator it = time_events.begin();
-    if (it != time_events.end() && shortest >= it->first) {
+    auto it = time_events.begin();
+    if (it != time_events.end() && shortest > it->first) {
       ldout(cct, 10) << __func__ << " shortest is " << shortest << " it->first is " << it->first << dendl;
       shortest = it->first;
       trigger_time = true;
       if (shortest > now) {
-        period = shortest - now;
-        period.copy_to_timeval(&tv);
+        timeout_microseconds = std::chrono::duration_cast<std::chrono::microseconds>(
+            shortest - now).count();
       } else {
-        tv.tv_sec = 0;
-        tv.tv_usec = 0;
+        shortest = now;
+        timeout_microseconds = 0;
       }
-    } else {
-      tv.tv_sec = timeout_microseconds / 1000000;
-      tv.tv_usec = timeout_microseconds % 1000000;
     }
+    tv.tv_sec = timeout_microseconds / 1000000;
+    tv.tv_usec = timeout_microseconds % 1000000;
     next_time = shortest;
   }
 
@@ -430,6 +405,7 @@ int EventCenter::process_events(int timeout_microseconds)
         if (e)
           e->do_request(0);
         cur_process.pop_front();
+        numevents++;
       }
     }
   }
diff --git a/src/msg/async/Event.h b/src/msg/async/Event.h
index 126a36c..4618c15 100644
--- a/src/msg/async/Event.h
+++ b/src/msg/async/Event.h
@@ -42,6 +42,7 @@
 #include "include/atomic.h"
 #include "include/Context.h"
 #include "include/unordered_map.h"
+#include "common/ceph_time.h"
 #include "common/WorkQueue.h"
 #include "net_handler.h"
 
@@ -85,6 +86,7 @@ class EventDriver {
  * EventCenter maintain a set of file descriptor and handle registered events.
  */
 class EventCenter {
+  using clock_type = ceph::coarse_mono_clock;
   struct FileEvent {
     int mask;
     EventCallbackRef read_cb;
@@ -107,10 +109,10 @@ class EventCenter {
   deque<EventCallbackRef> external_events;
   vector<FileEvent> file_events;
   EventDriver *driver;
-  map<utime_t, list<TimeEvent> > time_events;
+  map<clock_type::time_point, list<TimeEvent> > time_events;
   uint64_t time_event_next_id;
-  time_t last_time; // last time process time event
-  utime_t next_time; // next wake up time
+  clock_type::time_point last_time; // last time process time event
+  clock_type::time_point next_time; // next wake up time
   int notify_receive_fd;
   int notify_send_fd;
   NetHandler net;
@@ -136,7 +138,7 @@ class EventCenter {
     notify_receive_fd(-1), notify_send_fd(-1), net(c), owner(0),
     notify_handler(NULL),
     already_wakeup(0) {
-    last_time = time(NULL);
+    last_time = clock_type::now();
   }
   ~EventCenter();
   ostream& _event_prefix(std::ostream *_dout);
diff --git a/src/msg/async/EventKqueue.cc b/src/msg/async/EventKqueue.cc
index 54fef5f..d546462 100644
--- a/src/msg/async/EventKqueue.cc
+++ b/src/msg/async/EventKqueue.cc
@@ -28,7 +28,7 @@ int KqueueDriver::init(int nevent)
   if (!events) {
     lderr(cct) << __func__ << " unable to malloc memory: "
                            << cpp_strerror(errno) << dendl;
-    return -errno;
+    return -ENOMEM;
   }
   memset(events, 0, sizeof(struct kevent)*nevent);
 
@@ -58,7 +58,7 @@ int KqueueDriver::add_event(int fd, int cur_mask, int add_mask)
     if (kevent(kqfd, &ke, 1, NULL, 0, NULL) == -1) {
       lderr(cct) << __func__ << " unable to add event: "
                              << cpp_strerror(errno) << dendl;
-      return -1;
+      return -errno;
     }
   }
 
@@ -80,7 +80,7 @@ int KqueueDriver::del_event(int fd, int cur_mask, int delmask)
     if ((r = kevent(kqfd, &ke, 1, NULL, 0, NULL)) < 0) {
       lderr(cct) << __func__ << " kevent: delete fd=" << fd << " mask=" << filter
                  << " failed." << cpp_strerror(errno) << dendl;
-      return r;
+      return -errno;
     }
   }
   return 0;
diff --git a/src/msg/async/net_handler.cc b/src/msg/async/net_handler.cc
index ba63eec..2a89276 100644
--- a/src/msg/async/net_handler.cc
+++ b/src/msg/async/net_handler.cc
@@ -38,7 +38,7 @@ int NetHandler::create_socket(int domain, bool reuse_addr)
     return -errno;
   }
 
-  /* Make sure connection-intensive things like the benckmark
+  /* Make sure connection-intensive things like the benchmark
    * will be able to close/open sockets a zillion of times */
   if (reuse_addr) {
     if (::setsockopt(s, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)) == -1) {
diff --git a/src/ocf/Makefile.am b/src/ocf/Makefile.am
index 569f3ab..5ab8c2a 100644
--- a/src/ocf/Makefile.am
+++ b/src/ocf/Makefile.am
@@ -1,4 +1,4 @@
-EXTRA_DIST = ceph.in Makefile.in
+EXTRA_DIST = Makefile.in
 
 if WITH_OCF
 # The root of the OCF resource agent hierarchy
@@ -9,15 +9,5 @@ ocfdir = $(prefix)/lib/ocf
 # The ceph provider directory
 radir = $(ocfdir)/resource.d/$(PACKAGE_NAME)
 
-ra_SCRIPTS = ceph rbd
-
-install-data-hook:
-	$(LN_S) ceph $(DESTDIR)$(radir)/osd
-	$(LN_S) ceph $(DESTDIR)$(radir)/mds
-	$(LN_S) ceph $(DESTDIR)$(radir)/mon
-
-uninstall-hook:
-	rm -f $(DESTDIR)$(radir)/osd
-	rm -f $(DESTDIR)$(radir)/mds
-	rm -f $(DESTDIR)$(radir)/mon
+ra_SCRIPTS = rbd
 endif
diff --git a/src/ocf/Makefile.in b/src/ocf/Makefile.in
index af674b0..f3f34ff 100644
--- a/src/ocf/Makefile.in
+++ b/src/ocf/Makefile.in
@@ -81,7 +81,7 @@ host_triplet = @host@
 target_triplet = @target@
 subdir = src/ocf
 DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/Makefile.am \
-	$(srcdir)/ceph.in $(srcdir)/rbd.in
+	$(srcdir)/rbd.in
 ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
 am__aclocal_m4_deps = $(top_srcdir)/m4/ac_check_classpath.m4 \
 	$(top_srcdir)/m4/ac_prog_jar.m4 \
@@ -102,7 +102,7 @@ am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
 	$(ACLOCAL_M4)
 mkinstalldirs = $(install_sh) -d
 CONFIG_HEADER = $(top_builddir)/src/acconfig.h
-CONFIG_CLEAN_FILES = ceph rbd
+CONFIG_CLEAN_FILES = rbd
 CONFIG_CLEAN_VPATH_FILES =
 am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`;
 am__vpath_adj = case $$p in \
@@ -357,7 +357,7 @@ target_vendor = @target_vendor@
 top_build_prefix = @top_build_prefix@
 top_builddir = @top_builddir@
 top_srcdir = @top_srcdir@
-EXTRA_DIST = ceph.in Makefile.in
+EXTRA_DIST = Makefile.in
 
 # The root of the OCF resource agent hierarchy
 # Per the OCF standard, it's always "lib",
@@ -366,7 +366,7 @@ EXTRA_DIST = ceph.in Makefile.in
 
 # The ceph provider directory
 @WITH_OCF_TRUE at radir = $(ocfdir)/resource.d/$(PACKAGE_NAME)
- at WITH_OCF_TRUE@ra_SCRIPTS = ceph rbd
+ at WITH_OCF_TRUE@ra_SCRIPTS = rbd
 all: all-am
 
 .SUFFIXES:
@@ -400,8 +400,6 @@ $(top_srcdir)/configure:  $(am__configure_deps)
 $(ACLOCAL_M4):  $(am__aclocal_m4_deps)
 	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
 $(am__aclocal_m4_deps):
-ceph: $(top_builddir)/config.status $(srcdir)/ceph.in
-	cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@
 rbd: $(top_builddir)/config.status $(srcdir)/rbd.in
 	cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@
 install-raSCRIPTS: $(ra_SCRIPTS)
@@ -519,8 +517,6 @@ distclean-generic:
 maintainer-clean-generic:
 	@echo "This command is intended for maintainers to use"
 	@echo "it deletes files that may require special tools to rebuild."
- at WITH_OCF_FALSE@install-data-hook:
- at WITH_OCF_FALSE@uninstall-hook:
 clean: clean-am
 
 clean-am: clean-generic clean-libtool mostlyclean-am
@@ -542,8 +538,7 @@ info: info-am
 info-am:
 
 install-data-am: install-raSCRIPTS
-	@$(NORMAL_INSTALL)
-	$(MAKE) $(AM_MAKEFLAGS) install-data-hook
+
 install-dvi: install-dvi-am
 
 install-dvi-am:
@@ -587,33 +582,21 @@ ps: ps-am
 ps-am:
 
 uninstall-am: uninstall-raSCRIPTS
-	@$(NORMAL_INSTALL)
-	$(MAKE) $(AM_MAKEFLAGS) uninstall-hook
-.MAKE: install-am install-data-am install-strip uninstall-am
+
+.MAKE: install-am install-strip
 
 .PHONY: all all-am check check-am clean clean-generic clean-libtool \
 	cscopelist-am ctags-am distclean distclean-generic \
 	distclean-libtool distdir dvi dvi-am html html-am info info-am \
-	install install-am install-data install-data-am \
-	install-data-hook install-dvi install-dvi-am install-exec \
-	install-exec-am install-html install-html-am install-info \
-	install-info-am install-man install-pdf install-pdf-am \
-	install-ps install-ps-am install-raSCRIPTS install-strip \
-	installcheck installcheck-am installdirs maintainer-clean \
-	maintainer-clean-generic mostlyclean mostlyclean-generic \
-	mostlyclean-libtool pdf pdf-am ps ps-am tags-am uninstall \
-	uninstall-am uninstall-hook uninstall-raSCRIPTS
-
-
- at WITH_OCF_TRUE@install-data-hook:
- at WITH_OCF_TRUE@	$(LN_S) ceph $(DESTDIR)$(radir)/osd
- at WITH_OCF_TRUE@	$(LN_S) ceph $(DESTDIR)$(radir)/mds
- at WITH_OCF_TRUE@	$(LN_S) ceph $(DESTDIR)$(radir)/mon
-
- at WITH_OCF_TRUE@uninstall-hook:
- at WITH_OCF_TRUE@	rm -f $(DESTDIR)$(radir)/osd
- at WITH_OCF_TRUE@	rm -f $(DESTDIR)$(radir)/mds
- at WITH_OCF_TRUE@	rm -f $(DESTDIR)$(radir)/mon
+	install install-am install-data install-data-am install-dvi \
+	install-dvi-am install-exec install-exec-am install-html \
+	install-html-am install-info install-info-am install-man \
+	install-pdf install-pdf-am install-ps install-ps-am \
+	install-raSCRIPTS install-strip installcheck installcheck-am \
+	installdirs maintainer-clean maintainer-clean-generic \
+	mostlyclean mostlyclean-generic mostlyclean-libtool pdf pdf-am \
+	ps ps-am tags-am uninstall uninstall-am uninstall-raSCRIPTS
+
 
 # Tell versions [3.59,3.63) of GNU make to not export all variables.
 # Otherwise a system limit (for SysV at least) may be exceeded.
diff --git a/src/ocf/ceph.in b/src/ocf/ceph.in
deleted file mode 100644
index 9448a29..0000000
--- a/src/ocf/ceph.in
+++ /dev/null
@@ -1,177 +0,0 @@
-#!/bin/sh
-
-# Initialization:
-: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
-. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
-
-# Convenience variables
-# When sysconfdir isn't passed in as a configure flag,
-# it's defined in terms of prefix
-prefix=@prefix@
-CEPH_INIT=@sysconfdir@/init.d/ceph
-
-ceph_meta_data() {
-    local longdesc
-    local shortdesc
-    case $__SCRIPT_NAME in
-	"osd")
-	    longdesc="Wraps the ceph init script to provide an OCF resource agent that manages and monitors the Ceph OSD service."
-	    shortdesc="Manages a Ceph OSD instance."
-	    ;;
-	"mds")
-	    longdesc="Wraps the ceph init script to provide an OCF resource agent that manages and monitors the Ceph MDS service."
-	    shortdesc="Manages a Ceph MDS instance."
-	    ;;
-	"mon")
-	    longdesc="Wraps the ceph init script to provide an OCF resource agent that manages and monitors the Ceph MON service."
-	    shortdesc="Manages a Ceph MON instance."
-	    ;;
-    esac
-    
-cat <<EOF
-<?xml version="1.0"?>
-<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
-<resource-agent name="${__SCRIPT_NAME}" version="0.1">
-  <version>0.1</version>
-  <longdesc lang="en">${longdesc}</longdesc>
-  <shortdesc lang="en">${shortdesc}</shortdesc>
-  <parameters/>
-  <actions>
-    <action name="start"        timeout="20" />
-    <action name="stop"         timeout="20" />
-    <action name="monitor"      timeout="20"
-                                interval="10"/>
-    <action name="meta-data"    timeout="5" />
-    <action name="validate-all"   timeout="20" />
-  </actions>
-</resource-agent>
-EOF
-}
-
-ceph_action() {
-    local init_action
-    init_action="$1"
-
-    case ${__SCRIPT_NAME} in
-	osd|mds|mon)
-	    ocf_run $CEPH_INIT $init_action ${__SCRIPT_NAME}
-	    ;;
-	*)
-	    ocf_run $CEPH_INIT $init_action
-	    ;;
-    esac
-}
-
-ceph_validate_all() {
-    # Do we have the ceph init script?
-    check_binary @sysconfdir@/init.d/ceph
-
-    # Do we have a configuration file?
-    [ -e @sysconfdir@/ceph/ceph.conf ] || exit $OCF_ERR_INSTALLED
-}
-
-ceph_monitor() {
-    local rc
-
-    ceph_action status
-
-    # 0: running, and fully caught up with master
-    # 3: gracefully stopped
-    # any other: error
-    case "$?" in
-        0)
-            rc=$OCF_SUCCESS
-            ocf_log debug "Resource is running"
-            ;;
-        3)
-            rc=$OCF_NOT_RUNNING
-            ocf_log debug "Resource is not running"
-            ;;
-        *)
-            ocf_log err "Resource has failed"
-            rc=$OCF_ERR_GENERIC
-    esac
-
-    return $rc
-}
-
-ceph_start() {
-    # if resource is already running, bail out early
-    if ceph_monitor; then
-        ocf_log info "Resource is already running"
-        return $OCF_SUCCESS
-    fi
-
-    ceph_action start
-
-    while ! ceph_monitor; do
-        ocf_log debug "Resource has not started yet, waiting"
-        sleep 1
-    done
-
-    return $OCF_SUCCESS
-}
-
-ceph_stop() {
-    local rc
-
-    # exit immediately if configuration is not valid
-    ceph_validate_all || exit $?
-
-    ceph_monitor
-    rc=$?
-    case "$rc" in
-        "$OCF_SUCCESS")
-            # Currently running. Normal, expected behavior.
-            ocf_log debug "Resource is currently running"
-            ;;
-        "$OCF_NOT_RUNNING")
-            # Currently not running. Nothing to do.
-            ocf_log info "Resource is already stopped"
-            return $OCF_SUCCESS
-            ;;
-    esac
-
-    ceph_action stop
-
-    while ceph_monitor; do
-        ocf_log debug "Resource has not stopped yet, waiting"
-        sleep 1
-    done
-
-    # only return $OCF_SUCCESS if _everything_ succeeded as expected
-    return $OCF_SUCCESS
-
-}
-
-
-
-# Make sure meta-data and usage always succeed
-case $__OCF_ACTION in
-meta-data)      ceph_meta_data
-                exit $OCF_SUCCESS
-                ;;
-usage|help)     ceph_usage
-                exit $OCF_SUCCESS
-                ;;
-esac
-
-# Anything other than meta-data and usage must pass validation
-ceph_validate_all || exit $?
-
-# Translate each action into the appropriate function call
-case $__OCF_ACTION in
-start)          ceph_start;;
-stop)           ceph_stop;;
-status|monitor) ceph_monitor;;
-reload)         ocf_log info "Reloading..."
-                ceph_start
-                ;;
-validate-all)   ;;
-*)              ceph_usage
-                exit $OCF_ERR_UNIMPLEMENTED
-                ;;
-esac
-rc=$?
-
-exit $rc
diff --git a/src/os/filestore/FileStore.cc b/src/os/filestore/FileStore.cc
index 95a4f5f..9e183a6 100644
--- a/src/os/filestore/FileStore.cc
+++ b/src/os/filestore/FileStore.cc
@@ -3695,6 +3695,9 @@ void FileStore::sync_entry()
     if (force_sync) {
       dout(20) << "sync_entry force_sync set" << dendl;
       force_sync = false;
+    } else if (stop) {
+      dout(20) << __func__ << " stop set" << dendl;
+      break;
     } else {
       // wait for at least the min interval
       utime_t woke = ceph_clock_now(g_ceph_context);
@@ -4616,6 +4619,7 @@ int FileStore::_collection_remove_recursive(const coll_t &cid,
       if (r < 0)
 	return r;
     }
+    objects.clear();
   }
   return _destroy_collection(cid);
 }
@@ -4829,7 +4833,7 @@ int FileStore::collection_list(const coll_t& c, ghobject_t start, ghobject_t end
     assert(!m_filestore_fail_eio || r != -EIO);
     return r;
   }
-  dout(20) << "objects: " << ls << dendl;
+  dout(20) << "objects: " << *ls << dendl;
 
   // HashIndex doesn't know the pool when constructing a 'next' value
   if (next && !next->is_max()) {
diff --git a/src/os/filestore/HashIndex.cc b/src/os/filestore/HashIndex.cc
index a358ef3..b1bf702 100644
--- a/src/os/filestore/HashIndex.cc
+++ b/src/os/filestore/HashIndex.cc
@@ -550,7 +550,12 @@ int HashIndex::recursive_create_path(vector<string>& path, int level)
 }
 
 int HashIndex::recursive_remove(const vector<string> &path) {
+  return _recursive_remove(path, true);
+}
+
+int HashIndex::_recursive_remove(const vector<string> &path, bool top) {
   vector<string> subdirs;
+  dout(20) << __func__ << " path=" << path << dendl;
   int r = list_subdirs(path, &subdirs);
   if (r < 0)
     return r;
@@ -565,12 +570,15 @@ int HashIndex::recursive_remove(const vector<string> &path) {
        i != subdirs.end();
        ++i) {
     subdir.push_back(*i);
-    r = recursive_remove(subdir);
+    r = _recursive_remove(subdir, false);
     if (r < 0)
       return r;
     subdir.pop_back();
   }
-  return remove_path(path);
+  if (top)
+    return 0;
+  else
+    return remove_path(path);
 }
 
 int HashIndex::start_col_split(const vector<string> &path) {
diff --git a/src/os/filestore/HashIndex.h b/src/os/filestore/HashIndex.h
index d4222f9..461eddc 100644
--- a/src/os/filestore/HashIndex.h
+++ b/src/os/filestore/HashIndex.h
@@ -198,6 +198,11 @@ protected:
     ghobject_t *next
     );
 private:
+  /// Internal recursively remove path and its subdirs
+  int _recursive_remove(
+    const vector<string> &path, ///< [in] path to remove
+    bool top			///< [in] internal tracking of first caller
+    ); /// @return Error Code, 0 on success
   /// Recursively remove path and its subdirs
   int recursive_remove(
     const vector<string> &path ///< [in] path to remove
diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc
index 5e7f8de..8eb2397 100644
--- a/src/osd/OSD.cc
+++ b/src/osd/OSD.cc
@@ -499,7 +499,7 @@ void OSDService::init()
 
 void OSDService::final_init()
 {
-  objecter->start();
+  objecter->start(osdmap.get());
 }
 
 void OSDService::activate_map()
@@ -2885,14 +2885,8 @@ PGPool OSD::_get_pool(int id, OSDMapRef createmap)
     assert(0);
   }
 
-  PGPool p = PGPool(id, createmap->get_pool_name(id),
-		    createmap->get_pg_pool(id)->auid);
+  PGPool p = PGPool(createmap, id);
 
-  const pg_pool_t *pi = createmap->get_pg_pool(id);
-  p.info = *pi;
-  p.snapc = pi->get_snap_context();
-
-  pi->build_removed_snaps(p.cached_removed_snaps);
   dout(10) << "_get_pool " << p.id << dendl;
   return p;
 }
@@ -6343,7 +6337,7 @@ OSDService::ScrubJob::ScrubJob(const spg_t& pg, const utime_t& timestamp,
       pool_scrub_max_interval : g_conf->osd_scrub_max_interval;
 
     sched_time += scrub_min_interval;
-    double r = rand() / RAND_MAX;
+    double r = rand() / (double)RAND_MAX;
     sched_time +=
       scrub_min_interval * g_conf->osd_scrub_interval_randomize_ratio * r;
     deadline += scrub_max_interval;
@@ -7527,6 +7521,7 @@ void OSD::handle_pg_create(OpRequestRef op)
     history.epoch_created = created;
     history.last_scrub_stamp = ci->second;
     history.last_deep_scrub_stamp = ci->second;
+    history.last_clean_scrub_stamp = ci->second;
 
     // project history from created epoch (handle_pg_peering_evt does
     // it from msg send epoch)
diff --git a/src/osd/PG.cc b/src/osd/PG.cc
index 51b5c13..69331a5 100644
--- a/src/osd/PG.cc
+++ b/src/osd/PG.cc
@@ -157,7 +157,10 @@ void PGPool::update(OSDMapRef map)
   info = *pi;
   auid = pi->auid;
   name = map->get_pool_name(id);
-  if (pi->get_snap_epoch() == map->get_epoch()) {
+  bool updated = false;
+  if ((map->get_epoch() == cached_epoch + 1) &&
+      (pi->get_snap_epoch() == map->get_epoch())) {
+    updated = true;
     pi->build_removed_snaps(newly_removed_snaps);
     interval_set<snapid_t> intersection;
     intersection.intersection_of(newly_removed_snaps, cached_removed_snaps);
@@ -175,14 +178,14 @@ void PGPool::update(OSDMapRef map)
   } else {
     newly_removed_snaps.clear();
   }
+  cached_epoch = map->get_epoch();
   lgeneric_subdout(g_ceph_context, osd, 20)
     << "PGPool::update cached_removed_snaps "
     << cached_removed_snaps
     << " newly_removed_snaps "
     << newly_removed_snaps
     << " snapc " << snapc
-    << (pi->get_snap_epoch() == map->get_epoch() ?
-	" (updated)":" (no change)")
+    << (updated ? " (updated)":" (no change)")
     << dendl;
 }
 
@@ -5889,8 +5892,23 @@ PG::RecoveryState::Primary::Primary(my_context ctx)
   assert(pg->want_acting.empty());
 
   // set CREATING bit until we have peered for the first time.
-  if (pg->info.history.last_epoch_started == 0)
+  if (pg->info.history.last_epoch_started == 0) {
     pg->state_set(PG_STATE_CREATING);
+    // use the history timestamp, which ultimately comes from the
+    // monitor in the create case.
+    utime_t t = pg->info.history.last_scrub_stamp;
+    pg->info.stats.last_fresh = t;
+    pg->info.stats.last_active = t;
+    pg->info.stats.last_change = t;
+    pg->info.stats.last_peered = t;
+    pg->info.stats.last_clean = t;
+    pg->info.stats.last_unstale = t;
+    pg->info.stats.last_undegraded = t;
+    pg->info.stats.last_fullsized = t;
+    pg->info.stats.last_scrub_stamp = t;
+    pg->info.stats.last_deep_scrub_stamp = t;
+    pg->info.stats.last_clean_scrub_stamp = t;
+  }
 }
 
 boost::statechart::result PG::RecoveryState::Primary::react(const MNotifyRec& notevt)
diff --git a/src/osd/PG.h b/src/osd/PG.h
index 2121c67..10a81f8 100644
--- a/src/osd/PG.h
+++ b/src/osd/PG.h
@@ -165,6 +165,7 @@ struct PGRecoveryStats {
 };
 
 struct PGPool {
+  epoch_t cached_epoch;
   int64_t id;
   string name;
   uint64_t auid;
@@ -175,8 +176,17 @@ struct PGPool {
   interval_set<snapid_t> cached_removed_snaps;      // current removed_snaps set
   interval_set<snapid_t> newly_removed_snaps;  // newly removed in the last epoch
 
-  PGPool(int64_t i, const string& _name, uint64_t au)
-    : id(i), name(_name), auid(au) { }
+  PGPool(OSDMapRef map, int64_t i)
+    : cached_epoch(map->get_epoch()),
+      id(i),
+      name(map->get_pool_name(id)),
+      auid(map->get_pg_pool(id)->auid) {
+    const pg_pool_t *pi = map->get_pg_pool(id);
+    assert(pi);
+    info = *pi;
+    snapc = pi->get_snap_context();
+    pi->build_removed_snaps(cached_removed_snaps);
+  }
 
   void update(OSDMapRef map);
 };
diff --git a/src/osd/ReplicatedBackend.cc b/src/osd/ReplicatedBackend.cc
index 5c6d5e8..32b9f17 100644
--- a/src/osd/ReplicatedBackend.cc
+++ b/src/osd/ReplicatedBackend.cc
@@ -1824,7 +1824,11 @@ bool ReplicatedBackend::handle_pull_response(
 		   pop.omap_entries,
 		   t);
 
+  pi.stat.num_keys_recovered += pop.omap_entries.size();
+  pi.stat.num_bytes_recovered += data.length();
+
   if (complete) {
+    pi.stat.num_objects_recovered++;
     to_continue->push_back(hoid);
     get_parent()->on_local_recover(
       hoid, pi.recovery_info, pi.obc, t);
diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc
index 43a29ad..6dfa97b 100644
--- a/src/osd/ReplicatedPG.cc
+++ b/src/osd/ReplicatedPG.cc
@@ -1589,6 +1589,12 @@ void ReplicatedPG::do_op(OpRequestRef& op)
   m->finish_decode();
   m->clear_payload();
 
+  if (m->has_flag(CEPH_OSD_FLAG_PARALLELEXEC)) {
+    // not implemented.
+    osd->reply_op_error(op, -EINVAL);
+    return;
+  }
+
   if (op->rmw_flags == 0) {
     int r = osd->osd->init_op_flags(op);
     if (r) {
@@ -4034,14 +4040,15 @@ struct FillInVerifyExtent : public Context {
 
 struct ToSparseReadResult : public Context {
   bufferlist& data_bl;
+  uint64_t data_offset;
   ceph_le64& len;
-  ToSparseReadResult(bufferlist& bl, ceph_le64& len):
-    data_bl(bl), len(len) {}
+  ToSparseReadResult(bufferlist& bl, uint64_t offset, ceph_le64& len):
+    data_bl(bl), data_offset(offset),len(len) {}
   void finish(int r) {
     if (r < 0) return;
     len = r;
     bufferlist outdata;
-    map<uint64_t, uint64_t> extents = {{0, r}};
+    map<uint64_t, uint64_t> extents = {{data_offset, r}};
     ::encode(extents, outdata);
     ::encode_destructively(data_bl, outdata);
     data_bl.swap(outdata);
@@ -4305,7 +4312,7 @@ int ReplicatedPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops)
 	ctx->pending_async_reads.push_back(
 	  make_pair(
 	    boost::make_tuple(op.extent.offset, op.extent.length, op.flags),
-	    make_pair(&osd_op.outdata, new ToSparseReadResult(osd_op.outdata,
+	    make_pair(&osd_op.outdata, new ToSparseReadResult(osd_op.outdata, op.extent.offset,
 							      op.extent.length))));
 	dout(10) << " async_read (was sparse_read) noted for " << soid << dendl;
       } else {
@@ -6077,6 +6084,9 @@ inline int ReplicatedPG::_delete_oid(OpContext *ctx, bool no_whiteout)
     dout(20) << __func__ << " deleting whiteout on " << soid << dendl;
     ctx->delta_stats.num_whiteouts--;
   }
+  if (oi.is_cache_pinned()) {
+    ctx->delta_stats.num_objects_pinned--;
+  }
   if (soid.is_head())
     snapset.head_exists = false;
   obs.exists = false;
@@ -8437,12 +8447,6 @@ void ReplicatedPG::eval_repop(RepGather *repop)
 void ReplicatedPG::issue_repop(RepGather *repop, OpContext *ctx)
 {
   const hobject_t& soid = ctx->obs->oi.soid;
-  if (ctx->op &&
-    ((static_cast<MOSDOp *>(
-	ctx->op->get_req()))->has_flag(CEPH_OSD_FLAG_PARALLELEXEC))) {
-    // replicate original op for parallel execution on replica
-    assert(0 == "broken implementation, do not use");
-  }
   dout(7) << "issue_repop rep_tid " << repop->rep_tid
           << " o " << soid
           << dendl;
@@ -8526,7 +8530,7 @@ ReplicatedPG::RepGather *ReplicatedPG::new_repop(
   return repop;
 }
 
-ReplicatedPG::RepGather *ReplicatedPG::new_repop(
+boost::intrusive_ptr<ReplicatedPG::RepGather> ReplicatedPG::new_repop(
   ObcLockManager &&manager,
   boost::optional<std::function<void(void)> > &&on_complete)
 {
@@ -8539,11 +8543,10 @@ ReplicatedPG::RepGather *ReplicatedPG::new_repop(
   repop->start = ceph_clock_now(cct);
 
   repop_queue.push_back(&repop->queue_item);
-  repop->get();
 
   osd->logger->inc(l_osd_op_wip);
 
-  return repop;
+  return boost::intrusive_ptr<RepGather>(repop);
 }
  
 void ReplicatedPG::remove_repop(RepGather *repop)
@@ -12595,14 +12598,14 @@ void ReplicatedPG::_scrub(
   boost::optional<SnapSet> snapset; // If initialized so will head (above)
   vector<snapid_t>::reverse_iterator curclone; // Defined only if snapset initialized
   unsigned missing = 0;
-  inconsistent_snapset_wrapper snap_error;
+  inconsistent_snapset_wrapper soid_error, head_error;
 
   bufferlist last_data;
 
   for (map<hobject_t,ScrubMap::object, hobject_t::BitwiseComparator>::reverse_iterator
        p = scrubmap.objects.rbegin(); p != scrubmap.objects.rend(); ++p) {
     const hobject_t& soid = p->first;
-    snap_error = inconsistent_snapset_wrapper{soid};
+    soid_error = inconsistent_snapset_wrapper{soid};
     object_stat_sum_t stat;
     boost::optional<object_info_t> oi;
 
@@ -12623,7 +12626,7 @@ void ReplicatedPG::_scrub(
       osd->clog->error() << mode << " " << info.pgid << " " << soid
 			<< " no '" << OI_ATTR << "' attr";
       ++scrubber.shallow_errors;
-      snap_error.set_ss_attr_missing();
+      soid_error.set_oi_attr_missing();
     } else {
       bufferlist bv;
       bv.push_back(p->second.attrs[OI_ATTR]);
@@ -12635,7 +12638,8 @@ void ReplicatedPG::_scrub(
 	osd->clog->error() << mode << " " << info.pgid << " " << soid
 		<< " can't decode '" << OI_ATTR << "' attr " << e.what();
 	++scrubber.shallow_errors;
-	snap_error.set_ss_attr_corrupted();
+	soid_error.set_oi_attr_corrupted();
+        soid_error.set_oi_attr_missing(); // Not available too
       }
     }
 
@@ -12647,7 +12651,7 @@ void ReplicatedPG::_scrub(
 			   << oi->size << ") adjusted for ondisk to ("
 			   << pgbackend->be_get_ondisk_size(oi->size)
 			   << ")";
-	snap_error.set_size_mismatch();
+	soid_error.set_size_mismatch();
 	++scrubber.shallow_errors;
       }
 
@@ -12691,7 +12695,7 @@ void ReplicatedPG::_scrub(
       // This will set missing, but will be a no-op if snap.soid == *curclone.
       missing += process_clones_to(head, snapset, osd->clog, info.pgid, mode,
 		        pool.info.allow_incomplete_clones(), target, &curclone,
-			snap_error);
+			head_error);
     }
     bool expected;
     // Check doing_clones() again in case we ran process_clones_to()
@@ -12707,19 +12711,18 @@ void ReplicatedPG::_scrub(
       expected = soid.has_snapset();
     }
     if (!expected) {
-      // If we couldn't read the head's snapset, then just ignore clones and
-      // don't count as an error.
+      // If we couldn't read the head's snapset, just ignore clones
       if (head && !snapset) {
-	osd->clog->info() << mode << " " << info.pgid << " " << soid
+	osd->clog->error() << mode << " " << info.pgid << " " << soid
 			  << " clone ignored due to missing snapset";
-	scrubber.store->add_snap_error(pool.id, snap_error);
-	continue;
-      }
-      osd->clog->error() << mode << " " << info.pgid << " " << soid
+      } else {
+	osd->clog->error() << mode << " " << info.pgid << " " << soid
 			   << " is an unexpected clone";
+      }
       ++scrubber.shallow_errors;
-      snap_error.set_headless();
-      scrubber.store->add_snap_error(pool.id, snap_error);
+      soid_error.set_headless();
+      scrubber.store->add_snap_error(pool.id, soid_error);
+      head_error.set_clone(soid.snap);
       continue;
     }
 
@@ -12729,13 +12732,15 @@ void ReplicatedPG::_scrub(
       if (missing) {
 	log_missing(missing, head, osd->clog, info.pgid, __func__, mode,
 		    pool.info.allow_incomplete_clones());
-	scrubber.store->add_snap_error(pool.id, snap_error);
       }
 
+      // Save previous head error information
+      if (head && head_error.errors)
+	scrubber.store->add_snap_error(pool.id, head_error);
       // Set this as a new head object
       head = soid;
       missing = 0;
-      snap_error = inconsistent_snapset_wrapper{head.get()};
+      head_error = soid_error;
 
       dout(20) << __func__ << " " << mode << " new head " << head << dendl;
 
@@ -12744,7 +12749,7 @@ void ReplicatedPG::_scrub(
 			  << " no '" << SS_ATTR << "' attr";
         ++scrubber.shallow_errors;
 	snapset = boost::none;
-	snap_error.set_ss_attr_missing();
+	head_error.set_ss_attr_missing();
       } else {
 	bufferlist bl;
 	bl.push_back(p->second.attrs[SS_ATTR]);
@@ -12757,7 +12762,8 @@ void ReplicatedPG::_scrub(
           osd->clog->error() << mode << " " << info.pgid << " " << soid
 		<< " can't decode '" << SS_ATTR << "' attr " << e.what();
 	  ++scrubber.shallow_errors;
-	  snap_error.set_ss_attr_corrupted();
+	  head_error.set_ss_attr_corrupted();
+	  head_error.set_ss_attr_missing(); // Not available too
         }
       }
 
@@ -12771,7 +12777,7 @@ void ReplicatedPG::_scrub(
 	    osd->clog->error() << mode << " " << info.pgid << " " << soid
 			       << " snaps.seq not set";
 	    ++scrubber.shallow_errors;
-	    snap_error.set_snapset_mismatch();
+	    head_error.set_snapset_mismatch();
           }
 	}
 
@@ -12779,13 +12785,13 @@ void ReplicatedPG::_scrub(
 	  osd->clog->error() << mode << " " << info.pgid << " " << soid
 			  << " snapset.head_exists=false, but head exists";
 	  ++scrubber.shallow_errors;
-	  snap_error.set_head_mismatch();
+	  head_error.set_head_mismatch();
 	}
 	if (soid.is_snapdir() && snapset->head_exists) {
 	  osd->clog->error() << mode << " " << info.pgid << " " << soid
 			  << " snapset.head_exists=true, but snapdir exists";
 	  ++scrubber.shallow_errors;
-	  snap_error.set_head_mismatch();
+	  head_error.set_head_mismatch();
 	}
       }
     } else {
@@ -12800,21 +12806,21 @@ void ReplicatedPG::_scrub(
 	osd->clog->error() << mode << " " << info.pgid << " " << soid
 			   << " is missing in clone_size";
 	++scrubber.shallow_errors;
-	snap_error.set_size_mismatch();
+	soid_error.set_size_mismatch();
       } else {
         if (oi && oi->size != snapset->clone_size[soid.snap]) {
 	  osd->clog->error() << mode << " " << info.pgid << " " << soid
 			     << " size " << oi->size << " != clone_size "
 			     << snapset->clone_size[*curclone];
 	  ++scrubber.shallow_errors;
-	  snap_error.set_size_mismatch();
+	  soid_error.set_size_mismatch();
         }
 
         if (snapset->clone_overlap.count(soid.snap) == 0) {
 	  osd->clog->error() << mode << " " << info.pgid << " " << soid
 			     << " is missing in clone_overlap";
 	  ++scrubber.shallow_errors;
-	  snap_error.set_size_mismatch();
+	  soid_error.set_size_mismatch();
 	} else {
 	  // This checking is based on get_clone_bytes().  The first 2 asserts
 	  // can't happen because we know we have a clone_size and
@@ -12837,7 +12843,7 @@ void ReplicatedPG::_scrub(
 	    osd->clog->error() << mode << " " << info.pgid << " " << soid
 			       << " bad interval_set in clone_overlap";
 	    ++scrubber.shallow_errors;
-	    snap_error.set_size_mismatch();
+	    soid_error.set_size_mismatch();
 	  } else {
             stat.num_bytes += snapset->get_clone_bytes(soid.snap);
 	  }
@@ -12846,6 +12852,8 @@ void ReplicatedPG::_scrub(
 
       // what's next?
       ++curclone;
+      if (soid_error.errors)
+        scrubber.store->add_snap_error(pool.id, soid_error);
     }
 
     scrub_cstat.add(stat);
@@ -12857,15 +12865,16 @@ void ReplicatedPG::_scrub(
 
     missing += process_clones_to(head, snapset, osd->clog, info.pgid, mode,
 		      pool.info.allow_incomplete_clones(), all_clones, &curclone,
-		      snap_error);
+		      head_error);
   }
   // There could be missing found by the test above or even
   // before dropping out of the loop for the last head.
   if (missing) {
     log_missing(missing, head, osd->clog, info.pgid, __func__,
 		mode, pool.info.allow_incomplete_clones());
-    scrubber.store->add_snap_error(pool.id, snap_error);
   }
+  if (head && head_error.errors)
+    scrubber.store->add_snap_error(pool.id, head_error);
 
   for (map<hobject_t,pair<uint32_t,uint32_t>, hobject_t::BitwiseComparator>::const_iterator p =
 	 missing_digest.begin();
diff --git a/src/osd/ReplicatedPG.h b/src/osd/ReplicatedPG.h
index 47a6a16..cfbfbe5 100644
--- a/src/osd/ReplicatedPG.h
+++ b/src/osd/ReplicatedPG.h
@@ -878,7 +878,7 @@ protected:
     OpContext *ctx,
     ObjectContextRef obc,
     ceph_tid_t rep_tid);
-  RepGather *new_repop(
+  boost::intrusive_ptr<RepGather> new_repop(
     ObcLockManager &&manager,
     boost::optional<std::function<void(void)> > &&on_complete);
   void remove_repop(RepGather *repop);
diff --git a/src/osd/osd_types.h b/src/osd/osd_types.h
index 2dd16b0..6887fa6 100644
--- a/src/osd/osd_types.h
+++ b/src/osd/osd_types.h
@@ -3056,6 +3056,25 @@ ostream& operator<<(ostream& out, const osd_peer_stat_t &stat);
 // -----------------------------------------
 
 class ObjectExtent {
+  /**
+   * ObjectExtents are used for specifying IO behavior against RADOS
+   * objects when one is using the ObjectCacher.
+   *
+   * To use this in a real system, *every member* must be filled
+   * out correctly. In particular, make sure to initialize the
+   * oloc correctly, as its default values are deliberate poison
+   * and will cause internal ObjectCacher asserts.
+   *
+   * Similarly, your buffer_extents vector *must* specify a total
+   * size equal to your length. If the buffer_extents inadvertently
+   * contain less space than the length member specifies, you
+   * will get unintelligible asserts deep in the ObjectCacher.
+   *
+   * If you are trying to do testing and don't care about actual
+   * RADOS function, the simplest thing to do is to initialize
+   * the ObjectExtent (truncate_size can be 0), create a single entry
+   * in buffer_extents matching the length, and set oloc.pool to 0.
+   */
  public:
   object_t    oid;       // object id
   uint64_t    objectno;
diff --git a/src/osdc/Journaler.cc b/src/osdc/Journaler.cc
index fb38a08..3e20aa9 100644
--- a/src/osdc/Journaler.cc
+++ b/src/osdc/Journaler.cc
@@ -1492,7 +1492,7 @@ void Journaler::shutdown()
     f->complete(-EAGAIN);
   }
 
-  finish_contexts(cct, waitfor_recover, 0);
+  finish_contexts(cct, waitfor_recover, -ESHUTDOWN);
 
   std::map<uint64_t, std::list<Context*> >::iterator i;
   for (i = waitfor_safe.begin(); i != waitfor_safe.end(); ++i) {
diff --git a/src/osdc/ObjectCacher.cc b/src/osdc/ObjectCacher.cc
index 7e76b3c..8732325 100644
--- a/src/osdc/ObjectCacher.cc
+++ b/src/osdc/ObjectCacher.cc
@@ -833,7 +833,6 @@ void ObjectCacher::bh_read_finish(int64_t poolid, sobject_t oid,
       if (bh->error < 0)
 	err = bh->error;
 
-      loff_t oldpos = opos;
       opos = bh->end();
 
       if (r == -ENOENT) {
@@ -853,7 +852,7 @@ void ObjectCacher::bh_read_finish(int64_t poolid, sobject_t oid,
 	mark_error(bh);
       } else {
 	bh->bl.substr_of(bl,
-			 oldpos-bh->start(),
+			 bh->start() - start,
 			 bh->length());
 	mark_clean(bh);
       }
diff --git a/src/osdc/Objecter.cc b/src/osdc/Objecter.cc
index 2f028cf..45c7e66 100644
--- a/src/osdc/Objecter.cc
+++ b/src/osdc/Objecter.cc
@@ -358,12 +358,14 @@ void Objecter::init()
 /*
  * ok, cluster interaction can happen
  */
-void Objecter::start()
+void Objecter::start(const OSDMap* o)
 {
   shared_lock rl(rwlock);
 
   start_tick();
-  if (osdmap->get_epoch() == 0) {
+  if (o) {
+    osdmap->deepish_copy_from(*o);
+  } else if (osdmap->get_epoch() == 0) {
     _maybe_request_map();
   }
 }
diff --git a/src/osdc/Objecter.h b/src/osdc/Objecter.h
index 6b519ff..8167306 100644
--- a/src/osdc/Objecter.h
+++ b/src/osdc/Objecter.h
@@ -1973,7 +1973,7 @@ private:
   ~Objecter();
 
   void init();
-  void start();
+  void start(const OSDMap *o = nullptr);
   void shutdown();
 
   // These two templates replace osdmap_(get)|(put)_read. Simply wrap
diff --git a/src/pybind/ceph_argparse.py b/src/pybind/ceph_argparse.py
index 021c53d..dc93ddc 100644
--- a/src/pybind/ceph_argparse.py
+++ b/src/pybind/ceph_argparse.py
@@ -19,10 +19,15 @@ import socket
 import stat
 import sys
 import threading
-import types
 import uuid
 
 
+try:
+    basestring
+except NameError:
+    basestring = str
+
+
 class ArgumentError(Exception):
     """
     Something wrong with arguments
@@ -134,11 +139,11 @@ class CephInt(CephArgtype):
             self.range = list()
         else:
             self.range = list(range.split('|'))
-            self.range = map(long, self.range)
+            self.range = [int(x) for x in self.range]
 
     def valid(self, s, partial=False):
         try:
-            val = long(s)
+            val = int(s)
         except ValueError:
             raise ArgumentValid("{0} doesn't represent an int".format(s))
         if len(self.range) == 2:
@@ -169,7 +174,7 @@ class CephFloat(CephArgtype):
             self.range = list()
         else:
             self.range = list(range.split('|'))
-            self.range = map(float, self.range)
+            self.range = [float(x) for x in self.range]
 
     def valid(self, s, partial=False):
         try:
@@ -287,7 +292,7 @@ class CephIPAddr(CephArgtype):
                 socket.inet_pton(socket.AF_INET6, a)
             except:
                 raise ArgumentValid('{0} not valid IPv6 address'.format(s))
-        if p is not None and long(p) > 65535:
+        if p is not None and int(p) > 65535:
             raise ArgumentValid("{0} not a valid port number".format(p))
         self.val = s
         self.addr = a
@@ -309,12 +314,12 @@ class CephEntityAddr(CephIPAddr):
             ip = s
         super(self.__class__, self).valid(ip)
         if nonce:
-            nonce_long = None
+            nonce_int = None
             try:
-                nonce_long = long(nonce)
+                nonce_int = int(nonce)
             except ValueError:
                 pass
-            if nonce_long is None or nonce_long < 0:
+            if nonce_int is None or nonce_int < 0:
                 raise ArgumentValid(
                     '{0}: invalid entity, nonce {1} not integer > 0'.
                     format(s, nonce)
@@ -492,11 +497,11 @@ class CephFragment(CephArgtype):
         if not val.startswith('0x'):
             raise ArgumentFormat("{0} not a hex integer".format(val))
         try:
-            long(val)
+            int(val)
         except:
             raise ArgumentFormat('can\'t convert {0} to integer'.format(val))
         try:
-            long(bits)
+            int(bits)
         except:
             raise ArgumentFormat('can\'t convert {0} to integer'.format(bits))
         self.val = s
@@ -529,9 +534,13 @@ class CephPrefix(CephArgtype):
 
     def valid(self, s, partial=False):
         try:
-            # `prefix` can always be converted into unicode when being compared,
-            # but `s` could be anything passed by user.
-            s = unicode(s)
+            s = str(s)
+            if isinstance(s, bytes):
+                # `prefix` can always be converted into unicode when being compared,
+                # but `s` could be anything passed by user.
+                s = s.decode('ascii')
+        except UnicodeEncodeError:
+            raise ArgumentPrefix(u"no match for {0}".format(s))
         except UnicodeDecodeError:
             raise ArgumentPrefix("no match for {0}".format(s))
 
@@ -577,7 +586,7 @@ class argdesc(object):
     and will store the validated value in self.instance.val for extraction.
     """
     def __init__(self, t, name=None, n=1, req=True, **kwargs):
-        if isinstance(t, types.StringTypes):
+        if isinstance(t, basestring):
             self.t = CephPrefix
             self.typeargs = {'prefix': t}
             self.req = True
@@ -597,7 +606,7 @@ class argdesc(object):
     def __repr__(self):
         r = 'argdesc(' + str(self.t) + ', '
         internals = ['N', 'typeargs', 'instance', 't']
-        for (k, v) in self.__dict__.iteritems():
+        for (k, v) in self.__dict__.items():
             if k.startswith('__') or k in internals:
                 pass
             else:
@@ -605,7 +614,7 @@ class argdesc(object):
                 if k == 'n' and self.N:
                     v = 'N'
                 r += '{0}={1}, '.format(k, v)
-        for (k, v) in self.typeargs.iteritems():
+        for (k, v) in self.typeargs.items():
             r += '{0}={1}, '.format(k, v)
         return r[:-2] + ')'
 
@@ -665,7 +674,7 @@ def parse_funcsig(sig):
     argnum = 0
     for desc in sig:
         argnum += 1
-        if isinstance(desc, types.StringTypes):
+        if isinstance(desc, basestring):
             t = CephPrefix
             desc = {'type': t, 'name': 'prefix', 'prefix': desc}
         else:
@@ -674,11 +683,11 @@ def parse_funcsig(sig):
                 s = 'JSON descriptor {0} has no type'.format(sig)
                 raise JsonFormat(s)
             # look up type string in our globals() dict; if it's an
-            # object of type types.TypeType, it must be a
+            # object of type `type`, it must be a
             # locally-defined class. otherwise, we haven't a clue.
             if desc['type'] in globals():
                 t = globals()[desc['type']]
-                if not isinstance(t, types.TypeType):
+                if not isinstance(t, type):
                     s = 'unknown type {0}'.format(desc['type'])
                     raise JsonFormat(s)
             else:
@@ -734,7 +743,7 @@ def parse_json_funcsigs(s, consumer):
         print >> sys.stderr, "Couldn't parse JSON {0}: {1}".format(s, e)
         raise e
     sigdict = {}
-    for cmdtag, cmd in overall.iteritems():
+    for cmdtag, cmd in overall.items():
         if 'sig' not in cmd:
             s = "JSON descriptor {0} has no 'sig'".format(cmdtag)
             raise JsonFormat(s)
@@ -959,7 +968,8 @@ def validate(args, signature, partial=False):
 def cmdsiglen(sig):
     sigdict = sig.values()
     assert len(sigdict) == 1
-    return len(sig.values()[0]['sig'])
+    some_value = next(iter(sig.values()))
+    return len(some_value['sig'])
 
 
 def validate_command(sigdict, args, verbose=False):
@@ -977,7 +987,7 @@ def validate_command(sigdict, args, verbose=False):
         # (so we can maybe give a more-useful error message)
         best_match_cnt = 0
         bestcmds = []
-        for cmdtag, cmd in sigdict.iteritems():
+        for cmdtag, cmd in sigdict.items():
             sig = cmd['sig']
             matched = matchnum(args, sig, partial=True)
             if (matched > best_match_cnt):
@@ -998,8 +1008,7 @@ def validate_command(sigdict, args, verbose=False):
 
         # Sort bestcmds by number of args so we can try shortest first
         # (relies on a cmdsig being key,val where val is a list of len 1)
-        bestcmds_sorted = sorted(bestcmds,
-                                 cmp=lambda x, y: cmp(cmdsiglen(x), cmdsiglen(y)))
+        bestcmds_sorted = sorted(bestcmds, key=cmdsiglen)
 
         if verbose:
             print >> sys.stderr, "bestcmds_sorted: "
@@ -1007,7 +1016,7 @@ def validate_command(sigdict, args, verbose=False):
 
         # for everything in bestcmds, look for a true match
         for cmdsig in bestcmds_sorted:
-            for cmd in cmdsig.itervalues():
+            for cmd in cmdsig.values():
                 sig = cmd['sig']
                 try:
                     valid_dict = validate(args, sig)
@@ -1118,14 +1127,14 @@ class RadosThread(threading.Thread):
         self.args = args
         self.kwargs = kwargs
         self.target = target
-	self.exception = None
+        self.exception = None
         threading.Thread.__init__(self)
 
     def run(self):
         try:
-		self.retval = self.target(*self.args, **self.kwargs)
-	except Exception as e:
-		self.exception = e
+            self.retval = self.target(*self.args, **self.kwargs)
+        except Exception as e:
+            self.exception = e
 
 
 # time in seconds between each call to t.join() for child thread
diff --git a/src/pybind/ceph_volume_client.py b/src/pybind/ceph_volume_client.py
index e37c904..aa63157 100644
--- a/src/pybind/ceph_volume_client.py
+++ b/src/pybind/ceph_volume_client.py
@@ -4,17 +4,24 @@ Copyright (C) 2015 Red Hat, Inc.
 LGPL2.  See file COPYING.
 """
 
+from contextlib import contextmanager
+import errno
+import fcntl
 import json
 import logging
 import os
+import re
+import struct
+import sys
 import threading
-import errno
 import time
+import uuid
 
-import rados
-import cephfs
 from ceph_argparse import json_command
 
+import cephfs
+import rados
+
 
 class RadosError(Exception):
     """
@@ -33,6 +40,8 @@ log = logging.getLogger(__name__)
 # that are not assigned to a group (i.e. created with group=None)
 NO_GROUP_NAME = "_nogroup"
 
+# Filename extensions for meta files.
+META_FILE_EXT = ".meta"
 
 class VolumePath(object):
     """
@@ -183,6 +192,22 @@ class EvictionError(Exception):
     pass
 
 
+class CephFSVolumeClientError(Exception):
+    """
+    Something went wrong talking to Ceph using CephFSVolumeClient.
+    """
+    pass
+
+
+CEPHFSVOLUMECLIENT_VERSION_HISTORY = """
+
+    CephFSVolumeClient Version History:
+
+    * 1 - Initial version
+
+"""
+
+
 class CephFSVolumeClient(object):
     """
     Combine libcephfs and librados interfaces to implement a
@@ -202,6 +227,11 @@ class CephFSVolumeClient(object):
     or cephfs.Error exceptions in unexpected situations.
     """
 
+    # Current version
+    version = 1
+    # Earliest compatible version
+    compat_version = 1
+
     # Where shall we create our volumes?
     POOL_PREFIX = "fsvolume_"
     DEFAULT_VOL_PREFIX = "/volumes"
@@ -216,6 +246,131 @@ class CephFSVolumeClient(object):
         self.auth_id = auth_id
         self.volume_prefix = volume_prefix if volume_prefix else self.DEFAULT_VOL_PREFIX
         self.pool_ns_prefix = pool_ns_prefix if pool_ns_prefix else self.DEFAULT_NS_PREFIX
+        # For flock'ing in cephfs, I want a unique ID to distinguish me
+        # from any other manila-share services that are loading this module.
+        # We could use pid, but that's unnecessary weak: generate a
+        # UUID
+        self._id = struct.unpack(">Q", uuid.uuid1().get_bytes()[0:8])[0]
+
+        # TODO: version the on-disk structures
+
+    def recover(self):
+        # Scan all auth keys to see if they're dirty: if they are, they have
+        # state that might not have propagated to Ceph or to the related
+        # volumes yet.
+
+        # Important: we *always* acquire locks in the order auth->volume
+        # That means a volume can never be dirty without the auth key
+        # we're updating it with being dirty at the same time.
+
+        # First list the auth IDs that have potentially dirty on-disk metadata
+        log.debug("Recovering from partial auth updates (if any)...")
+
+        try:
+            dir_handle = self.fs.opendir(self.volume_prefix)
+        except cephfs.ObjectNotFound:
+            log.debug("Nothing to recover. No auth meta files.")
+            return
+
+        d = self.fs.readdir(dir_handle)
+        auth_ids = []
+
+        if not d:
+            log.debug("Nothing to recover. No auth meta files.")
+
+        while d:
+            # Identify auth IDs from auth meta filenames. The auth meta files
+            # are named as, "$<auth_id><meta filename extension>"
+            regex = "^\$(.*){0}$".format(re.escape(META_FILE_EXT))
+            match = re.search(regex, d.d_name)
+            if match:
+                auth_ids.append(match.group(1))
+
+            d = self.fs.readdir(dir_handle)
+
+        self.fs.closedir(dir_handle)
+
+        # Key points based on ordering:
+        # * Anything added in VMeta is already added in AMeta
+        # * Anything added in Ceph is already added in VMeta
+        # * Anything removed in VMeta is already removed in Ceph
+        # * Anything removed in AMeta is already removed in VMeta
+
+        # Deauthorization: because I only update metadata AFTER the
+        # update of the next level down, I have the same ordering of
+        # -> things which exist in the AMeta should also exist
+        #    in the VMeta, should also exist in Ceph, and the same
+        #    recovery procedure that gets me consistent after crashes
+        #    during authorization will also work during deauthorization
+
+        # Now for each auth ID, check for dirty flag and apply updates
+        # if dirty flag is found
+        for auth_id in auth_ids:
+            with self._auth_lock(auth_id):
+                auth_meta = self._auth_metadata_get(auth_id)
+                if not auth_meta or not auth_meta['volumes']:
+                    # Clean up auth meta file
+                    self.fs.unlink(self._auth_metadata_path(auth_id))
+                    continue
+                if not auth_meta['dirty']:
+                    continue
+                self._recover_auth_meta(auth_id, auth_meta)
+
+        log.debug("Recovered from partial auth updates (if any).")
+
+    def _recover_auth_meta(auth_id, auth_meta):
+        """
+        Call me after locking the auth meta file.
+        """
+        remove_volumes = []
+
+        for volume, volume_data in auth_meta['volumes'].items():
+            if not volume_data['dirty']:
+                continue
+
+            (group_id, volume_id) = volume.split('/')
+            volume_path = VolumePath(group_id, volume_id)
+            access_level = volume_data['access_level']
+
+            with self._volume_lock(volume_path):
+                vol_meta = self._volume_metadata_get(volume_path)
+
+                # No VMeta update indicates that there was no auth update
+                # in Ceph either. So it's safe to remove corresponding
+                # partial update in AMeta.
+                if auth_id not in vol_meta['auths']:
+                    remove_volumes.append(volume)
+                    continue
+
+                want_auth = {
+                    'access_level': access_level,
+                    'dirty': False,
+                }
+                # VMeta update looks clean. Ceph auth update must have been
+                # clean.
+                if vol_meta['auths'][auth_id] == want_auth:
+                    continue
+
+                readonly = True if access_level is 'r' else False
+                self._authorize_volume(volume_path, auth_id, readonly)
+
+            # Recovered from partial auth updates for the auth ID's access
+            # to a volume.
+            auth_meta['volumes'][volume]['dirty'] = False
+            self._auth_metadata_set(auth_id, auth_meta)
+
+        for volume in remove_volumes:
+            del auth_meta['volumes'][volume]
+
+        if not auth_meta['volumes']:
+            # Clean up auth meta file
+            self.fs.unlink(self._auth_metadata_path(auth_id))
+            return
+
+        # Recovered from all partial auth updates for the auth ID.
+        auth_meta['dirty'] = False
+        self._auth_metadata_set(auth_id, auth_meta)
+
 
     def evict(self, auth_id, timeout=30, volume_path=None):
         """
@@ -315,6 +470,10 @@ class CephFSVolumeClient(object):
         self.fs.mount()
         log.debug("Connection to cephfs complete")
 
+        # Recover from partial auth updates due to a previous
+        # crash.
+        self.recover()
+
     def get_mon_addrs(self):
         log.info("get_mon_addrs")
         result = []
@@ -343,7 +502,8 @@ class CephFSVolumeClient(object):
         self.disconnect()
 
     def _get_pool_id(self, osd_map, pool_name):
-        # Maybe borrow the OSDMap wrapper class from calamari if more helpers like this aren't needed.
+        # Maybe borrow the OSDMap wrapper class from calamari if more helpers
+        # like this are needed.
         for pool in osd_map['pools']:
             if pool['pool_name'] == pool_name:
                 return pool['pool']
@@ -405,6 +565,11 @@ class CephFSVolumeClient(object):
             return pool_id
 
     def create_group(self, group_id):
+        # Prevent craftily-named volume groups from colliding with the meta
+        # files.
+        if group_id.endswith(META_FILE_EXT):
+            raise ValueError("group ID cannot end with '{0}'.".format(
+                META_FILE_EXT))
         path = self._get_group_path(group_id)
         self._mkdir_p(path)
 
@@ -446,15 +611,15 @@ class CephFSVolumeClient(object):
         :param data_isolated: If true, create a separate OSD pool for this volume
         :return:
         """
-        log.info("create_volume: {0}".format(volume_path))
         path = self._get_path(volume_path)
+        log.info("create_volume: {0}".format(path))
 
         self._mkdir_p(path)
 
         if size is not None:
             self.fs.setxattr(path, 'ceph.quota.max_bytes', size.__str__(), 0)
 
-        # data_isolated means create a seperate pool for this volume
+        # data_isolated means create a separate pool for this volume
         if data_isolated:
             pool_name = "{0}{1}".format(self.POOL_PREFIX, volume_path.volume_id)
             log.info("create_volume: {0}, create pool {1} as data_isolated =True.".format(volume_path, pool_name))
@@ -471,6 +636,12 @@ class CephFSVolumeClient(object):
         log.info("create_volume: {0}, using rados namespace {1} to isolate data.".format(volume_path, namespace))
         self.fs.setxattr(path, 'ceph.dir.layout.pool_namespace', namespace, 0)
 
+        # Create a volume meta file, if it does not already exist, to store
+        # data about auth ids having access to the volume
+        fd = self.fs.open(self._volume_metadata_path(volume_path),
+                          os.O_CREAT, 0755)
+        self.fs.close(fd)
+
         return {
             'mount_path': path
         }
@@ -485,7 +656,8 @@ class CephFSVolumeClient(object):
         :return:
         """
 
-        log.info("delete_volume: {0}".format(volume_path))
+        path = self._get_path(volume_path)
+        log.info("delete_volume: {0}".format(path))
 
         # Create the trash folder if it doesn't already exist
         trash = os.path.join(self.volume_prefix, "_deleting")
@@ -495,7 +667,6 @@ class CephFSVolumeClient(object):
         trashed_volume = os.path.join(trash, volume_path.volume_id)
 
         # Move the volume's data to the trash folder
-        path = self._get_path(volume_path)
         try:
             self.fs.stat(path)
         except cephfs.ObjectNotFound:
@@ -504,6 +675,13 @@ class CephFSVolumeClient(object):
         else:
             self.fs.rename(path, trashed_volume)
 
+        # Delete the volume meta file, if it's not already deleted
+        vol_meta_path = self._volume_metadata_path(volume_path)
+        try:
+            self.fs.unlink(vol_meta_path)
+        except cephfs.ObjectNotFound:
+            pass
+
     def purge_volume(self, volume_path, data_isolated=False):
         """
         Finish clearing up a volume that was previously passed to delete_volume.  This
@@ -576,26 +754,284 @@ class CephFSVolumeClient(object):
             else:
                 return self._get_ancestor_xattr(os.path.split(path)[0], attr)
 
-    def authorize(self, volume_path, auth_id):
+    def _check_compat_version(self, compat_version):
+        if self.version < compat_version:
+            msg = ("The current version of CephFSVolumeClient, version {0} "
+                   "does not support the required feature. Need version {1} "
+                   "or greater".format(self.version, compat_version)
+                  )
+            log.error(msg)
+            raise CephFSVolumeClientError(msg)
+
+    def _metadata_get(self, path):
+        """
+        Return a deserialized JSON object, or None
+        """
+        fd = self.fs.open(path, "r")
+        # TODO iterate instead of assuming file < 4MB
+        read_bytes = self.fs.read(fd, 0, 4096 * 1024)
+        self.fs.close(fd)
+        if read_bytes:
+            return json.loads(read_bytes)
+        else:
+            return None
+
+    def _metadata_set(self, path, data):
+        serialized = json.dumps(data)
+        fd = self.fs.open(path, "w")
+        try:
+            self.fs.write(fd, serialized, 0)
+            self.fs.fsync(fd, 0)
+        finally:
+            self.fs.close(fd)
+
+    def _lock(self, path):
+        @contextmanager
+        def fn():
+            while(1):
+                fd = self.fs.open(path, os.O_CREAT, 0755)
+                self.fs.flock(fd, fcntl.LOCK_EX, self._id)
+
+                # The locked file will be cleaned up sometime. It could be
+                # unlinked e.g., by an another manila share instance, before
+                # lock was applied on it. Perform checks to ensure that this
+                # does not happen.
+                try:
+                    statbuf = self.fs.stat(path)
+                except cephfs.ObjectNotFound:
+                    self.fs.close(fd)
+                    continue
+
+                fstatbuf = self.fs.fstat(fd)
+                if statbuf.st_ino == fstatbuf.st_ino:
+                    break
+
+            try:
+                yield
+            finally:
+                self.fs.flock(fd, fcntl.LOCK_UN, self._id)
+                self.fs.close(fd)
+
+        return fn()
+
+    def _auth_metadata_path(self, auth_id):
+        return os.path.join(self.volume_prefix, "${0}{1}".format(
+            auth_id, META_FILE_EXT))
+
+    def _auth_lock(self, auth_id):
+        return self._lock(self._auth_metadata_path(auth_id))
+
+    def _auth_metadata_get(self, auth_id):
+        """
+        Call me with the metadata locked!
+
+        Check whether a auth metadata structure can be decoded by the current
+        version of CephFSVolumeClient.
+
+        Return auth metadata that the current version of CephFSVolumeClient
+        can decode.
+        """
+        auth_metadata = self._metadata_get(self._auth_metadata_path(auth_id))
+
+        if auth_metadata:
+            self._check_compat_version(auth_metadata['compat_version'])
+
+        return auth_metadata
+
+    def _auth_metadata_set(self, auth_id, data):
+        """
+        Call me with the metadata locked!
+
+        Fsync the auth metadata.
+
+        Add two version attributes to the auth metadata,
+        'compat_version', the minimum CephFSVolumeClient version that can
+        decode the metadata, and 'version', the CephFSVolumeClient version
+        that encoded the metadata.
+        """
+        data['compat_version'] = 1
+        data['version'] = 1
+        return self._metadata_set(self._auth_metadata_path(auth_id), data)
+
+    def _volume_metadata_path(self, volume_path):
+        return os.path.join(self.volume_prefix, "_{0}:{1}{2}".format(
+            volume_path.group_id if volume_path.group_id else "",
+            volume_path.volume_id,
+            META_FILE_EXT
+        ))
+
+    def _volume_lock(self, volume_path):
+        """
+        Return a ContextManager which locks the authorization metadata for
+        a particular volume, and persists a flag to the metadata indicating
+        that it is currently locked, so that we can detect dirty situations
+        during recovery.
+
+        This lock isn't just to make access to the metadata safe: it's also
+        designed to be used over the two-step process of checking the
+        metadata and then responding to an authorization request, to
+        ensure that at the point we respond the metadata hasn't changed
+        in the background.  It's key to how we avoid security holes
+        resulting from races during that problem ,
+        """
+        return self._lock(self._volume_metadata_path(volume_path))
+
+    def _volume_metadata_get(self, volume_path):
+        """
+        Call me with the metadata locked!
+
+        Check whether a volume metadata structure can be decoded by the current
+        version of CephFSVolumeClient.
+
+        Return a volume_metadata structure that the current version of
+        CephFSVolumeClient can decode.
+        """
+        volume_metadata = self._metadata_get(self._volume_metadata_path(volume_path))
+
+        if volume_metadata:
+            self._check_compat_version(volume_metadata['compat_version'])
+
+        return volume_metadata
+
+    def _volume_metadata_set(self, volume_path, data):
+        """
+        Call me with the metadata locked!
+
+        Add two version attributes to the volume metadata,
+        'compat_version', the minimum CephFSVolumeClient version that can
+        decode the metadata and 'version', the CephFSVolumeClient version
+        that encoded the metadata.
+        """
+        data['compat_version'] = 1
+        data['version'] = 1
+        return self._metadata_set(self._volume_metadata_path(volume_path), data)
+
+    def authorize(self, volume_path, auth_id, readonly=False, tenant_id=None):
         """
         Get-or-create a Ceph auth identity for `auth_id` and grant them access
         to
         :param volume_path:
         :param auth_id:
+        :param readonly:
+        :param tenant_id: Optionally provide a stringizable object to
+                          restrict any created cephx IDs to other callers
+                          passing the same tenant ID.
         :return:
         """
 
+        with self._auth_lock(auth_id):
+            # Existing meta, or None, to be updated
+            auth_meta = self._auth_metadata_get(auth_id)
+
+            # volume data to be inserted
+            volume_path_str = str(volume_path)
+            volume = {
+                volume_path_str : {
+                    # The access level at which the auth_id is authorized to
+                    # access the volume.
+                    'access_level': 'r' if readonly else 'rw',
+                    'dirty': True,
+                }
+            }
+            if auth_meta is None:
+                sys.stderr.write("Creating meta for ID {0} with tenant {1}\n".format(
+                    auth_id, tenant_id
+                ))
+                log.debug("Authorize: no existing meta")
+                auth_meta = {
+                    'dirty': True,
+                    'tenant_id': tenant_id.__str__() if tenant_id else None,
+                    'volumes': volume
+                }
+
+                # Note: this is *not* guaranteeing that the key doesn't already
+                # exist in Ceph: we are allowing VolumeClient tenants to
+                # 'claim' existing Ceph keys.  In order to prevent VolumeClient
+                # tenants from reading e.g. client.admin keys, you need to
+                # have configured your VolumeClient user (e.g. Manila) to
+                # have mon auth caps that prevent it from accessing those keys
+                # (e.g. limit it to only access keys with a manila.* prefix)
+            else:
+                # Disallow tenants to share auth IDs
+                if auth_meta['tenant_id'].__str__() != tenant_id.__str__():
+                    msg = "auth ID: {0} is already in use".format(auth_id)
+                    log.error(msg)
+                    raise CephFSVolumeClientError(msg)
+
+                if auth_meta['dirty']:
+                    self._recover_auth_meta(auth_id, auth_meta)
+
+                log.debug("Authorize: existing tenant {tenant}".format(
+                    tenant=auth_meta['tenant_id']
+                ))
+                auth_meta['dirty'] = True
+                auth_meta['volumes'].update(volume)
+
+            self._auth_metadata_set(auth_id, auth_meta)
+
+            with self._volume_lock(volume_path):
+                key = self._authorize_volume(volume_path, auth_id, readonly)
+
+            auth_meta['dirty'] = False
+            auth_meta['volumes'][volume_path_str]['dirty'] = False
+            self._auth_metadata_set(auth_id, auth_meta)
+
+            if tenant_id:
+                return {
+                    'auth_key': key
+                }
+            else:
+                # Caller wasn't multi-tenant aware: be safe and don't give
+                # them a key
+                return {
+                    'auth_key': None
+                }
+
+    def _authorize_volume(self, volume_path, auth_id, readonly):
+        vol_meta = self._volume_metadata_get(volume_path)
+
+        access_level = 'r' if readonly else 'rw'
+        auth = {
+            auth_id: {
+                'access_level': access_level,
+                'dirty': True,
+            }
+        }
+
+        if vol_meta is None:
+            vol_meta = {
+                'auths': auth
+            }
+        else:
+            vol_meta['auths'].update(auth)
+            self._volume_metadata_set(volume_path, vol_meta)
+
+        key = self._authorize_ceph(volume_path, auth_id, readonly)
+
+        vol_meta['auths'][auth_id]['dirty'] = False
+        self._volume_metadata_set(volume_path, vol_meta)
+
+        return key
+
+    def _authorize_ceph(self, volume_path, auth_id, readonly):
+        path = self._get_path(volume_path)
+        log.debug("Authorizing Ceph id '{0}' for path '{1}'".format(
+            auth_id, path
+        ))
+
         # First I need to work out what the data pool is for this share:
         # read the layout
-        path = self._get_path(volume_path)
         pool_name = self._get_ancestor_xattr(path, "ceph.dir.layout.pool")
         namespace = self.fs.getxattr(path, "ceph.dir.layout.pool_namespace")
 
         # Now construct auth capabilities that give the guest just enough
         # permissions to access the share
         client_entity = "client.{0}".format(auth_id)
-        want_mds_cap = 'allow rw path={0}'.format(path)
-        want_osd_cap = 'allow rw pool={0} namespace={1}'.format(pool_name, namespace)
+        want_access_level = 'r' if readonly else 'rw'
+        want_mds_cap = 'allow {0} path={1}'.format(want_access_level, path)
+        want_osd_cap = 'allow {0} pool={1} namespace={2}'.format(
+            want_access_level, pool_name, namespace)
+
         try:
             existing = self._rados_command(
                 'auth get',
@@ -615,18 +1051,30 @@ class CephFSVolumeClient(object):
                         'mon', 'allow r']
                 })
         else:
-            # entity exists, extend it
+            # entity exists, update it
             cap = existing[0]
 
-            def cap_extend(orig, want):
-                cap_tokens = orig.split(",")
-                if want not in cap_tokens:
-                    cap_tokens.append(want)
+            # Construct auth caps that if present might conflict with the desired
+            # auth caps.
+            unwanted_access_level = 'r' if want_access_level is 'rw' else 'rw'
+            unwanted_mds_cap = 'allow {0} path={1}'.format(unwanted_access_level, path)
+            unwanted_osd_cap = 'allow {0} pool={1} namespace={2}'.format(
+                unwanted_access_level, pool_name, namespace)
+
+            def cap_update(orig, want, unwanted):
+                # Updates the existing auth caps such that there is a single
+                # occurrence of wanted auth caps and no occurrence of
+                # conflicting auth caps.
+
+                cap_tokens = set(orig.split(","))
+
+                cap_tokens.discard(unwanted)
+                cap_tokens.add(want)
 
                 return ",".join(cap_tokens)
 
-            osd_cap_str = cap_extend(cap['caps'].get('osd', ""), want_osd_cap)
-            mds_cap_str = cap_extend(cap['caps'].get('mds', ""), want_mds_cap)
+            osd_cap_str = cap_update(cap['caps'].get('osd', ""), want_osd_cap, unwanted_osd_cap)
+            mds_cap_str = cap_update(cap['caps'].get('mds', ""), want_mds_cap, unwanted_mds_cap)
 
             caps = self._rados_command(
                 'auth caps',
@@ -657,13 +1105,76 @@ class CephFSVolumeClient(object):
         # ]
         assert len(caps) == 1
         assert caps[0]['entity'] == client_entity
-        key = caps[0]['key']
-
-        return {
-            'auth_key': key
-        }
+        return caps[0]['key']
 
     def deauthorize(self, volume_path, auth_id):
+        with self._auth_lock(auth_id):
+            # Existing meta, or None, to be updated
+            auth_meta = self._auth_metadata_get(auth_id)
+
+            volume_path_str = str(volume_path)
+            if (auth_meta is None) or (not auth_meta['volumes']):
+                log.warn("deauthorized called for already-removed auth "
+                         "ID '{auth_id}' for volume ID '{volume}'".format(
+                    auth_id=auth_id, volume=volume_path.volume_id
+                ))
+                # Clean up the auth meta file of an auth ID
+                self.fs.unlink(self._auth_metadata_path(auth_id))
+                return
+
+            if volume_path_str not in auth_meta['volumes']:
+                log.warn("deauthorized called for already-removed auth "
+                         "ID '{auth_id}' for volume ID '{volume}'".format(
+                    auth_id=auth_id, volume=volume_path.volume_id
+                ))
+                return
+
+            if auth_meta['dirty']:
+                self._recover_auth_meta(auth_id, auth_meta)
+
+            auth_meta['dirty'] = True
+            auth_meta['volumes'][volume_path_str]['dirty'] = True
+            self._auth_metadata_set(auth_id, auth_meta)
+
+            self._deauthorize_volume(volume_path, auth_id)
+
+            # Filter out the volume we're deauthorizing
+            del auth_meta['volumes'][volume_path_str]
+
+            # Clean up auth meta file
+            if not auth_meta['volumes']:
+                self.fs.unlink(self._auth_metadata_path(auth_id))
+                return
+
+            auth_meta['dirty'] = False
+            self._auth_metadata_set(auth_id, auth_meta)
+
+    def _deauthorize_volume(self, volume_path, auth_id):
+        with self._volume_lock(volume_path):
+            vol_meta = self._volume_metadata_get(volume_path)
+
+            if (vol_meta is None) or (auth_id not in vol_meta['auths']):
+                log.warn("deauthorized called for already-removed auth "
+                         "ID '{auth_id}' for volume ID '{volume}'".format(
+                    auth_id=auth_id, volume=volume_path.volume_id
+                ))
+                return
+
+            vol_meta['auths'][auth_id]['dirty'] = True
+            self._volume_metadata_set(volume_path, vol_meta)
+
+            self._deauthorize(volume_path, auth_id)
+
+            # Remove the auth_id from the metadata *after* removing it
+            # from ceph, so that if we crashed here, we would actually
+            # recreate the auth ID during recovery (i.e. end up with
+            # a consistent state).
+
+            # Filter out the auth we're removing
+            del vol_meta['auths'][auth_id]
+            self._volume_metadata_set(volume_path, vol_meta)
+
+    def _deauthorize(self, volume_path, auth_id):
         """
         The volume must still exist.
         """
@@ -672,8 +1183,14 @@ class CephFSVolumeClient(object):
         pool_name = self._get_ancestor_xattr(path, "ceph.dir.layout.pool")
         namespace = self.fs.getxattr(path, "ceph.dir.layout.pool_namespace")
 
-        want_mds_cap = 'allow rw path={0}'.format(path)
-        want_osd_cap = 'allow rw pool={0} namespace={1}'.format(pool_name, namespace)
+        # The auth_id might have read-only or read-write mount access for the
+        # volume path.
+        access_levels = ('r', 'rw')
+        want_mds_caps = {'allow {0} path={1}'.format(access_level, path)
+                         for access_level in access_levels}
+        want_osd_caps = {'allow {0} pool={1} namespace={2}'.format(
+                         access_level, pool_name, namespace)
+                         for access_level in access_levels}
 
         try:
             existing = self._rados_command(
@@ -684,15 +1201,12 @@ class CephFSVolumeClient(object):
             )
 
             def cap_remove(orig, want):
-                cap_tokens = orig.split(",")
-                if want in cap_tokens:
-                    cap_tokens.remove(want)
-
-                return ",".join(cap_tokens)
+                cap_tokens = set(orig.split(","))
+                return ",".join(cap_tokens.difference(want))
 
             cap = existing[0]
-            osd_cap_str = cap_remove(cap['caps'].get('osd', ""), want_osd_cap)
-            mds_cap_str = cap_remove(cap['caps'].get('mds', ""), want_mds_cap)
+            osd_cap_str = cap_remove(cap['caps'].get('osd', ""), want_osd_caps)
+            mds_cap_str = cap_remove(cap['caps'].get('mds', ""), want_mds_caps)
             if (not osd_cap_str) and (not mds_cap_str):
                 self._rados_command('auth del', {'entity': client_entity}, decode=False)
             else:
@@ -711,6 +1225,27 @@ class CephFSVolumeClient(object):
             # Already gone, great.
             return
 
+    def get_authorized_ids(self, volume_path):
+        """
+        Expose a list of auth IDs that have access to a volume.
+
+        return: a list of (auth_id, access_level) tuples, where
+                the access_level can be 'r' or 'rw'.
+                None if no auth ID is given access to the volume.
+        """
+        with self._volume_lock(volume_path):
+            meta = self._volume_metadata_get(volume_path)
+            auths = []
+            if not meta or not meta['auths']:
+                return None
+
+            for auth, auth_data in meta['auths'].items():
+                # Skip partial auth updates.
+                if not auth_data['dirty']:
+                    auths.append((auth, auth_data['access_level']))
+
+            return auths
+
     def _rados_command(self, prefix, args=None, decode=True):
         """
         Safer wrapper for ceph_argparse.json_command, which raises
diff --git a/src/pybind/cephfs/cephfs.pyx b/src/pybind/cephfs/cephfs.pyx
index d3b581b..b27e797 100644
--- a/src/pybind/cephfs/cephfs.pyx
+++ b/src/pybind/cephfs/cephfs.pyx
@@ -108,6 +108,7 @@ cdef extern from "cephfs/libcephfs.h" nogil:
     int ceph_conf_set(ceph_mount_info *cmount, const char *option, const char *value)
 
     int ceph_mount(ceph_mount_info *cmount, const char *root)
+    int ceph_fstat(ceph_mount_info *cmount, int fd, stat *stbuf)
     int ceph_stat(ceph_mount_info *cmount, const char *path, stat *stbuf)
     int ceph_statfs(ceph_mount_info *cmount, const char *path, statvfs *stbuf)
 
@@ -135,6 +136,7 @@ cdef extern from "cephfs/libcephfs.h" nogil:
     int ceph_rmdir(ceph_mount_info *cmount, const char *path)
     const char* ceph_getcwd(ceph_mount_info *cmount)
     int ceph_sync_fs(ceph_mount_info *cmount)
+    int ceph_fsync(ceph_mount_info *cmount, int fd, int syncdataonly)
     int ceph_conf_parse_argv(ceph_mount_info *cmount, int argc, const char **argv)
     void ceph_buffer_free(char *buf)
 
@@ -506,6 +508,13 @@ cdef class LibCephFS(object):
         if ret < 0:
             raise make_ex(ret, "sync_fs failed")
 
+    def fsync(self, int fd, int syncdataonly):
+        self.require_state("mounted")
+        with nogil:
+            ret = ceph_fsync(self.cluster, fd, syncdataonly)
+        if ret < 0:
+            raise make_ex(ret, "fsync failed")
+
     def getcwd(self):
         self.require_state("mounted")
         with nogil:
@@ -599,25 +608,30 @@ cdef class LibCephFS(object):
 
     def open(self, path, flags, mode=0):
         self.require_state("mounted")
-
         path = cstr(path, 'path')
-        flags = cstr(flags, 'flags')
+
         if not isinstance(mode, int):
             raise TypeError('mode must be an int')
-        cephfs_flags = 0
-        if flags == '':
-            cephfs_flags = os.O_RDONLY
+        if isinstance(flags, basestring):
+            flags = cstr(flags, 'flags')
+            cephfs_flags = 0
+            if flags == '':
+                cephfs_flags = os.O_RDONLY
+            else:
+                for c in flags:
+                    if c == 'r':
+                        cephfs_flags |= os.O_RDONLY
+                    elif c == 'w':
+                        cephfs_flags |= os.O_WRONLY | os.O_TRUNC | os.O_CREAT
+                    elif c == '+':
+                        cephfs_flags |= os.O_RDWR
+                    else:
+                        raise OperationNotSupported(
+                            "open flags doesn't support %s" % c)
+        elif isinstance(flags, int):
+            cephfs_flags = flags
         else:
-            for c in flags:
-                if c == 'r':
-                    cephfs_flags |= os.O_RDONLY
-                elif c == 'w':
-                    cephfs_flags |= os.O_WRONLY | os.O_TRUNC | os.O_CREAT
-                elif c == '+':
-                    cephfs_flags |= os.O_RDWR
-                else:
-                    raise OperationNotSupported(
-                        "open flags doesn't support %s" % c)
+            raise TypeError("flags must be a string or an integer")
 
         cdef:
             char* _path = path
@@ -786,6 +800,29 @@ cdef class LibCephFS(object):
                           st_mtime=datetime.fromtimestamp(statbuf.st_mtime),
                           st_ctime=datetime.fromtimestamp(statbuf.st_ctime))
 
+    def fstat(self, fd):
+        self.require_state("mounted")
+        if not isinstance(fd, int):
+            raise TypeError('fd must be an int')
+
+        cdef:
+            int _fd = fd
+            stat statbuf
+
+        with nogil:
+            ret = ceph_fstat(self.cluster, _fd, &statbuf)
+        if ret < 0:
+            raise make_ex(ret, "error in fstat")
+        return StatResult(st_dev=statbuf.st_dev, st_ino=statbuf.st_ino,
+                          st_mode=statbuf.st_mode, st_nlink=statbuf.st_nlink,
+                          st_uid=statbuf.st_uid, st_gid=statbuf.st_gid,
+                          st_rdev=statbuf.st_rdev, st_size=statbuf.st_size,
+                          st_blksize=statbuf.st_blksize,
+                          st_blocks=statbuf.st_blocks,
+                          st_atime=datetime.fromtimestamp(statbuf.st_atime),
+                          st_mtime=datetime.fromtimestamp(statbuf.st_mtime),
+                          st_ctime=datetime.fromtimestamp(statbuf.st_ctime))
+
     def symlink(self, existing, newname):
         self.require_state("mounted")
         existing = cstr(existing, 'existing')
diff --git a/src/rbd_replay/ActionTypes.cc b/src/rbd_replay/ActionTypes.cc
index dc5388f..4e7a297 100644
--- a/src/rbd_replay/ActionTypes.cc
+++ b/src/rbd_replay/ActionTypes.cc
@@ -288,12 +288,18 @@ void ActionEntry::decode(__u8 version, bufferlist::iterator &it) {
   case ACTION_TYPE_WRITE:
     action = WriteAction();
     break;
+  case ACTION_TYPE_DISCARD:
+    action = DiscardAction();
+    break;
   case ACTION_TYPE_AIO_READ:
     action = AioReadAction();
     break;
   case ACTION_TYPE_AIO_WRITE:
     action = AioWriteAction();
     break;
+  case ACTION_TYPE_AIO_DISCARD:
+    action = AioDiscardAction();
+    break;
   case ACTION_TYPE_OPEN_IMAGE:
     action = OpenImageAction();
     break;
@@ -330,12 +336,18 @@ void ActionEntry::generate_test_instances(std::list<ActionEntry *> &o) {
   o.push_back(new ActionEntry(WriteAction()));
   o.push_back(new ActionEntry(WriteAction(1, 123456789, dependencies, 3, 4,
                                           5)));
+  o.push_back(new ActionEntry(DiscardAction()));
+  o.push_back(new ActionEntry(DiscardAction(1, 123456789, dependencies, 3, 4,
+                                            5)));
   o.push_back(new ActionEntry(AioReadAction()));
   o.push_back(new ActionEntry(AioReadAction(1, 123456789, dependencies, 3, 4,
                                             5)));
   o.push_back(new ActionEntry(AioWriteAction()));
   o.push_back(new ActionEntry(AioWriteAction(1, 123456789, dependencies, 3, 4,
                                              5)));
+  o.push_back(new ActionEntry(AioDiscardAction()));
+  o.push_back(new ActionEntry(AioDiscardAction(1, 123456789, dependencies, 3, 4,
+                                               5)));
 
   o.push_back(new ActionEntry(OpenImageAction()));
   o.push_back(new ActionEntry(OpenImageAction(1, 123456789, dependencies, 3,
@@ -372,12 +384,18 @@ std::ostream &operator<<(std::ostream &out,
   case ACTION_TYPE_WRITE:
     out << "Write";
     break;
+  case ACTION_TYPE_DISCARD:
+    out << "Discard";
+    break;
   case ACTION_TYPE_AIO_READ:
     out << "AioRead";
     break;
   case ACTION_TYPE_AIO_WRITE:
     out << "AioWrite";
     break;
+  case ACTION_TYPE_AIO_DISCARD:
+    out << "AioDiscard";
+    break;
   case ACTION_TYPE_OPEN_IMAGE:
     out << "OpenImage";
     break;
diff --git a/src/rbd_replay/ActionTypes.h b/src/rbd_replay/ActionTypes.h
index 302b9e2..b6dc433 100644
--- a/src/rbd_replay/ActionTypes.h
+++ b/src/rbd_replay/ActionTypes.h
@@ -72,6 +72,8 @@ enum ActionType {
   ACTION_TYPE_CLOSE_IMAGE     = 7,
   ACTION_TYPE_AIO_OPEN_IMAGE  = 8,
   ACTION_TYPE_AIO_CLOSE_IMAGE = 9,
+  ACTION_TYPE_DISCARD         = 10,
+  ACTION_TYPE_AIO_DISCARD     = 11
 };
 
 struct ActionBase {
@@ -170,6 +172,18 @@ struct WriteAction : public IoActionBase {
   }
 };
 
+struct DiscardAction : public IoActionBase {
+  static const ActionType ACTION_TYPE = ACTION_TYPE_DISCARD;
+
+  DiscardAction() {
+  }
+  DiscardAction(action_id_t id, thread_id_t thread_id,
+                const Dependencies &dependencies, imagectx_id_t imagectx_id,
+                uint64_t offset, uint64_t length)
+    : IoActionBase(id, thread_id, dependencies, imagectx_id, offset, length) {
+  }
+};
+
 struct AioReadAction : public IoActionBase {
   static const ActionType ACTION_TYPE = ACTION_TYPE_AIO_READ;
 
@@ -194,6 +208,18 @@ struct AioWriteAction : public IoActionBase {
   }
 };
 
+struct AioDiscardAction : public IoActionBase {
+  static const ActionType ACTION_TYPE = ACTION_TYPE_AIO_DISCARD;
+
+  AioDiscardAction() {
+  }
+  AioDiscardAction(action_id_t id, thread_id_t thread_id,
+                   const Dependencies &dependencies, imagectx_id_t imagectx_id,
+                   uint64_t offset, uint64_t length)
+    : IoActionBase(id, thread_id, dependencies, imagectx_id, offset, length) {
+  }
+};
+
 struct OpenImageAction : public ImageActionBase {
   static const ActionType ACTION_TYPE = ACTION_TYPE_OPEN_IMAGE;
 
@@ -272,8 +298,10 @@ typedef boost::variant<StartThreadAction,
                        StopThreadAction,
                        ReadAction,
                        WriteAction,
+                       DiscardAction,
                        AioReadAction,
                        AioWriteAction,
+                       AioDiscardAction,
                        OpenImageAction,
                        CloseImageAction,
                        AioOpenImageAction,
diff --git a/src/rbd_replay/actions.cc b/src/rbd_replay/actions.cc
index 33c8de5..e596ec0 100644
--- a/src/rbd_replay/actions.cc
+++ b/src/rbd_replay/actions.cc
@@ -57,6 +57,14 @@ struct ConstructVisitor : public boost::static_visitor<Action::ptr> {
     return Action::ptr(new AioWriteAction(action));
   }
 
+  inline Action::ptr operator()(const action::DiscardAction &action) const {
+    return Action::ptr(new DiscardAction(action));
+  }
+
+  inline Action::ptr operator()(const action::AioDiscardAction &action) const {
+    return Action::ptr(new AioDiscardAction(action));
+  }
+
   inline Action::ptr operator()(const action::OpenImageAction &action) const {
     return Action::ptr(new OpenImageAction(action));
   }
@@ -118,7 +126,6 @@ void ReadAction::perform(ActionCtx &worker) {
   worker.remove_pending(io);
 }
 
-
 void AioWriteAction::perform(ActionCtx &worker) {
   static const std::string fake_data(create_fake_data());
   dout(ACTION_LEVEL) << "Performing " << *this << dendl;
@@ -152,6 +159,31 @@ void WriteAction::perform(ActionCtx &worker) {
   worker.remove_pending(io);
 }
 
+void AioDiscardAction::perform(ActionCtx &worker) {
+  dout(ACTION_LEVEL) << "Performing " << *this << dendl;
+  librbd::Image *image = worker.get_image(m_action.imagectx_id);
+  PendingIO::ptr io(new PendingIO(pending_io_id(), worker));
+  worker.add_pending(io);
+  if (worker.readonly()) {
+    worker.remove_pending(io);
+  } else {
+    int r = image->aio_discard(m_action.offset, m_action.length, &io->completion());
+    assertf(r >= 0, "id = %d, r = %d", id(), r);
+  }
+}
+
+void DiscardAction::perform(ActionCtx &worker) {
+  dout(ACTION_LEVEL) << "Performing " << *this << dendl;
+  librbd::Image *image = worker.get_image(m_action.imagectx_id);
+  PendingIO::ptr io(new PendingIO(pending_io_id(), worker));
+  worker.add_pending(io);
+  if (!worker.readonly()) {
+    ssize_t r = image->discard(m_action.offset, m_action.length);
+    assertf(r >= 0, "id = %d, r = %d", id(), r);
+  }
+  worker.remove_pending(io);
+}
+
 void OpenImageAction::perform(ActionCtx &worker) {
   dout(ACTION_LEVEL) << "Performing " << *this << dendl;
   PendingIO::ptr io(new PendingIO(pending_io_id(), worker));
diff --git a/src/rbd_replay/actions.hpp b/src/rbd_replay/actions.hpp
index eec655a..df5dde4 100644
--- a/src/rbd_replay/actions.hpp
+++ b/src/rbd_replay/actions.hpp
@@ -251,6 +251,36 @@ protected:
 };
 
 
+class AioDiscardAction : public TypedAction<action::AioDiscardAction> {
+public:
+  explicit AioDiscardAction(const action::AioDiscardAction &action)
+    : TypedAction<action::AioDiscardAction>(action) {
+  }
+
+  virtual void perform(ActionCtx &ctx);
+
+protected:
+  virtual const char *get_action_name() const {
+    return "AioDiscardAction";
+  }
+};
+
+
+class DiscardAction : public TypedAction<action::DiscardAction> {
+public:
+  explicit DiscardAction(const action::DiscardAction &action)
+    : TypedAction<action::DiscardAction>(action) {
+  }
+
+  virtual void perform(ActionCtx &ctx);
+
+protected:
+  virtual const char *get_action_name() const {
+    return "DiscardAction";
+  }
+};
+
+
 class OpenImageAction : public TypedAction<action::OpenImageAction> {
 public:
   explicit OpenImageAction(const action::OpenImageAction &action)
diff --git a/src/rbd_replay/ios.cc b/src/rbd_replay/ios.cc
index 52d885a..7bbc9ab 100644
--- a/src/rbd_replay/ios.cc
+++ b/src/rbd_replay/ios.cc
@@ -111,6 +111,18 @@ void WriteIO::write_debug(std::ostream& out) const {
   out << ", imagectx=" << m_imagectx << ", offset=" << m_offset << ", length=" << m_length << "]";
 }
 
+void DiscardIO::encode(bufferlist &bl) const {
+  action::Action action((action::DiscardAction(
+    ionum(), thread_id(), convert_dependencies(start_time(), dependencies()),
+    m_imagectx, m_offset, m_length)));
+  ::encode(action, bl);
+}
+
+void DiscardIO::write_debug(std::ostream& out) const {
+  write_debug_base(out, "discard");
+  out << ", imagectx=" << m_imagectx << ", offset=" << m_offset << ", length=" << m_length << "]";
+}
+
 void AioReadIO::encode(bufferlist &bl) const {
   action::Action action((action::AioReadAction(
     ionum(), thread_id(), convert_dependencies(start_time(), dependencies()),
@@ -135,6 +147,18 @@ void AioWriteIO::write_debug(std::ostream& out) const {
   out << ", imagectx=" << m_imagectx << ", offset=" << m_offset << ", length=" << m_length << "]";
 }
 
+void AioDiscardIO::encode(bufferlist &bl) const {
+  action::Action action((action::AioDiscardAction(
+    ionum(), thread_id(), convert_dependencies(start_time(), dependencies()),
+    m_imagectx, m_offset, m_length)));
+  ::encode(action, bl);
+}
+
+void AioDiscardIO::write_debug(std::ostream& out) const {
+  write_debug_base(out, "aio discard");
+  out << ", imagectx=" << m_imagectx << ", offset=" << m_offset << ", length=" << m_length << "]";
+}
+
 void OpenImageIO::encode(bufferlist &bl) const {
   action::Action action((action::OpenImageAction(
     ionum(), thread_id(), convert_dependencies(start_time(), dependencies()),
diff --git a/src/rbd_replay/ios.hpp b/src/rbd_replay/ios.hpp
index e6c0bf5..b4ab76d 100644
--- a/src/rbd_replay/ios.hpp
+++ b/src/rbd_replay/ios.hpp
@@ -186,6 +186,31 @@ private:
   uint64_t m_length;
 };
 
+class DiscardIO : public IO {
+public:
+  DiscardIO(action_id_t ionum,
+	    uint64_t start_time,
+	    thread_id_t thread_id,
+            const io_set_t& deps,
+	    imagectx_id_t imagectx,
+	    uint64_t offset,
+	    uint64_t length)
+    : IO(ionum, start_time, thread_id, deps),
+      m_imagectx(imagectx),
+      m_offset(offset),
+      m_length(length) {
+  }
+
+  virtual void encode(bufferlist &bl) const;
+
+  void write_debug(std::ostream& out) const;
+
+private:
+  imagectx_id_t m_imagectx;
+  uint64_t m_offset;
+  uint64_t m_length;
+};
+
 class AioReadIO : public IO {
 public:
   AioReadIO(action_id_t ionum,
@@ -236,6 +261,31 @@ private:
   uint64_t m_length;
 };
 
+class AioDiscardIO : public IO {
+public:
+  AioDiscardIO(action_id_t ionum,
+	       uint64_t start_time,
+	       thread_id_t thread_id,
+               const io_set_t& deps,
+	       imagectx_id_t imagectx,
+	       uint64_t offset,
+	       uint64_t length)
+    : IO(ionum, start_time, thread_id, deps),
+      m_imagectx(imagectx),
+      m_offset(offset),
+      m_length(length) {
+  }
+
+  virtual void encode(bufferlist &bl) const;
+
+  void write_debug(std::ostream& out) const;
+
+private:
+  imagectx_id_t m_imagectx;
+  uint64_t m_offset;
+  uint64_t m_length;
+};
+
 class OpenImageIO : public IO {
 public:
   OpenImageIO(action_id_t ionum,
diff --git a/src/rbd_replay/rbd-replay-prep.cc b/src/rbd_replay/rbd-replay-prep.cc
index c887813..1ece6e9 100644
--- a/src/rbd_replay/rbd-replay-prep.cc
+++ b/src/rbd_replay/rbd-replay-prep.cc
@@ -427,6 +427,21 @@ private:
       ios->push_back(io);
     } else if (strcmp(event_name, "librbd:write_exit") == 0) {
       completed(thread->latest_io());
+    } else if (strcmp(event_name, "librbd:discard_enter") == 0) {
+      string name(fields.string("name"));
+      string snap_name(fields.string("snap_name"));
+      bool readonly = fields.int64("read_only");
+      uint64_t offset = fields.uint64("off");
+      uint64_t length = fields.uint64("len");
+      imagectx_id_t imagectx = fields.uint64("imagectx");
+      require_image(ts, thread, imagectx, name, snap_name, readonly, ios);
+      action_id_t ionum = next_id();
+      IO::ptr io(new DiscardIO(ionum, ts, threadID, m_recent_completions,
+                                imagectx, offset, length));
+      thread->issued_io(io, &m_latest_ios);
+      ios->push_back(io);
+    } else if (strcmp(event_name, "librbd:discard_exit") == 0) {
+      completed(thread->latest_io());
     } else if (strcmp(event_name, "librbd:aio_read_enter") == 0 ||
                strcmp(event_name, "librbd:aio_read2_enter") == 0) {
       string name(fields.string("name"));
@@ -459,6 +474,21 @@ private:
       thread->issued_io(io, &m_latest_ios);
       ios->push_back(io);
       m_pending_ios[completion] = io;
+    } else if (strcmp(event_name, "librbd:aio_discard_enter") == 0) {
+      string name(fields.string("name"));
+      string snap_name(fields.string("snap_name"));
+      bool readonly = fields.int64("read_only");
+      uint64_t offset = fields.uint64("off");
+      uint64_t length = fields.uint64("len");
+      uint64_t completion = fields.uint64("completion");
+      imagectx_id_t imagectx = fields.uint64("imagectx");
+      require_image(ts, thread, imagectx, name, snap_name, readonly, ios);
+      action_id_t ionum = next_id();
+      IO::ptr io(new AioDiscardIO(ionum, ts, threadID, m_recent_completions,
+                                imagectx, offset, length));
+      thread->issued_io(io, &m_latest_ios);
+      ios->push_back(io);
+      m_pending_ios[completion] = io;
     } else if (strcmp(event_name, "librbd:aio_complete_enter") == 0) {
       uint64_t completion = fields.uint64("completion");
       map<uint64_t, IO::ptr>::iterator itr = m_pending_ios.find(completion);
diff --git a/src/rbdmap b/src/rbdmap
index da60b31..ec58261 100755
--- a/src/rbdmap
+++ b/src/rbdmap
@@ -1,4 +1,4 @@
-#!/bin/sh
+#!/bin/bash
 
 do_map() {
 
diff --git a/src/rgw/Makefile.am b/src/rgw/Makefile.am
index c96e8c5..b083dd1 100644
--- a/src/rgw/Makefile.am
+++ b/src/rgw/Makefile.am
@@ -147,7 +147,7 @@ radosgw_SOURCES = \
 	civetweb/src/civetweb.c \
 	rgw/rgw_main.cc
 
-radosgw_CFLAGS = -I$(srcdir)/civetweb/include -fPIC -I$(srcdir)/xxHash
+radosgw_CFLAGS = -I$(srcdir)/civetweb/include -fPIC -I$(srcdir)/xxHash ${CIVETWEB_INCLUDE}
 radosgw_LDADD = $(LIBRGW) $(LIBCIVETWEB) $(LIBCIVETWEB_DEPS) $(LIBRGW_DEPS) $(RESOLV_LIBS) \
 	$(CEPH_GLOBAL)
 bin_PROGRAMS += radosgw
diff --git a/src/rgw/librgw.cc b/src/rgw/librgw.cc
index 37414fc..c476129 100644
--- a/src/rgw/librgw.cc
+++ b/src/rgw/librgw.cc
@@ -52,6 +52,7 @@
 #include <string.h>
 #include <mutex>
 
+
 #define dout_subsys ceph_subsys_rgw
 
 bool global_stop = false;
@@ -469,9 +470,10 @@ namespace rgw {
     const string& ldap_searchdn = store->ctx()->_conf->rgw_ldap_searchdn;
     const string& ldap_dnattr =
       store->ctx()->_conf->rgw_ldap_dnattr;
+    std::string ldap_bindpw = parse_rgw_ldap_bindpw(store->ctx());
 
-    ldh = new rgw::LDAPHelper(ldap_uri, ldap_binddn, ldap_searchdn,
-			      ldap_dnattr);
+    ldh = new rgw::LDAPHelper(ldap_uri, ldap_binddn, ldap_bindpw.c_str(),
+			      ldap_searchdn, ldap_dnattr);
     ldh->init();
     ldh->bind();
 
diff --git a/src/rgw/rgw_admin.cc b/src/rgw/rgw_admin.cc
index 87c27ed..c36b913 100644
--- a/src/rgw/rgw_admin.cc
+++ b/src/rgw/rgw_admin.cc
@@ -114,6 +114,7 @@ void _usage()
   cout << "  zone modify                modify an existing zone\n";
   cout << "  zone set                   set zone cluster params (requires infile)\n";
   cout << "  zone list                  list all zones set on this cluster\n";
+  cout << "  zone rename                rename a zone\n";
   cout << "  pool add                   add an existing pool for data placement\n";
   cout << "  pool rm                    remove an existing pool from data placement set\n";
   cout << "  pools list                 list placement active set\n";
@@ -126,8 +127,6 @@ void _usage()
   cout << "  log rm                     remove log object\n";
   cout << "  usage show                 show usage (by user, date range)\n";
   cout << "  usage trim                 trim usage (by user, date range)\n";
-  cout << "  temp remove                remove temporary objects that were created up to\n";
-  cout << "                             specified date (and optional time)\n";
   cout << "  gc list                    dump expired garbage collection objects (specify\n";
   cout << "                             --include-all to list all entries, including unexpired)\n";
   cout << "  gc process                 manually process garbage\n";
@@ -234,7 +233,7 @@ void _usage()
   cout << "\nQuota options:\n";
   cout << "   --bucket                  specified bucket for quota command\n";
   cout << "   --max-objects             specify max objects (negative value to disable)\n";
-  cout << "   --max-size                specify max size (in bytes, negative value to disable)\n";
+  cout << "   --max-size                specify max size (in B/K/M/G/T, negative value to disable)\n";
   cout << "   --quota-scope             scope of quota (bucket, user)\n";
   cout << "\nOrphans search options:\n";
   cout << "   --pool                    data pool to scan for leaked rados objects in\n";
@@ -409,7 +408,6 @@ static int get_cmd(const char *cmd, const char *prev_cmd, const char *prev_prev_
       strcmp(cmd, "replicalog") == 0 ||
       strcmp(cmd, "subuser") == 0 ||
       strcmp(cmd, "sync") == 0 ||
-      strcmp(cmd, "temp") == 0 ||
       strcmp(cmd, "usage") == 0 ||
       strcmp(cmd, "user") == 0 ||
       strcmp(cmd, "zone") == 0 ||
@@ -1024,7 +1022,11 @@ void set_quota_info(RGWQuotaInfo& quota, int opt_cmd, int64_t max_size, int64_t
 
     case OPT_QUOTA_SET:
       if (have_max_objects) {
-        quota.max_objects = max_objects;
+        if (max_objects < 0) {
+          quota.max_objects = -1;
+        } else {
+          quota.max_objects = max_objects;
+        }
       }
       if (have_max_size) {
         if (max_size < 0) {
@@ -1521,10 +1523,10 @@ static int update_period(const string& realm_id, const string& realm_name,
   return 0;
 }
 
-static int init_bucket_for_sync(const string& tenant, const string& bucket_name, string& bucket_id)
+static int init_bucket_for_sync(const string& tenant, const string& bucket_name,
+                                const string& bucket_id, rgw_bucket& bucket)
 {
   RGWBucketInfo bucket_info;
-  rgw_bucket bucket;
 
   int ret = init_bucket(tenant, bucket_name, bucket_id, bucket_info, bucket);
   if (ret == -ENOENT) {
@@ -1532,8 +1534,6 @@ static int init_bucket_for_sync(const string& tenant, const string& bucket_name,
       cerr << "ERROR: bucket id specified" << std::endl;
       return EINVAL;
     }
-  } else {
-    bucket_id = bucket.bucket_id;
   }
   if (ret < 0) {
     cerr << "ERROR: could not init bucket: " << cpp_strerror(-ret) << std::endl;
@@ -2000,6 +2000,7 @@ int main(int argc, char **argv)
   int remove_bad = false;
   int check_head_obj_locator = false;
   int max_buckets = -1;
+  bool max_buckets_specified = false;
   map<string, bool> categories;
   string caps;
   int check_objects = false;
@@ -2138,6 +2139,7 @@ int main(int argc, char **argv)
         cerr << "ERROR: failed to parse max buckets: " << err << std::endl;
         return EINVAL;
       }
+      max_buckets_specified = true;
     } else if (ceph_argparse_witharg(args, i, &val, "--max-entries", (char*)NULL)) {
       max_entries = (int)strict_strtol(val.c_str(), 10, &err);
       if (!err.empty()) {
@@ -2145,7 +2147,7 @@ int main(int argc, char **argv)
         return EINVAL;
       }
     } else if (ceph_argparse_witharg(args, i, &val, "--max-size", (char*)NULL)) {
-      max_size = (int64_t)strict_strtoll(val.c_str(), 10, &err);
+      max_size = strict_si_cast<int64_t>(val.c_str(), &err);
       if (!err.empty()) {
         cerr << "ERROR: failed to parse max size: " << err << std::endl;
         return EINVAL;
@@ -3039,6 +3041,20 @@ int main(int argc, char **argv)
           need_update = true;
         }
 
+        if (!realm_id.empty()) {
+          zonegroup.realm_id = realm_id;
+          need_update = true;
+        } else if (!realm_name.empty()) {
+          // get realm id from name
+          RGWRealm realm{g_ceph_context, store};
+          ret = realm.read_id(realm_name, zonegroup.realm_id);
+          if (ret < 0) {
+            cerr << "failed to find realm by name " << realm_name << std::endl;
+            return -ret;
+          }
+          need_update = true;
+        }
+
         if (need_update) {
           zonegroup.post_process_params();
 	  ret = zonegroup.update();
@@ -3512,6 +3528,20 @@ int main(int argc, char **argv)
           need_zone_update = true;
         }
 
+        if (!realm_id.empty()) {
+          zone.realm_id = realm_id;
+          need_zone_update = true;
+        } else if (!realm_name.empty()) {
+          // get realm id from name
+          RGWRealm realm{g_ceph_context, store};
+          ret = realm.read_id(realm_name, zone.realm_id);
+          if (ret < 0) {
+            cerr << "failed to find realm by name " << realm_name << std::endl;
+            return -ret;
+          }
+          need_zone_update = true;
+        }
+
         if (need_zone_update) {
           ret = zone.update();
           if (ret < 0) {
@@ -3633,7 +3663,7 @@ int main(int argc, char **argv)
   if (gen_secret_key)
     user_op.set_gen_secret(); // assume that a key pair should be created
 
-  if (max_buckets >= 0)
+  if (max_buckets_specified)
     user_op.set_max_buckets(max_buckets);
 
   if (system_specified)
@@ -5118,11 +5148,12 @@ next:
       cerr << "ERROR: bucket not specified" << std::endl;
       return EINVAL;
     }
-    int ret = init_bucket_for_sync(tenant, bucket_name, bucket_id);
+    rgw_bucket bucket;
+    int ret = init_bucket_for_sync(tenant, bucket_name, bucket_id, bucket);
     if (ret < 0) {
       return -ret;
     }
-    RGWBucketSyncStatusManager sync(store, source_zone, bucket_name, bucket_id);
+    RGWBucketSyncStatusManager sync(store, source_zone, bucket);
 
     ret = sync.init();
     if (ret < 0) {
@@ -5145,11 +5176,12 @@ next:
       cerr << "ERROR: bucket not specified" << std::endl;
       return EINVAL;
     }
-    int ret = init_bucket_for_sync(tenant, bucket_name, bucket_id);
+    rgw_bucket bucket;
+    int ret = init_bucket_for_sync(tenant, bucket_name, bucket_id, bucket);
     if (ret < 0) {
       return -ret;
     }
-    RGWBucketSyncStatusManager sync(store, source_zone, bucket_name, bucket_id);
+    RGWBucketSyncStatusManager sync(store, source_zone, bucket);
 
     ret = sync.init();
     if (ret < 0) {
@@ -5177,11 +5209,12 @@ next:
       cerr << "ERROR: bucket not specified" << std::endl;
       return EINVAL;
     }
-    int ret = init_bucket_for_sync(tenant, bucket_name, bucket_id);
+    rgw_bucket bucket;
+    int ret = init_bucket_for_sync(tenant, bucket_name, bucket_id, bucket);
     if (ret < 0) {
       return -ret;
     }
-    RGWBucketSyncStatusManager sync(store, source_zone, bucket_name, bucket_id);
+    RGWBucketSyncStatusManager sync(store, source_zone, bucket);
 
     ret = sync.init();
     if (ret < 0) {
diff --git a/src/rgw/rgw_auth_s3.cc b/src/rgw/rgw_auth_s3.cc
index aba0655..8baa799 100644
--- a/src/rgw/rgw_auth_s3.cc
+++ b/src/rgw/rgw_auth_s3.cc
@@ -277,7 +277,11 @@ void rgw_create_s3_v4_canonical_request(struct req_state *s, const string& canon
     if (s->aws4_auth_needs_complete) {
       request_payload_hash = STREAM_IO(s)->grab_aws4_sha256_hash();
     } else {
-      rgw_hash_s3_string_sha256(request_payload.c_str(), request_payload.size(), request_payload_hash);
+      if (s->aws4_auth_streaming_mode) {
+        request_payload_hash = "STREAMING-AWS4-HMAC-SHA256-PAYLOAD";
+      } else {
+        rgw_hash_s3_string_sha256(request_payload.c_str(), request_payload.size(), request_payload_hash);
+      }
     }
   }
 
@@ -392,13 +396,16 @@ int rgw_calculate_s3_v4_aws_signature(struct req_state *s,
 
   /* aws4_request */
 
-  char signing_k[CEPH_CRYPTO_HMACSHA256_DIGESTSIZE];
+  char *signing_k = s->aws4_auth->signing_k;
+
   calc_hmac_sha256(service_k, CEPH_CRYPTO_HMACSHA256_DIGESTSIZE, "aws4_request", 12, signing_k);
 
   buf_to_hex((unsigned char *) signing_k, CEPH_CRYPTO_HMACSHA256_DIGESTSIZE, aux);
 
   ldout(s->cct, 10) << "signing_k     = " << string(aux) << dendl;
 
+  s->aws4_auth->signing_key = aux;
+
   /* new signature */
 
   char signature_k[CEPH_CRYPTO_HMACSHA256_DIGESTSIZE];
diff --git a/src/rgw/rgw_bucket.cc b/src/rgw/rgw_bucket.cc
index 685968c..daf18f5 100644
--- a/src/rgw/rgw_bucket.cc
+++ b/src/rgw/rgw_bucket.cc
@@ -6,6 +6,8 @@
 #include <string>
 #include <map>
 
+#include <boost/utility/string_ref.hpp>
+
 #include "common/errno.h"
 #include "common/ceph_json.h"
 #include "rgw_rados.h"
@@ -298,6 +300,33 @@ int rgw_bucket_instance_remove_entry(RGWRados *store, string& entry, RGWObjVersi
   return store->meta_mgr->remove_entry(bucket_instance_meta_handler, entry, objv_tracker);
 }
 
+// 'tenant/' is used in bucket instance keys for sync to avoid parsing ambiguity
+// with the existing instance[:shard] format. once we parse the shard, the / is
+// replaced with a : to match the [tenant:]instance format
+void rgw_bucket_instance_key_to_oid(string& key)
+{
+  // replace tenant/ with tenant:
+  auto c = key.find('/');
+  if (c != string::npos) {
+    key[c] = ':';
+  }
+}
+
+// convert bucket instance oids back to the tenant/ format for metadata keys.
+// it's safe to parse 'tenant:' only for oids, because they won't contain the
+// optional :shard at the end
+void rgw_bucket_instance_oid_to_key(string& oid)
+{
+  // find first : (could be tenant:bucket or bucket:instance)
+  auto c = oid.find(':');
+  if (c != string::npos) {
+    // if we find another :, the first one was for tenant
+    if (oid.find(':', c + 1) != string::npos) {
+      oid[c] = '/';
+    }
+  }
+}
+
 int rgw_bucket_parse_bucket_instance(const string& bucket_instance, string *target_bucket_instance, int *shard_id)
 {
   ssize_t pos = bucket_instance.rfind(':');
@@ -324,6 +353,53 @@ int rgw_bucket_parse_bucket_instance(const string& bucket_instance, string *targ
   return 0;
 }
 
+// parse key in format: [tenant/]name:instance[:shard_id]
+int rgw_bucket_parse_bucket_key(CephContext *cct, const string& key,
+                                rgw_bucket *bucket, int *shard_id)
+{
+  boost::string_ref name{key};
+  boost::string_ref instance;
+
+  // split tenant/name
+  auto pos = name.find('/');
+  if (pos != boost::string_ref::npos) {
+    auto tenant = name.substr(0, pos);
+    bucket->tenant.assign(tenant.begin(), tenant.end());
+    name = name.substr(pos + 1);
+  }
+
+  // split name:instance
+  pos = name.find(':');
+  if (pos != boost::string_ref::npos) {
+    instance = name.substr(pos + 1);
+    name = name.substr(0, pos);
+  }
+  bucket->name.assign(name.begin(), name.end());
+
+  // split instance:shard
+  pos = instance.find(':');
+  if (pos == boost::string_ref::npos) {
+    bucket->bucket_id.assign(instance.begin(), instance.end());
+    *shard_id = -1;
+    return 0;
+  }
+
+  // parse shard id
+  auto shard = instance.substr(pos + 1);
+  string err;
+  auto id = strict_strtol(shard.data(), 10, &err);
+  if (!err.empty()) {
+    ldout(cct, 0) << "ERROR: failed to parse bucket shard '"
+        << instance.data() << "': " << err << dendl;
+    return -EINVAL;
+  }
+
+  *shard_id = id;
+  instance = instance.substr(0, pos);
+  bucket->bucket_id.assign(instance.begin(), instance.end());
+  return 0;
+}
+
 int rgw_bucket_set_attrs(RGWRados *store, RGWBucketInfo& bucket_info,
                          map<string, bufferlist>& attrs,
                          RGWObjVersionTracker *objv_tracker)
@@ -339,13 +415,9 @@ int rgw_bucket_set_attrs(RGWRados *store, RGWBucketInfo& bucket_info,
       return ret;
     }
   }
-  string oid;
-  store->get_bucket_meta_oid(bucket, oid);
-  rgw_obj obj(store->get_zone_params().domain_root, oid);
 
-  string key;
-  store->get_bucket_instance_entry(bucket, key); /* we want the bucket instance name without
-						    the oid prefix cruft */
+  /* we want the bucket instance name without the oid prefix cruft */
+  string key = bucket.get_key();
   bufferlist bl;
 
   ::encode(bucket_info, bl);
@@ -1320,8 +1392,6 @@ int RGWDataChangesLog::renew_entries()
   real_time ut = real_clock::now();
   for (iter = entries.begin(); iter != entries.end(); ++iter) {
     const rgw_bucket_shard& bs = iter->first;
-    const rgw_bucket& bucket = bs.bucket;
-    int shard_id = bs.shard_id;
 
     int index = choose_oid(bs);
 
@@ -1330,19 +1400,14 @@ int RGWDataChangesLog::renew_entries()
     rgw_data_change change;
     bufferlist bl;
     change.entity_type = ENTITY_TYPE_BUCKET;
-    change.key = bucket.name + ":" + bucket.bucket_id;
-    if (shard_id >= 0) {
-      char buf[16];
-      snprintf(buf, sizeof(buf), ":%d", shard_id);
-      change.key += buf;
-    }
+    change.key = bs.get_key();
     change.timestamp = ut;
     ::encode(change, bl);
 
-    store->time_log_prepare_entry(entry, ut, section, bucket.name, bl);
+    store->time_log_prepare_entry(entry, ut, section, change.key, bl);
 
     m[index].first.push_back(bs);
-    m[index].second.push_back(entry);
+    m[index].second.emplace_back(std::move(entry));
   }
 
   map<int, pair<list<rgw_bucket_shard>, list<cls_log_entry> > >::iterator miter;
@@ -1470,12 +1535,7 @@ int RGWDataChangesLog::add_entry(rgw_bucket& bucket, int shard_id) {
     bufferlist bl;
     rgw_data_change change;
     change.entity_type = ENTITY_TYPE_BUCKET;
-    change.key = bucket.name + ":" + bucket.bucket_id;
-    if (shard_id >= 0) {
-      char buf[16];
-      snprintf(buf, sizeof(buf), ":%d", shard_id);
-      change.key += buf;
-    }
+    change.key = bs.get_key();
     change.timestamp = now;
     ::encode(change, bl);
     string section;
@@ -1652,12 +1712,9 @@ void RGWDataChangesLog::ChangesRenewThread::stop()
   cond.Signal();
 }
 
-void RGWDataChangesLog::mark_modified(int shard_id, rgw_bucket_shard& bs)
+void RGWDataChangesLog::mark_modified(int shard_id, const rgw_bucket_shard& bs)
 {
-  string key = bs.bucket.name + ":" + bs.bucket.bucket_id;
-  char buf[16];
-  snprintf(buf, sizeof(buf), ":%d", bs.shard_id);
-  key.append(buf);
+  auto key = bs.get_key();
   modified_lock.get_read();
   map<int, set<string> >::iterator iter = modified_shards.find(shard_id);
   if (iter != modified_shards.end()) {
@@ -1891,9 +1948,11 @@ public:
 
     if (!exists || old_bci.info.bucket.bucket_id != bci.info.bucket.bucket_id) {
       /* a new bucket, we need to select a new bucket placement for it */
+      auto key(entry);
+      rgw_bucket_instance_oid_to_key(key);
       string tenant_name;
       string bucket_name;
-      parse_bucket(entry, tenant_name, bucket_name);
+      parse_bucket(key, tenant_name, bucket_name);
 
       rgw_bucket bucket;
       RGWZonePlacementInfo rule_info;
@@ -1903,13 +1962,16 @@ public:
         ldout(store->ctx(), 0) << "ERROR: select_bucket_placement() returned " << ret << dendl;
         return ret;
       }
+      bci.info.bucket.tenant = bucket.tenant;
       bci.info.bucket.data_pool = bucket.data_pool;
       bci.info.bucket.index_pool = bucket.index_pool;
+      bci.info.bucket.data_extra_pool = bucket.data_extra_pool;
       bci.info.index_type = rule_info.index_type;
     } else {
       /* existing bucket, keep its placement pools */
       bci.info.bucket.data_pool = old_bci.info.bucket.data_pool;
       bci.info.bucket.index_pool = old_bci.info.bucket.index_pool;
+      bci.info.bucket.data_extra_pool = old_bci.info.bucket.data_extra_pool;
       bci.info.index_type = old_bci.info.index_type;
     }
 
@@ -1956,6 +2018,7 @@ public:
 
   void get_pool_and_oid(RGWRados *store, const string& key, rgw_bucket& bucket, string& oid) {
     oid = RGW_BUCKET_INSTANCE_MD_PREFIX + key;
+    rgw_bucket_instance_key_to_oid(oid);
     bucket = store->get_zone_params().domain_root;
   }
 
@@ -1991,14 +2054,16 @@ public:
       return 0;
     }
 
-    int prefix_size = sizeof(RGW_BUCKET_INSTANCE_MD_PREFIX) - 1;
+    constexpr int prefix_size = sizeof(RGW_BUCKET_INSTANCE_MD_PREFIX) - 1;
     // now filter in the relevant entries
     list<string>::iterator iter;
     for (iter = unfiltered_keys.begin(); iter != unfiltered_keys.end(); ++iter) {
       string& k = *iter;
 
       if (k.compare(0, prefix_size, RGW_BUCKET_INSTANCE_MD_PREFIX) == 0) {
-        keys.push_back(k.substr(prefix_size));
+        auto oid = k.substr(prefix_size);
+        rgw_bucket_instance_oid_to_key(oid);
+        keys.emplace_back(std::move(oid));
       }
     }
 
diff --git a/src/rgw/rgw_bucket.h b/src/rgw/rgw_bucket.h
index 8a2c28c..32bc0ab 100644
--- a/src/rgw/rgw_bucket.h
+++ b/src/rgw/rgw_bucket.h
@@ -34,8 +34,12 @@ extern int rgw_bucket_instance_store_info(RGWRados *store, string& oid, bufferli
                                  real_time mtime);
 
 extern int rgw_bucket_parse_bucket_instance(const string& bucket_instance, string *target_bucket_instance, int *shard_id);
+extern int rgw_bucket_parse_bucket_key(CephContext *cct, const string& key,
+                                       rgw_bucket* bucket, int *shard_id);
 
 extern int rgw_bucket_instance_remove_entry(RGWRados *store, string& entry, RGWObjVersionTracker *objv_tracker);
+extern void rgw_bucket_instance_key_to_oid(string& key);
+extern void rgw_bucket_instance_oid_to_key(string& oid);
 
 extern int rgw_bucket_delete_bucket_obj(RGWRados *store,
                                         const string& tenant_name,
@@ -485,7 +489,7 @@ public:
   int list_entries(const real_time& start_time, const real_time& end_time, int max_entries,
                list<rgw_data_change_log_entry>& entries, LogMarker& marker, bool *ptruncated);
 
-  void mark_modified(int shard_id, rgw_bucket_shard& bs);
+  void mark_modified(int shard_id, const rgw_bucket_shard& bs);
   void read_clear_modified(map<int, set<string> > &modified);
 
   bool going_down();
diff --git a/src/rgw/rgw_common.cc b/src/rgw/rgw_common.cc
index d597339..d309015 100644
--- a/src/rgw/rgw_common.cc
+++ b/src/rgw/rgw_common.cc
@@ -195,6 +195,7 @@ req_state::req_state(CephContext* _cct, RGWEnv* e, RGWUserInfo* u)
   object_acl = NULL;
   expect_cont = false;
   aws4_auth_needs_complete = false;
+  aws4_auth_streaming_mode = false;
 
   header_ended = false;
   obj_size = 0;
@@ -213,8 +214,6 @@ req_state::req_state(CephContext* _cct, RGWEnv* e, RGWUserInfo* u)
   http_auth = NULL;
   local_source = false;
 
-  aws4_auth = NULL;
-
   obj_ctx = NULL;
 }
 
@@ -222,7 +221,6 @@ req_state::~req_state() {
   delete formatter;
   delete bucket_acl;
   delete object_acl;
-  delete aws4_auth;
 }
 
 struct str_len {
@@ -1349,6 +1347,37 @@ bool RGWUserCaps::is_valid_cap_type(const string& tp)
   return false;
 }
 
+std::string rgw_bucket::get_key(char tenant_delim, char id_delim) const
+{
+  static constexpr size_t shard_len{12}; // ":4294967295\0"
+  const size_t max_len = tenant.size() + sizeof(tenant_delim) +
+      name.size() + sizeof(id_delim) + bucket_id.size() + shard_len;
+
+  std::string key;
+  key.reserve(max_len);
+  if (!tenant.empty() && tenant_delim) {
+    key.append(tenant);
+    key.append(1, tenant_delim);
+  }
+  key.append(name);
+  if (!bucket_id.empty() && id_delim) {
+    key.append(1, id_delim);
+    key.append(bucket_id);
+  }
+  return key;
+}
+
+std::string rgw_bucket_shard::get_key(char tenant_delim, char id_delim,
+                                      char shard_delim) const
+{
+  auto key = bucket.get_key(tenant_delim, id_delim);
+  if (shard_id >= 0 && shard_delim) {
+    key.append(1, shard_delim);
+    key.append(std::to_string(shard_id));
+  }
+  return key;
+}
+
 static struct rgw_name_to_flag op_type_mapping[] = { {"*",  RGW_OP_TYPE_ALL},
                   {"read",  RGW_OP_TYPE_READ},
 		  {"write", RGW_OP_TYPE_WRITE},
diff --git a/src/rgw/rgw_common.h b/src/rgw/rgw_common.h
index 39e0d4a..ab6c267 100644
--- a/src/rgw/rgw_common.h
+++ b/src/rgw/rgw_common.h
@@ -535,7 +535,7 @@ struct RGWUserInfo
   map<string, RGWAccessKey> swift_keys;
   map<string, RGWSubUser> subusers;
   __u8 suspended;
-  uint32_t max_buckets;
+  int32_t max_buckets;
   uint32_t op_mask;
   RGWUserCaps caps;
   __u8 system;
@@ -753,6 +753,10 @@ struct rgw_bucket {
     DECODE_FINISH(bl);
   }
 
+  // format a key for the bucket/instance. pass delim=0 to skip a field
+  std::string get_key(char tenant_delim = '/',
+                      char id_delim = ':') const;
+
   const string& get_data_extra_pool() {
     if (data_extra_pool.empty()) {
       return data_pool;
@@ -797,7 +801,10 @@ struct rgw_bucket_shard {
   int shard_id;
 
   rgw_bucket_shard() : shard_id(-1) {}
-  rgw_bucket_shard(rgw_bucket& _b, int _sid) : bucket(_b), shard_id(_sid) {}
+  rgw_bucket_shard(const rgw_bucket& _b, int _sid) : bucket(_b), shard_id(_sid) {}
+
+  std::string get_key(char tenant_delim = '/', char id_delim = ':',
+                      char shard_delim = ':') const;
 
   bool operator<(const rgw_bucket_shard& b) const {
     if (bucket < b.bucket) {
@@ -992,11 +999,14 @@ struct RGWBucketInfo
 
   void decode_json(JSONObj *obj);
 
-  bool versioned() { return (flags & BUCKET_VERSIONED) != 0; }
+  bool versioned() const { return (flags & BUCKET_VERSIONED) != 0; }
   int versioning_status() { return flags & (BUCKET_VERSIONED | BUCKET_VERSIONS_SUSPENDED); }
   bool versioning_enabled() { return versioning_status() == BUCKET_VERSIONED; }
 
-  bool has_swift_versioning() { return swift_versioning; }
+  bool has_swift_versioning() const {
+    /* A bucket may be versioned through one mechanism only. */
+    return swift_versioning && !versioned();
+  }
 
   RGWBucketInfo() : flags(0), has_instance_obj(false), num_shards(0), bucket_index_shard_hash_type(MOD), requester_pays(false),
                     has_website(false), swift_versioning(false) {}
@@ -1188,6 +1198,10 @@ struct rgw_aws4_auth {
   string signature;
   string new_signature;
   string payload_hash;
+  string seed_signature;
+  string signing_key;
+  char signing_k[CEPH_CRYPTO_HMACSHA256_DIGESTSIZE];
+  bufferlist bl;
 };
 
 struct req_init_state {
@@ -1260,7 +1274,8 @@ struct req_state {
 
   /* aws4 auth support */
   bool aws4_auth_needs_complete;
-  rgw_aws4_auth *aws4_auth;
+  bool aws4_auth_streaming_mode;
+  unique_ptr<rgw_aws4_auth> aws4_auth;
 
   string canned_acl;
   bool has_acl_header;
@@ -1713,13 +1728,16 @@ public:
   bool operator<(const rgw_obj& o) const {
     int r = bucket.name.compare(o.bucket.name);
     if (r == 0) {
-     r = object.compare(o.object);
-     if (r == 0) {
-       r = ns.compare(o.ns);
-       if (r == 0) {
-         r = instance.compare(o.instance);
-       }
-     }
+      r = bucket.bucket_id.compare(o.bucket.bucket_id);
+      if (r == 0) {
+        r = object.compare(o.object);
+        if (r == 0) {
+          r = ns.compare(o.ns);
+          if (r == 0) {
+            r = instance.compare(o.instance);
+          }
+        }
+      }
     }
 
     return (r < 0);
diff --git a/src/rgw/rgw_coroutine.cc b/src/rgw/rgw_coroutine.cc
index a09afc8..2e478f2 100644
--- a/src/rgw/rgw_coroutine.cc
+++ b/src/rgw/rgw_coroutine.cc
@@ -292,7 +292,7 @@ int RGWCoroutinesStack::unwind(int retcode)
 }
 
 
-bool RGWCoroutinesStack::collect(RGWCoroutine *op, int *ret) /* returns true if needs to be called again */
+bool RGWCoroutinesStack::collect(RGWCoroutine *op, int *ret, RGWCoroutinesStack *skip_stack) /* returns true if needs to be called again */
 {
   rgw_spawned_stacks *s = (op ? &op->spawned : &spawned);
   *ret = 0;
@@ -300,7 +300,7 @@ bool RGWCoroutinesStack::collect(RGWCoroutine *op, int *ret) /* returns true if
 
   for (vector<RGWCoroutinesStack *>::iterator iter = s->entries.begin(); iter != s->entries.end(); ++iter) {
     RGWCoroutinesStack *stack = *iter;
-    if (!stack->is_done()) {
+    if (stack == skip_stack || !stack->is_done()) {
       new_list.push_back(stack);
       ldout(cct, 20) << "collect(): s=" << (void *)this << " stack=" << (void *)stack << " is still running" << dendl;
       continue;
@@ -349,9 +349,9 @@ bool RGWCoroutinesStack::collect_next(RGWCoroutine *op, int *ret, RGWCoroutinesS
   return false;
 }
 
-bool RGWCoroutinesStack::collect(int *ret) /* returns true if needs to be called again */
+bool RGWCoroutinesStack::collect(int *ret, RGWCoroutinesStack *skip_stack) /* returns true if needs to be called again */
 {
-  return collect(NULL, ret);
+  return collect(NULL, ret, skip_stack);
 }
 
 static void _aio_completion_notifier_cb(librados::completion_t cb, void *arg);
@@ -712,9 +712,9 @@ RGWCoroutinesStack *RGWCoroutine::spawn(RGWCoroutine *op, bool wait)
   return stack->spawn(this, op, wait);
 }
 
-bool RGWCoroutine::collect(int *ret) /* returns true if needs to be called again */
+bool RGWCoroutine::collect(int *ret, RGWCoroutinesStack *skip_stack) /* returns true if needs to be called again */
 {
-  return stack->collect(this, ret);
+  return stack->collect(this, ret, skip_stack);
 }
 
 bool RGWCoroutine::collect_next(int *ret, RGWCoroutinesStack **collected_stack) /* returns true if found a stack to collect */
@@ -752,14 +752,18 @@ ostream& operator<<(ostream& out, const RGWCoroutine& cr)
   return out;
 }
 
-bool RGWCoroutine::drain_children(int num_cr_left)
+bool RGWCoroutine::drain_children(int num_cr_left, RGWCoroutinesStack *skip_stack)
 {
   bool done = false;
+  assert(num_cr_left >= 0);
+  if (num_cr_left == 0 && skip_stack) {
+    num_cr_left = 1;
+  }
   reenter(&drain_cr) {
     while (num_spawned() > (size_t)num_cr_left) {
       yield wait_for_child();
       int ret;
-      while (collect(&ret)) {
+      while (collect(&ret, skip_stack)) {
         if (ret < 0) {
           ldout(cct, 10) << "collect() returned ret=" << ret << dendl;
           /* we should have reported this error */
diff --git a/src/rgw/rgw_coroutine.h b/src/rgw/rgw_coroutine.h
index 11addf6..6b17fa0 100644
--- a/src/rgw/rgw_coroutine.h
+++ b/src/rgw/rgw_coroutine.h
@@ -265,11 +265,11 @@ public:
 
   void call(RGWCoroutine *op); /* call at the same stack we're in */
   RGWCoroutinesStack *spawn(RGWCoroutine *op, bool wait); /* execute on a different stack */
-  bool collect(int *ret); /* returns true if needs to be called again */
+  bool collect(int *ret, RGWCoroutinesStack *skip_stack); /* returns true if needs to be called again */
   bool collect_next(int *ret, RGWCoroutinesStack **collected_stack = NULL); /* returns true if found a stack to collect */
 
   int wait(const utime_t& interval);
-  bool drain_children(int num_cr_left); /* returns true if needed to be called again */
+  bool drain_children(int num_cr_left, RGWCoroutinesStack *skip_stack = NULL); /* returns true if needed to be called again */
   void wakeup();
   void set_sleeping(bool flag); /* put in sleep, or wakeup from sleep */
 
@@ -306,6 +306,10 @@ do {                            \
   drain_cr = boost::asio::coroutine(); \
   yield_until_true(drain_children(n))
 
+#define drain_all_but_stack(stack) \
+  drain_cr = boost::asio::coroutine(); \
+  yield_until_true(drain_children(1, stack))
+
 template <class T>
 class RGWConsumerCR : public RGWCoroutine {
   list<T> product;
@@ -371,7 +375,7 @@ protected:
   RGWCoroutinesStack *parent;
 
   RGWCoroutinesStack *spawn(RGWCoroutine *source_op, RGWCoroutine *next_op, bool wait);
-  bool collect(RGWCoroutine *op, int *ret); /* returns true if needs to be called again */
+  bool collect(RGWCoroutine *op, int *ret, RGWCoroutinesStack *skip_stack); /* returns true if needs to be called again */
   bool collect_next(RGWCoroutine *op, int *ret, RGWCoroutinesStack **collected_stack); /* returns true if found a stack to collect */
 public:
   RGWCoroutinesStack(CephContext *_cct, RGWCoroutinesManager *_ops_mgr, RGWCoroutine *start = NULL);
@@ -442,7 +446,7 @@ public:
   int wait(const utime_t& interval);
   void wakeup();
 
-  bool collect(int *ret); /* returns true if needs to be called again */
+  bool collect(int *ret, RGWCoroutinesStack *skip_stack); /* returns true if needs to be called again */
 
   RGWAioCompletionNotifier *create_completion_notifier();
   RGWCompletionManager *get_completion_mgr();
diff --git a/src/rgw/rgw_cors.cc b/src/rgw/rgw_cors.cc
index a120a68..1ad5b43 100644
--- a/src/rgw/rgw_cors.cc
+++ b/src/rgw/rgw_cors.cc
@@ -116,6 +116,13 @@ static bool is_string_in_set(set<string>& s, string h) {
   return false;
 }
 
+bool RGWCORSRule::has_wildcard_origin() {
+  if (allowed_origins.find("*") != allowed_origins.end())
+    return true;
+
+  return false;
+}
+
 bool RGWCORSRule::is_origin_present(const char *o) {
   string origin = o;
   return is_string_in_set(allowed_origins, origin);
diff --git a/src/rgw/rgw_cors.h b/src/rgw/rgw_cors.h
index 239cfd7..61b352d 100644
--- a/src/rgw/rgw_cors.h
+++ b/src/rgw/rgw_cors.h
@@ -80,6 +80,7 @@ public:
     ::decode(exposable_hdrs, bl);
     DECODE_FINISH(bl);
   }
+  bool has_wildcard_origin();
   bool is_origin_present(const char *o);
   void format_exp_headers(std::string& s);
   void erase_origin_if_present(std::string& origin, bool *rule_empty);
diff --git a/src/rgw/rgw_cr_rados.cc b/src/rgw/rgw_cr_rados.cc
index e812347..19fb949 100644
--- a/src/rgw/rgw_cr_rados.cc
+++ b/src/rgw/rgw_cr_rados.cc
@@ -141,9 +141,10 @@ RGWAsyncPutSystemObjAttrs::RGWAsyncPutSystemObjAttrs(RGWCoroutine *caller, RGWAi
 }
 
 
-RGWOmapAppend::RGWOmapAppend(RGWAsyncRadosProcessor *_async_rados, RGWRados *_store, rgw_bucket& _pool, const string& _oid)
+RGWOmapAppend::RGWOmapAppend(RGWAsyncRadosProcessor *_async_rados, RGWRados *_store, rgw_bucket& _pool, const string& _oid,
+                             uint64_t _window_size)
                       : RGWConsumerCR<string>(_store->ctx()), async_rados(_async_rados),
-                        store(_store), pool(_pool), oid(_oid), going_down(false), num_pending_entries(0), total_entries(0)
+                        store(_store), pool(_pool), oid(_oid), going_down(false), num_pending_entries(0), window_size(_window_size), total_entries(0)
 {
 }
 
@@ -289,6 +290,37 @@ int RGWRadosGetOmapKeysCR::send_request() {
   return ioctx.aio_operate(oid, cn->completion(), &op, NULL);
 }
 
+RGWRadosRemoveOmapKeysCR::RGWRadosRemoveOmapKeysCR(RGWRados *_store,
+                      const rgw_bucket& _pool, const string& _oid,
+                      const set<string>& _keys) : RGWSimpleCoroutine(_store->ctx()),
+                                                store(_store),
+                                                keys(_keys),
+                                                pool(_pool), oid(_oid), cn(NULL)
+{
+  set_description() << "remove omap keys dest=" << pool.name << "/" << oid << " keys=" << keys;
+}
+
+RGWRadosRemoveOmapKeysCR::~RGWRadosRemoveOmapKeysCR()
+{
+}
+
+int RGWRadosRemoveOmapKeysCR::send_request() {
+  librados::Rados *rados = store->get_rados_handle();
+  int r = rados->ioctx_create(pool.name.c_str(), ioctx); /* system object only! */
+  if (r < 0) {
+    lderr(store->ctx()) << "ERROR: failed to open pool (" << pool.name << ") ret=" << r << dendl;
+    return r;
+  }
+
+  set_status() << "send request";
+
+  librados::ObjectWriteOperation op;
+  op.omap_rm_keys(keys);
+
+  cn = stack->create_completion_notifier();
+  return ioctx.aio_operate(oid, cn->completion(), &op);
+}
+
 RGWSimpleRadosLockCR::RGWSimpleRadosLockCR(RGWAsyncRadosProcessor *_async_rados, RGWRados *_store,
                       const rgw_bucket& _pool, const string& _oid, const string& _lock_name,
                       const string& _cookie,
@@ -368,7 +400,6 @@ int RGWSimpleRadosUnlockCR::request_complete()
 }
 
 
-#define OMAP_APPEND_MAX_ENTRIES 100
 int RGWOmapAppend::operate() {
   reenter(this) {
     for (;;) {
@@ -383,11 +414,11 @@ int RGWOmapAppend::operate() {
         while (consume(&entry)) {
           set_status() << "adding entry: " << entry;
           entries[entry] = bufferlist();
-          if (entries.size() >= OMAP_APPEND_MAX_ENTRIES) {
+          if (entries.size() >= window_size) {
             break;
           }
         }
-        if (entries.size() >= OMAP_APPEND_MAX_ENTRIES || going_down) {
+        if (entries.size() >= window_size || going_down) {
           set_status() << "flushing to omap";
           call(new RGWRadosSetOmapKeysCR(store, pool, oid, entries));
           entries.clear();
@@ -415,7 +446,7 @@ bool RGWOmapAppend::append(const string& s) {
   }
   ++total_entries;
   pending_entries.push_back(s);
-  if (++num_pending_entries >= OMAP_APPEND_MAX_ENTRIES) {
+  if (++num_pending_entries >= (int)window_size) {
     flush_pending();
   }
   return true;
@@ -430,12 +461,11 @@ bool RGWOmapAppend::finish() {
 
 int RGWAsyncGetBucketInstanceInfo::_send_request()
 {
-  string id = bucket_name + ":" + bucket_id;
   RGWObjectCtx obj_ctx(store);
-
-  int r = store->get_bucket_instance_info(obj_ctx, id, *bucket_info, NULL, NULL);
+  int r = store->get_bucket_instance_info(obj_ctx, bucket, *bucket_info, NULL, NULL);
   if (r < 0) {
-    ldout(store->ctx(), 0) << "ERROR: failed to get bucket instance info for bucket id=" << id << dendl;
+    ldout(store->ctx(), 0) << "ERROR: failed to get bucket instance info for "
+        << bucket << dendl;
     return r;
   }
 
diff --git a/src/rgw/rgw_cr_rados.h b/src/rgw/rgw_cr_rados.h
index af0481c..24a284d 100644
--- a/src/rgw/rgw_cr_rados.h
+++ b/src/rgw/rgw_cr_rados.h
@@ -429,6 +429,37 @@ public:
   }
 };
 
+class RGWRadosRemoveOmapKeysCR : public RGWSimpleCoroutine {
+  RGWRados *store;
+
+  string marker;
+  map<string, bufferlist> *entries;
+  int max_entries;
+
+  int rval;
+  librados::IoCtx ioctx;
+
+  set<string> keys;
+
+  rgw_bucket pool;
+  string oid;
+
+  RGWAioCompletionNotifier *cn;
+
+public:
+  RGWRadosRemoveOmapKeysCR(RGWRados *_store,
+		      const rgw_bucket& _pool, const string& _oid,
+		      const set<string>& _keys);
+
+  ~RGWRadosRemoveOmapKeysCR();
+
+  int send_request();
+
+  int request_complete() {
+    return rval;
+  }
+};
+
 class RGWSimpleRadosLockCR : public RGWSimpleCoroutine {
   RGWAsyncRadosProcessor *async_rados;
   RGWRados *store;
@@ -479,6 +510,8 @@ public:
   int request_complete();
 };
 
+#define OMAP_APPEND_MAX_ENTRIES_DEFAULT 100
+
 class RGWOmapAppend : public RGWConsumerCR<string> {
   RGWAsyncRadosProcessor *async_rados;
   RGWRados *store;
@@ -493,9 +526,11 @@ class RGWOmapAppend : public RGWConsumerCR<string> {
 
   map<string, bufferlist> entries;
 
+  uint64_t window_size;
   uint64_t total_entries;
 public:
-  RGWOmapAppend(RGWAsyncRadosProcessor *_async_rados, RGWRados *_store, rgw_bucket& _pool, const string& _oid);
+  RGWOmapAppend(RGWAsyncRadosProcessor *_async_rados, RGWRados *_store, rgw_bucket& _pool, const string& _oid,
+                uint64_t _window_size = OMAP_APPEND_MAX_ENTRIES_DEFAULT);
   int operate();
   void flush_pending();
   bool append(const string& s);
@@ -504,6 +539,14 @@ public:
   uint64_t get_total_entries() {
     return total_entries;
   }
+
+  const rgw_bucket& get_pool() {
+    return pool;
+  }
+
+  const string& get_oid() {
+    return oid;
+  }
 };
 
 class RGWAsyncWait : public RGWAsyncRadosRequest {
@@ -617,35 +660,32 @@ public:
 
 class RGWAsyncGetBucketInstanceInfo : public RGWAsyncRadosRequest {
   RGWRados *store;
-  string bucket_name;
-  string bucket_id;
+  rgw_bucket bucket;
   RGWBucketInfo *bucket_info;
 
 protected:
   int _send_request();
 public:
-  RGWAsyncGetBucketInstanceInfo(RGWCoroutine *caller, RGWAioCompletionNotifier *cn, RGWRados *_store,
-		        const string& _bucket_name, const string& _bucket_id,
-                        RGWBucketInfo *_bucket_info) : RGWAsyncRadosRequest(caller, cn), store(_store),
-                                                       bucket_name(_bucket_name), bucket_id(_bucket_id),
-                                                       bucket_info(_bucket_info) {}
+  RGWAsyncGetBucketInstanceInfo(RGWCoroutine *caller, RGWAioCompletionNotifier *cn,
+                                RGWRados *_store, const rgw_bucket& bucket,
+                                RGWBucketInfo *_bucket_info)
+    : RGWAsyncRadosRequest(caller, cn), store(_store),
+      bucket(bucket), bucket_info(_bucket_info) {}
 };
 
 class RGWGetBucketInstanceInfoCR : public RGWSimpleCoroutine {
   RGWAsyncRadosProcessor *async_rados;
   RGWRados *store;
-  string bucket_name;
-  string bucket_id;
+  rgw_bucket bucket;
   RGWBucketInfo *bucket_info;
 
   RGWAsyncGetBucketInstanceInfo *req;
   
 public:
   RGWGetBucketInstanceInfoCR(RGWAsyncRadosProcessor *_async_rados, RGWRados *_store,
-		        const string& _bucket_name, const string& _bucket_id,
-                        RGWBucketInfo *_bucket_info) : RGWSimpleCoroutine(_store->ctx()), async_rados(_async_rados), store(_store),
-                                                       bucket_name(_bucket_name), bucket_id(_bucket_id),
-                                                       bucket_info(_bucket_info), req(NULL) {}
+                             const rgw_bucket& bucket, RGWBucketInfo *_bucket_info)
+    : RGWSimpleCoroutine(_store->ctx()), async_rados(_async_rados), store(_store),
+      bucket(bucket), bucket_info(_bucket_info), req(NULL) {}
   ~RGWGetBucketInstanceInfoCR() {
     request_cleanup();
   }
@@ -657,7 +697,7 @@ public:
   }
 
   int send_request() {
-    req = new RGWAsyncGetBucketInstanceInfo(this, stack->create_completion_notifier(), store, bucket_name, bucket_id, bucket_info);
+    req = new RGWAsyncGetBucketInstanceInfo(this, stack->create_completion_notifier(), store, bucket, bucket_info);
     async_rados->queue(req);
     return 0;
   }
diff --git a/src/rgw/rgw_cr_rest.h b/src/rgw/rgw_cr_rest.h
index 4958c45..63a6c0f 100644
--- a/src/rgw/rgw_cr_rest.h
+++ b/src/rgw/rgw_cr_rest.h
@@ -126,7 +126,7 @@ public:
     if (ret < 0) {
       error_stream << "http operation failed: " << op->to_str()
           << " status=" << op->get_http_status() << std::endl;
-      lsubdout(cct, rgw, 0) << "ERROR: failed to wait for op, ret=" << ret
+      lsubdout(cct, rgw, 5) << "failed to wait for op, ret=" << ret
           << ": " << op->to_str() << dendl;
       op->put();
       return ret;
diff --git a/src/rgw/rgw_data_sync.cc b/src/rgw/rgw_data_sync.cc
index ef4e434..95d5b8d 100644
--- a/src/rgw/rgw_data_sync.cc
+++ b/src/rgw/rgw_data_sync.cc
@@ -1,3 +1,5 @@
+#include <boost/utility/string_ref.hpp>
+
 #include "common/ceph_json.h"
 #include "common/RWLock.h"
 #include "common/RefCountedObj.h"
@@ -26,6 +28,73 @@ static string datalog_sync_status_shard_prefix = "datalog.sync-status.shard";
 static string datalog_sync_full_sync_index_prefix = "data.full-sync.index";
 static string bucket_status_oid_prefix = "bucket.sync-status";
 
+class RGWSyncDebugLogger {
+  CephContext *cct;
+  string prefix;
+
+  bool ended;
+
+public:
+  RGWSyncDebugLogger(CephContext *_cct, const string& source_zone,
+                     const string& sync_type, const string& sync_stage,
+                     const string& resource, bool log_start = true) {
+    init(_cct, source_zone, sync_type, sync_stage, resource, log_start);
+  }
+  RGWSyncDebugLogger() : cct(NULL), ended(false) {}
+  ~RGWSyncDebugLogger();
+
+  void init(CephContext *_cct, const string& source_zone,
+            const string& sync_type, const string& sync_stage,
+            const string& resource, bool log_start = true);
+  void log(const string& state);
+  void finish(int status);
+};
+
+void RGWSyncDebugLogger::init(CephContext *_cct, const string& source_zone,
+                              const string& sync_type, const string& sync_section,
+                              const string& resource, bool log_start)
+{
+  cct = _cct;
+  ended = false;
+  string zone_str = source_zone.substr(0, 8);
+  prefix = "Sync:" + zone_str + ":" + sync_type + ":" + sync_section + ":" + resource;
+  if (log_start) {
+    log("start");
+  }
+}
+
+RGWSyncDebugLogger::~RGWSyncDebugLogger()
+{
+  if (!ended) {
+    log("finish");
+  }
+}
+
+void RGWSyncDebugLogger::log(const string& state)
+{
+  ldout(cct, 5) << prefix << ":" << state << dendl;
+}
+
+void RGWSyncDebugLogger::finish(int status)
+{
+  ended = true;
+  ldout(cct, 5) << prefix << ":" << "finish r=" << status << dendl;
+}
+
+class RGWDataSyncDebugLogger : public RGWSyncDebugLogger {
+public:
+  RGWDataSyncDebugLogger() {}
+  RGWDataSyncDebugLogger(RGWDataSyncEnv *sync_env, const string& sync_section,
+                         const string& resource, bool log_start = true) {
+    init(sync_env, sync_section, resource, log_start);
+  }
+  void init(RGWDataSyncEnv *sync_env, const string& sync_section,
+            const string& resource, bool log_start = true) {
+    RGWSyncDebugLogger::init(sync_env->cct, sync_env->source_zone, "data", sync_section, resource, log_start);
+  }
+
+};
+
 void rgw_datalog_info::decode_json(JSONObj *obj) {
   JSONDecoder::decode_json("num_objects", num_shards, obj);
 }
@@ -402,7 +471,7 @@ public:
           spawn(new RGWReadRemoteDataLogShardInfoCR(sync_env, i, &shards_info[i]), true);
 	}
       }
-      while (collect(&ret)) {
+      while (collect(&ret, NULL)) {
 	if (ret < 0) {
 	  return set_state(RGWCoroutine_Error);
 	}
@@ -427,7 +496,7 @@ public:
 	call(new RGWSimpleRadosUnlockCR(sync_env->async_rados, store, store->get_zone_params().log_pool, sync_status_oid,
 			             lock_name, cookie));
       }
-      while (collect(&ret)) {
+      while (collect(&ret, NULL)) {
 	if (ret < 0) {
 	  return set_state(RGWCoroutine_Error);
 	}
@@ -654,7 +723,7 @@ public:
           yield call(sync_env->error_logger->log_error_cr(sync_env->conn->get_remote_id(), "data.init", "",
                                                           EIO, string("failed to build bucket instances map")));
       }
-      while (collect(&ret)) {
+      while (collect(&ret, NULL)) {
 	if (ret < 0) {
           yield call(sync_env->error_logger->log_error_cr(sync_env->conn->get_remote_id(), "data.init", "",
                                                           -ret, string("failed to store sync status: ") + cpp_strerror(-ret)));
@@ -730,54 +799,61 @@ public:
   }
 };
 
+// ostream wrappers to print buckets without copying strings
+struct bucket_str {
+  const rgw_bucket& b;
+  bucket_str(const rgw_bucket& b) : b(b) {}
+};
+std::ostream& operator<<(std::ostream& out, const bucket_str& rhs) {
+  auto& b = rhs.b;
+  if (!b.tenant.empty()) {
+    out << b.tenant << '/';
+  }
+  out << b.name;
+  if (!b.bucket_id.empty()) {
+    out << ':' << b.bucket_id;
+  }
+  return out;
+}
+
+struct bucket_shard_str {
+  const rgw_bucket_shard& bs;
+  bucket_shard_str(const rgw_bucket_shard& bs) : bs(bs) {}
+};
+std::ostream& operator<<(std::ostream& out, const bucket_shard_str& rhs) {
+  auto& bs = rhs.bs;
+  out << bucket_str{bs.bucket};
+  if (bs.shard_id >= 0) {
+    out << ':' << bs.shard_id;
+  }
+  return out;
+}
+
 class RGWRunBucketSyncCoroutine : public RGWCoroutine {
   RGWDataSyncEnv *sync_env;
-  string bucket_name;
-  string bucket_id;
+  rgw_bucket_shard bs;
   RGWBucketInfo bucket_info;
-  int shard_id;
   rgw_bucket_shard_sync_info sync_status;
   RGWMetaSyncEnv meta_sync_env;
 
+  RGWDataSyncDebugLogger logger;
+
 public:
-  RGWRunBucketSyncCoroutine(RGWDataSyncEnv *_sync_env,
-                            const string& _bucket_name, const string _bucket_id, int _shard_id) : RGWCoroutine(_sync_env->cct),
-                                                                            sync_env(_sync_env),
-                                                                            bucket_name(_bucket_name),
-									    bucket_id(_bucket_id), shard_id(_shard_id) {}
+  RGWRunBucketSyncCoroutine(RGWDataSyncEnv *_sync_env, const rgw_bucket_shard& bs)
+    : RGWCoroutine(_sync_env->cct), sync_env(_sync_env), bs(bs) {
+    logger.init(sync_env, "Bucket", bs.get_key());
+  }
 
   int operate();
 };
 
-static int parse_bucket_shard(CephContext *cct, const string& raw_key, string *bucket_name, string *bucket_instance, int *shard_id)
-{
-  ssize_t pos = raw_key.find(':');
-  *bucket_name = raw_key.substr(0, pos);
-  *bucket_instance = raw_key.substr(pos + 1);
-  pos = bucket_instance->find(':');
-  *shard_id = -1;
-  if (pos >= 0) {
-    string err;
-    string s = bucket_instance->substr(pos + 1);
-    *shard_id = strict_strtol(s.c_str(), 10, &err);
-    if (!err.empty()) {
-      ldout(cct, 0) << "ERROR: failed to parse bucket instance key: " << *bucket_instance << dendl;
-      return -EINVAL;
-    }
-
-    *bucket_instance = bucket_instance->substr(0, pos);
-  }
-  return 0;
-}
-
 class RGWDataSyncSingleEntryCR : public RGWCoroutine {
   RGWDataSyncEnv *sync_env;
 
   string raw_key;
   string entry_marker;
 
-  string bucket_name;
-  string bucket_instance;
+  rgw_bucket_shard bs;
 
   int sync_status;
 
@@ -785,13 +861,20 @@ class RGWDataSyncSingleEntryCR : public RGWCoroutine {
 
   RGWDataSyncShardMarkerTrack *marker_tracker;
 
+  boost::intrusive_ptr<RGWOmapAppend> error_repo;
+  bool remove_from_repo;
+
+  set<string> keys;
+
 public:
   RGWDataSyncSingleEntryCR(RGWDataSyncEnv *_sync_env,
-		           const string& _raw_key, const string& _entry_marker, RGWDataSyncShardMarkerTrack *_marker_tracker) : RGWCoroutine(_sync_env->cct),
+		           const string& _raw_key, const string& _entry_marker, RGWDataSyncShardMarkerTrack *_marker_tracker,
+                           RGWOmapAppend *_error_repo, bool _remove_from_repo) : RGWCoroutine(_sync_env->cct),
                                                       sync_env(_sync_env),
 						      raw_key(_raw_key), entry_marker(_entry_marker),
                                                       sync_status(0),
-                                                      marker_tracker(_marker_tracker) {
+                                                      marker_tracker(_marker_tracker),
+                                                      error_repo(_error_repo), remove_from_repo(_remove_from_repo) {
     set_description() << "data sync single entry (source_zone=" << sync_env->source_zone << ") key=" <<_raw_key << " entry=" << entry_marker;
   }
 
@@ -799,24 +882,48 @@ public:
     reenter(this) {
       do {
         yield {
-          int shard_id;
-          int ret = parse_bucket_shard(sync_env->cct, raw_key, &bucket_name, &bucket_instance, &shard_id);
+          int ret = rgw_bucket_parse_bucket_key(sync_env->cct, raw_key,
+                                                &bs.bucket, &bs.shard_id);
           if (ret < 0) {
             return set_cr_error(-EIO);
           }
-          marker_tracker->reset_need_retry(raw_key);
-          call(new RGWRunBucketSyncCoroutine(sync_env, bucket_name, bucket_instance, shard_id));
+          if (marker_tracker) {
+            marker_tracker->reset_need_retry(raw_key);
+          }
+          call(new RGWRunBucketSyncCoroutine(sync_env, bs));
         }
-      } while (marker_tracker->need_retry(raw_key));
+      } while (marker_tracker && marker_tracker->need_retry(raw_key));
 
       sync_status = retcode;
 
+      if (sync_status == -ENOENT) {
+        // this was added when 'tenant/' was added to datalog entries, because
+        // preexisting tenant buckets could never sync and would stay in the
+        // error_repo forever
+        ldout(sync_env->store->ctx(), 0) << "WARNING: skipping data log entry "
+            "for missing bucket " << raw_key << dendl;
+        sync_status = 0;
+      }
+
       if (sync_status < 0) {
-        yield call(sync_env->error_logger->log_error_cr(sync_env->conn->get_remote_id(), "data", bucket_name + ":" + bucket_instance,
+        yield call(sync_env->error_logger->log_error_cr(sync_env->conn->get_remote_id(), "data", raw_key,
                                                         -sync_status, string("failed to sync bucket instance: ") + cpp_strerror(-sync_status)));
+        if (retcode < 0) {
+          ldout(sync_env->store->ctx(), 0) << "ERROR: failed to log sync failure: retcode=" << retcode << dendl;
+        }
+        if (error_repo && !error_repo->append(raw_key)) {
+          ldout(sync_env->store->ctx(), 0) << "ERROR: failed to log sync failure in error repo: retcode=" << retcode << dendl;
+        }
+      } else if (error_repo && remove_from_repo) {
+        keys = {raw_key};
+        yield call(new RGWRadosRemoveOmapKeysCR(sync_env->store, error_repo->get_pool(), error_repo->get_oid(), keys));
+        if (retcode < 0) {
+          ldout(sync_env->store->ctx(), 0) << "ERROR: failed to remove omap key from error repo ("
+             << error_repo->get_pool() << ":" << error_repo->get_oid() << " retcode=" << retcode << dendl;
+        }
       }
       /* FIXME: what do do in case of error */
-      if (!entry_marker.empty()) {
+      if (marker_tracker && !entry_marker.empty()) {
         /* update marker */
         yield call(marker_tracker->finish(entry_marker));
       }
@@ -833,6 +940,7 @@ public:
 };
 
 #define BUCKET_SHARD_SYNC_SPAWN_WINDOW 20
+#define DATA_SYNC_MAX_ERR_ENTRIES 10
 
 class RGWDataSyncShardCR : public RGWCoroutine {
   RGWDataSyncEnv *sync_env;
@@ -877,7 +985,24 @@ class RGWDataSyncShardCR : public RGWCoroutine {
   set<string> spawned_keys;
 
   RGWContinuousLeaseCR *lease_cr;
+  RGWCoroutinesStack *lease_stack;
   string status_oid;
+
+
+  string error_oid;
+  RGWOmapAppend *error_repo;
+  map<string, bufferlist> error_entries;
+  string error_marker;
+  int max_error_entries;
+
+  ceph::real_time error_retry_time;
+
+#define RETRY_BACKOFF_SECS_MIN 60
+#define RETRY_BACKOFF_SECS_DEFAULT 60
+#define RETRY_BACKOFF_SECS_MAX 600
+  uint32_t retry_backoff_secs;
+
+  RGWDataSyncDebugLogger logger;
 public:
   RGWDataSyncShardCR(RGWDataSyncEnv *_sync_env,
                      rgw_bucket& _pool,
@@ -888,9 +1013,13 @@ public:
 						      sync_marker(_marker),
                                                       marker_tracker(NULL), truncated(false), inc_lock("RGWDataSyncShardCR::inc_lock"),
                                                       total_entries(0), spawn_window(BUCKET_SHARD_SYNC_SPAWN_WINDOW), reset_backoff(NULL),
-                                                      lease_cr(NULL) {
+                                                      lease_cr(nullptr), lease_stack(nullptr), error_repo(nullptr), max_error_entries(DATA_SYNC_MAX_ERR_ENTRIES),
+                                                      retry_backoff_secs(RETRY_BACKOFF_SECS_DEFAULT) {
     set_description() << "data sync shard source_zone=" << sync_env->source_zone << " shard_id=" << shard_id;
     status_oid = RGWDataSyncStatusManager::shard_obj_name(sync_env->source_zone, shard_id);
+    error_oid = status_oid + ".retry";
+
+    logger.init(sync_env, "DataShard", status_oid);
   }
 
   ~RGWDataSyncShardCR() {
@@ -899,6 +1028,9 @@ public:
       lease_cr->abort();
       lease_cr->put();
     }
+    if (error_repo) {
+      error_repo->put();
+    }
   }
 
   void append_modified_shards(set<string>& keys) {
@@ -948,7 +1080,7 @@ public:
     lease_cr = new RGWContinuousLeaseCR(sync_env->async_rados, store, store->get_zone_params().log_pool, status_oid,
                                         lock_name, lock_duration, this);
     lease_cr->get();
-    spawn(lease_cr, false);
+    lease_stack = spawn(lease_cr, false);
   }
 
   int full_sync() {
@@ -958,13 +1090,14 @@ public:
       yield init_lease_cr();
       while (!lease_cr->is_locked()) {
         if (lease_cr->is_done()) {
-          ldout(cct, 0) << "ERROR: lease cr failed, done early " << dendl;
+          ldout(cct, 5) << "lease cr failed, done early " << dendl;
           set_status("lease lock failed, early abort");
           return set_cr_error(lease_cr->get_ret_status());
         }
         set_sleeping(true);
         yield;
       }
+      logger.log("full sync");
       oid = full_data_sync_index_shard_oid(sync_env->source_zone, shard_id);
       set_marker_tracker(new RGWDataSyncShardMarkerTrack(sync_env, status_oid, sync_marker));
       total_entries = sync_marker.pos;
@@ -984,7 +1117,7 @@ public:
             ldout(sync_env->cct, 0) << "ERROR: cannot start syncing " << iter->first << ". Duplicate entry?" << dendl;
           } else {
             // fetch remote and write locally
-            yield spawn(new RGWDataSyncSingleEntryCR(sync_env, iter->first, iter->first, marker_tracker), false);
+            yield spawn(new RGWDataSyncSingleEntryCR(sync_env, iter->first, iter->first, marker_tracker, error_repo, false), false);
             if (retcode < 0) {
               lease_cr->go_down();
               drain_all();
@@ -1018,10 +1151,13 @@ public:
 
   int incremental_sync() {
     reenter(&incremental_cr) {
+      error_repo = new RGWOmapAppend(sync_env->async_rados, sync_env->store, pool, error_oid, 1 /* no buffer */);
+      error_repo->get();
+      spawn(error_repo, false);
       yield init_lease_cr();
       while (!lease_cr->is_locked()) {
         if (lease_cr->is_done()) {
-          ldout(cct, 0) << "ERROR: lease cr failed, done early " << dendl;
+          ldout(cct, 5) << "lease cr failed, done early " << dendl;
           set_status("lease lock failed, early abort");
           return set_cr_error(lease_cr->get_ret_status());
         }
@@ -1029,6 +1165,7 @@ public:
         yield;
       }
       set_status("lease acquired");
+      logger.log("inc sync");
       set_marker_tracker(new RGWDataSyncShardMarkerTrack(sync_env, status_oid, sync_marker));
       do {
         current_modified.clear();
@@ -1040,14 +1177,38 @@ public:
         for (modified_iter = current_modified.begin(); modified_iter != current_modified.end(); ++modified_iter) {
           yield {
             ldout(sync_env->cct, 20) << __func__ << "(): async update notification: " << *modified_iter << dendl;
-            spawn(new RGWDataSyncSingleEntryCR(sync_env, *modified_iter, string(), marker_tracker), false);
+            spawn(new RGWDataSyncSingleEntryCR(sync_env, *modified_iter, string(), marker_tracker, error_repo, false), false);
           }
         }
 
+        /* process bucket shards that previously failed */
+        yield call(new RGWRadosGetOmapKeysCR(sync_env->store, pool, error_oid, error_marker, &error_entries, max_error_entries));
+        ldout(sync_env->cct, 20) << __func__ << "(): read error repo, got " << error_entries.size() << " entries" << dendl;
+        iter = error_entries.begin();
+        for (; iter != error_entries.end(); ++iter) {
+          ldout(sync_env->cct, 20) << __func__ << "(): handle error entry: " << iter->first << dendl;
+          spawn(new RGWDataSyncSingleEntryCR(sync_env, iter->first, iter->first, nullptr /* no marker tracker */, error_repo, true), false);
+          error_marker = iter->first;
+        }
+        if ((int)error_entries.size() != max_error_entries) {
+          if (error_marker.empty() && error_entries.empty()) {
+            /* the retry repo is empty, we back off a bit before calling it again */
+            retry_backoff_secs *= 2;
+            if (retry_backoff_secs > RETRY_BACKOFF_SECS_MAX) {
+              retry_backoff_secs = RETRY_BACKOFF_SECS_MAX;
+            }
+          } else {
+            retry_backoff_secs = RETRY_BACKOFF_SECS_DEFAULT;
+          }
+          error_retry_time = ceph::real_clock::now() + make_timespan(retry_backoff_secs);
+          error_marker.clear();
+        }
+
+
         yield call(new RGWReadRemoteDataLogShardInfoCR(sync_env, shard_id, &shard_info));
         if (retcode < 0) {
           ldout(sync_env->cct, 0) << "ERROR: failed to fetch remote data log info: ret=" << retcode << dendl;
-          lease_cr->go_down();
+          stop_spawned_services();
           drain_all();
           return set_cr_error(retcode);
         }
@@ -1059,7 +1220,7 @@ public:
           yield call(new RGWReadRemoteDataLogShardCR(sync_env, shard_id, &sync_marker.marker, &log_entries, &truncated));
           if (retcode < 0) {
             ldout(sync_env->cct, 0) << "ERROR: failed to read remote data log info: ret=" << retcode << dendl;
-            lease_cr->go_down();
+            stop_spawned_services();
             drain_all();
             return set_cr_error(retcode);
           }
@@ -1078,9 +1239,9 @@ public:
                */
               if (spawned_keys.find(log_iter->entry.key) == spawned_keys.end()) {
                 spawned_keys.insert(log_iter->entry.key);
-                spawn(new RGWDataSyncSingleEntryCR(sync_env, log_iter->entry.key, log_iter->log_id, marker_tracker), false);
+                spawn(new RGWDataSyncSingleEntryCR(sync_env, log_iter->entry.key, log_iter->log_id, marker_tracker, error_repo, false), false);
                 if (retcode < 0) {
-                  lease_cr->go_down();
+                  stop_spawned_services();
                   drain_all();
                   return set_cr_error(retcode);
                 }
@@ -1091,7 +1252,7 @@ public:
             set_status() << "num_spawned() > spawn_window";
             yield wait_for_child();
             int ret;
-            while (collect(&ret)) {
+            while (collect(&ret, lease_stack)) {
               if (ret < 0) {
                 ldout(sync_env->cct, 0) << "ERROR: a sync operation returned error" << dendl;
                 /* we have reported this error */
@@ -1109,6 +1270,14 @@ public:
     }
     return 0;
   }
+  void stop_spawned_services() {
+    lease_cr->go_down();
+    if (error_repo) {
+      error_repo->finish();
+      error_repo->put();
+      error_repo = NULL;
+    }
+  }
 };
 
 class RGWDataSyncShardControlCR : public RGWBackoffControlCR {
@@ -1163,13 +1332,14 @@ class RGWDataSyncCR : public RGWCoroutine {
 
   bool *reset_backoff;
 
+  RGWDataSyncDebugLogger logger;
 public:
   RGWDataSyncCR(RGWDataSyncEnv *_sync_env, uint32_t _num_shards, bool *_reset_backoff) : RGWCoroutine(_sync_env->cct),
                                                       sync_env(_sync_env),
                                                       num_shards(_num_shards),
                                                       marker_tracker(NULL),
                                                       shard_crs_lock("RGWDataSyncCR::shard_crs_lock"),
-                                                      reset_backoff(_reset_backoff) {
+                                                      reset_backoff(_reset_backoff), logger(sync_env, "Data", "all") {
   }
 
   ~RGWDataSyncCR() {
@@ -1400,14 +1570,14 @@ string RGWDataSyncStatusManager::shard_obj_name(const string& source_zone, int s
   return string(buf);
 }
 
-int RGWRemoteBucketLog::init(const string& _source_zone, RGWRESTConn *_conn, const string& _bucket_name,
-                             const string& _bucket_id, int _shard_id, RGWSyncErrorLogger *_error_logger)
+int RGWRemoteBucketLog::init(const string& _source_zone, RGWRESTConn *_conn,
+                             const rgw_bucket& bucket, int shard_id,
+                             RGWSyncErrorLogger *_error_logger)
 {
   conn = _conn;
   source_zone = _source_zone;
-  bucket_name = _bucket_name;
-  bucket_id = _bucket_id;
-  shard_id = _shard_id;
+  bs.bucket = bucket;
+  bs.shard_id = shard_id;
 
   sync_env.init(store->ctx(), store, conn, async_rados, http_manager, _error_logger, source_zone);
 
@@ -1428,28 +1598,16 @@ struct bucket_index_marker_info {
 
 class RGWReadRemoteBucketIndexLogInfoCR : public RGWCoroutine {
   RGWDataSyncEnv *sync_env;
-
-  string bucket_name;
-  string bucket_id;
-  int shard_id;
-
-  string instance_key;
+  const string instance_key;
 
   bucket_index_marker_info *info;
 
 public:
   RGWReadRemoteBucketIndexLogInfoCR(RGWDataSyncEnv *_sync_env,
-                                  const string& _bucket_name, const string& _bucket_id, int _shard_id,
-                                  bucket_index_marker_info *_info) : RGWCoroutine(_sync_env->cct), sync_env(_sync_env),
-                                                      bucket_name(_bucket_name), bucket_id(_bucket_id), shard_id(_shard_id),
-                                                      info(_info) {
-    instance_key = bucket_name + ":" + bucket_id;
-    if (shard_id >= 0) {
-      char buf[16];
-      snprintf(buf, sizeof(buf), ":%d", shard_id);
-      instance_key.append(buf);
-    }
-  }
+                                  const rgw_bucket_shard& bs,
+                                  bucket_index_marker_info *_info)
+    : RGWCoroutine(_sync_env->cct), sync_env(_sync_env),
+      instance_key(bs.get_key()), info(_info) {}
 
   int operate() {
     reenter(this) {
@@ -1475,10 +1633,7 @@ class RGWInitBucketShardSyncStatusCoroutine : public RGWCoroutine {
   RGWDataSyncEnv *sync_env;
   RGWRados *store;
 
-  string bucket_name;
-  string bucket_id;
-  int shard_id;
-
+  rgw_bucket_shard bs;
   string sync_status_oid;
 
   string lock_name;
@@ -1488,8 +1643,8 @@ class RGWInitBucketShardSyncStatusCoroutine : public RGWCoroutine {
   bucket_index_marker_info info;
 public:
   RGWInitBucketShardSyncStatusCoroutine(RGWDataSyncEnv *_sync_env,
-                      const string& _bucket_name, const string& _bucket_id, int _shard_id) : RGWCoroutine(_sync_env->cct), sync_env(_sync_env),
-                                                                                             bucket_name(_bucket_name), bucket_id(_bucket_id), shard_id(_shard_id) {
+                                        const rgw_bucket_shard& bs)
+    : RGWCoroutine(_sync_env->cct), sync_env(_sync_env), bs(bs) {
     store = sync_env->store;
     lock_name = "sync_lock";
 
@@ -1499,7 +1654,7 @@ public:
     gen_rand_alphanumeric(cct, buf, sizeof(buf) - 1);
     string cookie = buf;
 
-    sync_status_oid = RGWBucketSyncStatusManager::status_oid(sync_env->source_zone, bucket_name, bucket_id, shard_id);
+    sync_status_oid = RGWBucketSyncStatusManager::status_oid(sync_env->source_zone, bs);
   }
 
   int operate() {
@@ -1525,7 +1680,7 @@ public:
 	}
       }
       /* fetch current position in logs */
-      yield call(new RGWReadRemoteBucketIndexLogInfoCR(sync_env, bucket_name, bucket_id, shard_id, &info));
+      yield call(new RGWReadRemoteBucketIndexLogInfoCR(sync_env, bs, &info));
       if (retcode < 0 && retcode != -ENOENT) {
         ldout(cct, 0) << "ERROR: failed to fetch bucket index status" << dendl;
         return set_cr_error(retcode);
@@ -1550,8 +1705,7 @@ public:
 
 RGWCoroutine *RGWRemoteBucketLog::init_sync_status_cr()
 {
-  return new RGWInitBucketShardSyncStatusCoroutine(&sync_env,
-                                                   bucket_name, bucket_id, shard_id);
+  return new RGWInitBucketShardSyncStatusCoroutine(&sync_env, bs);
 }
 
 template <class T>
@@ -1608,11 +1762,11 @@ class RGWReadBucketSyncStatusCoroutine : public RGWCoroutine {
   map<string, bufferlist> attrs;
 public:
   RGWReadBucketSyncStatusCoroutine(RGWDataSyncEnv *_sync_env,
-                      const string& _bucket_name, const string _bucket_id, int _shard_id,
-		      rgw_bucket_shard_sync_info *_status) : RGWCoroutine(_sync_env->cct),
-                                                            sync_env(_sync_env),
-                                                            oid(RGWBucketSyncStatusManager::status_oid(sync_env->source_zone, _bucket_name, _bucket_id, _shard_id)),
-                                                            status(_status) {}
+                                   const rgw_bucket_shard& bs,
+                                   rgw_bucket_shard_sync_info *_status)
+    : RGWCoroutine(_sync_env->cct), sync_env(_sync_env),
+      oid(RGWBucketSyncStatusManager::status_oid(sync_env->source_zone, bs)),
+      status(_status) {}
   int operate();
 };
 
@@ -1638,7 +1792,7 @@ int RGWReadBucketSyncStatusCoroutine::operate()
 }
 RGWCoroutine *RGWRemoteBucketLog::read_sync_status_cr(rgw_bucket_shard_sync_info *sync_status)
 {
-  return new RGWReadBucketSyncStatusCoroutine(&sync_env, bucket_name, bucket_id, shard_id, sync_status);
+  return new RGWReadBucketSyncStatusCoroutine(&sync_env, bs, sync_status);
 }
 
 RGWBucketSyncStatusManager::~RGWBucketSyncStatusManager() {
@@ -1725,31 +1879,18 @@ struct bucket_list_result {
 
 class RGWListBucketShardCR: public RGWCoroutine {
   RGWDataSyncEnv *sync_env;
-
-  string bucket_name;
-  string bucket_id;
-  int shard_id;
-
-  string instance_key;
+  const rgw_bucket_shard& bs;
+  const string instance_key;
   rgw_obj_key marker_position;
 
   bucket_list_result *result;
 
 public:
-  RGWListBucketShardCR(RGWDataSyncEnv *_sync_env,
-                                  const string& _bucket_name, const string& _bucket_id, int _shard_id,
-                                  rgw_obj_key& _marker_position,
-                                  bucket_list_result *_result) : RGWCoroutine(_sync_env->cct), sync_env(_sync_env),
-                                                      bucket_name(_bucket_name), bucket_id(_bucket_id), shard_id(_shard_id),
-                                                      marker_position(_marker_position),
-                                                      result(_result) {
-    instance_key = bucket_name + ":" + bucket_id;
-    if (shard_id >= 0) {
-      char buf[16];
-      snprintf(buf, sizeof(buf), ":%d", shard_id);
-      instance_key.append(buf);
-    }
-  }
+  RGWListBucketShardCR(RGWDataSyncEnv *_sync_env, const rgw_bucket_shard& bs,
+                       rgw_obj_key& _marker_position, bucket_list_result *_result)
+    : RGWCoroutine(_sync_env->cct), sync_env(_sync_env), bs(bs),
+      instance_key(bs.get_key()), marker_position(_marker_position),
+      result(_result) {}
 
   int operate() {
     reenter(this) {
@@ -1761,8 +1902,8 @@ public:
 					{ "key-marker" , marker_position.name.c_str() },
 					{ "version-id-marker" , marker_position.instance.c_str() },
 	                                { NULL, NULL } };
-
-        string p = string("/") + bucket_name;
+        // don't include tenant in the url, it's already part of instance_key
+        string p = string("/") + bs.bucket.name;
         call(new RGWReadRESTResourceCR<bucket_list_result>(sync_env->cct, sync_env->conn, sync_env->http_manager, p, pairs, result));
       }
       if (retcode < 0) {
@@ -1776,31 +1917,16 @@ public:
 
 class RGWListBucketIndexLogCR: public RGWCoroutine {
   RGWDataSyncEnv *sync_env;
-
-  string bucket_name;
-  string bucket_id;
-  int shard_id;
-
-  string instance_key;
+  const string instance_key;
   string marker;
 
   list<rgw_bi_log_entry> *result;
 
 public:
-  RGWListBucketIndexLogCR(RGWDataSyncEnv *_sync_env,
-                          const string& _bucket_name, const string& _bucket_id, int _shard_id,
-                          string& _marker,
-                          list<rgw_bi_log_entry> *_result) : RGWCoroutine(_sync_env->cct), sync_env(_sync_env),
-                                                      bucket_name(_bucket_name), bucket_id(_bucket_id), shard_id(_shard_id),
-                                                      marker(_marker),
-                                                      result(_result) {
-    instance_key = bucket_name + ":" + bucket_id;
-    if (shard_id >= 0) {
-      char buf[16];
-      snprintf(buf, sizeof(buf), ":%d", shard_id);
-      instance_key.append(buf);
-    }
-  }
+  RGWListBucketIndexLogCR(RGWDataSyncEnv *_sync_env, const rgw_bucket_shard& bs,
+                          string& _marker, list<rgw_bi_log_entry> *_result)
+    : RGWCoroutine(_sync_env->cct), sync_env(_sync_env),
+      instance_key(bs.get_key()), marker(_marker), result(_result) {}
 
   int operate() {
     reenter(this) {
@@ -1922,7 +2048,7 @@ class RGWBucketSyncSingleEntryCR : public RGWCoroutine {
   RGWDataSyncEnv *sync_env;
 
   RGWBucketInfo *bucket_info;
-  int shard_id;
+  const rgw_bucket_shard& bs;
 
   rgw_obj_key key;
   bool versioned;
@@ -1939,17 +2065,19 @@ class RGWBucketSyncSingleEntryCR : public RGWCoroutine {
 
   stringstream error_ss;
 
+  RGWDataSyncDebugLogger logger;
 
 public:
   RGWBucketSyncSingleEntryCR(RGWDataSyncEnv *_sync_env,
-                             RGWBucketInfo *_bucket_info, int _shard_id,
+                             RGWBucketInfo *_bucket_info,
+                             const rgw_bucket_shard& bs,
                              const rgw_obj_key& _key, bool _versioned, uint64_t _versioned_epoch,
                              real_time& _timestamp,
                              const bucket_entry_owner& _owner,
                              RGWModifyOp _op, RGWPendingState _op_state,
 		             const T& _entry_marker, RGWSyncShardMarkerTrack<T, K> *_marker_tracker) : RGWCoroutine(_sync_env->cct),
 						      sync_env(_sync_env),
-                                                      bucket_info(_bucket_info), shard_id(_shard_id),
+                                                      bucket_info(_bucket_info), bs(bs),
                                                       key(_key), versioned(_versioned), versioned_epoch(_versioned_epoch),
                                                       owner(_owner),
                                                       timestamp(_timestamp), op(_op),
@@ -1957,9 +2085,13 @@ public:
                                                       entry_marker(_entry_marker),
                                                       marker_tracker(_marker_tracker),
                                                       sync_status(0) {
-    set_description() << "bucket sync single entry (source_zone=" << sync_env->source_zone << ") b=" << bucket_info->bucket << ":" << shard_id <<"/" << key << "[" << versioned_epoch << "] log_entry=" << entry_marker << " op=" << (int)op << " op_state=" << (int)op_state;
-    ldout(sync_env->cct, 20) << "bucket sync single entry (source_zone=" << sync_env->source_zone << ") b=" << bucket_info->bucket << ":" << shard_id <<"/" << key << "[" << versioned_epoch << "] log_entry=" << entry_marker << " op=" << (int)op << " op_state=" << (int)op_state << dendl;
+    stringstream ss;
+    ss << bucket_shard_str{bs} << "/" << key << "[" << versioned_epoch << "]";
+    set_description() << "bucket sync single entry (source_zone=" << sync_env->source_zone << ") b=" << ss.str() << " log_entry=" << entry_marker << " op=" << (int)op << " op_state=" << (int)op_state;
+    ldout(sync_env->cct, 20) << "bucket sync single entry (source_zone=" << sync_env->source_zone << ") b=" << ss.str() << " log_entry=" << entry_marker << " op=" << (int)op << " op_state=" << (int)op_state << dendl;
     set_status("init");
+
+    logger.init(sync_env, "Object", ss.str());
   }
 
   int operate() {
@@ -1987,6 +2119,7 @@ public:
             }
             set_status("syncing obj");
             ldout(sync_env->cct, 5) << "bucket sync: sync obj: " << sync_env->source_zone << "/" << bucket_info->bucket << "/" << key << "[" << versioned_epoch << "]" << dendl;
+            logger.log("fetch");
             call(new RGWFetchRemoteObjCR(sync_env->async_rados, sync_env->store, sync_env->source_zone, *bucket_info,
                                          key, versioned_epoch,
                                          true));
@@ -1995,19 +2128,31 @@ public:
             if (op == CLS_RGW_OP_UNLINK_INSTANCE) {
               versioned = true;
             }
+            logger.log("remove");
             call(new RGWRemoveObjCR(sync_env->async_rados, sync_env->store, sync_env->source_zone, *bucket_info, key, versioned, versioned_epoch, NULL, NULL, false, &timestamp));
           } else if (op == CLS_RGW_OP_LINK_OLH_DM) {
+            logger.log("creating delete marker");
             set_status("creating delete marker");
             ldout(sync_env->cct, 10) << "creating delete marker: obj: " << sync_env->source_zone << "/" << bucket_info->bucket << "/" << key << "[" << versioned_epoch << "]" << dendl;
             call(new RGWRemoveObjCR(sync_env->async_rados, sync_env->store, sync_env->source_zone, *bucket_info, key, versioned, versioned_epoch, &owner.id, &owner.display_name, true, &timestamp));
           }
         }
       } while (marker_tracker->need_retry(key));
+      {
+        stringstream ss;
+        if (retcode >= 0) {
+          ss << "done";
+        } else {
+          ss << "done, retcode=" << retcode;
+        }
+        logger.log(ss.str());
+      }
+
       if (retcode < 0 && retcode != -ENOENT) {
         set_status() << "failed to sync obj; retcode=" << retcode;
-        rgw_bucket& bucket = bucket_info->bucket;
-        ldout(sync_env->cct, 0) << "ERROR: failed to sync object: " << bucket.name << ":" << bucket.bucket_id << ":" << shard_id << "/" << key.name << dendl;
-        error_ss << bucket.name << ":" << bucket.bucket_id << ":" << shard_id << "/" << key.name;
+        ldout(sync_env->cct, 0) << "ERROR: failed to sync object: "
+            << bucket_shard_str{bs} << "/" << key.name << dendl;
+        error_ss << bucket_shard_str{bs} << "/" << key.name;
         sync_status = retcode;
       }
       if (!error_ss.str().empty()) {
@@ -2033,9 +2178,7 @@ done:
 
 class RGWBucketShardFullSyncCR : public RGWCoroutine {
   RGWDataSyncEnv *sync_env;
-  string bucket_name;
-  string bucket_id;
-  int shard_id;
+  const rgw_bucket_shard& bs;
   RGWBucketInfo *bucket_info;
   bucket_list_result list_result;
   list<bucket_list_entry>::iterator entries_iter;
@@ -2049,21 +2192,23 @@ class RGWBucketShardFullSyncCR : public RGWCoroutine {
   int total_entries;
 
   RGWContinuousLeaseCR *lease_cr;
+  RGWCoroutinesStack *lease_stack;
 
   string status_oid;
+
+  RGWDataSyncDebugLogger logger;
 public:
-  RGWBucketShardFullSyncCR(RGWDataSyncEnv *_sync_env,
-                           const string& _bucket_name, const string _bucket_id, int _shard_id,
+  RGWBucketShardFullSyncCR(RGWDataSyncEnv *_sync_env, const rgw_bucket_shard& bs,
                            RGWBucketInfo *_bucket_info,  rgw_bucket_shard_full_sync_marker& _full_marker) : RGWCoroutine(_sync_env->cct),
 									    sync_env(_sync_env),
-                                                                            bucket_name(_bucket_name),
-									    bucket_id(_bucket_id), shard_id(_shard_id),
+                                                                            bs(bs),
                                                                             bucket_info(_bucket_info),
                                                                             full_marker(_full_marker), marker_tracker(NULL),
                                                                             spawn_window(BUCKET_SYNC_SPAWN_WINDOW), entry(NULL),
                                                                             op(CLS_RGW_OP_ADD),
-                                                                            total_entries(0), lease_cr(NULL) {
-    status_oid = RGWBucketSyncStatusManager::status_oid(sync_env->source_zone, bucket_name, bucket_id, shard_id);
+                                                                            total_entries(0), lease_cr(nullptr), lease_stack(nullptr) {
+    status_oid = RGWBucketSyncStatusManager::status_oid(sync_env->source_zone, bs);
+    logger.init(sync_env, "BucketFull", bs.get_key());
   }
 
   ~RGWBucketShardFullSyncCR() {
@@ -2088,11 +2233,11 @@ int RGWBucketShardFullSyncCR::operate()
       lease_cr = new RGWContinuousLeaseCR(sync_env->async_rados, store, store->get_zone_params().log_pool, status_oid,
                                           lock_name, lock_duration, this);
       lease_cr->get();
-      spawn(lease_cr, false);
+      lease_stack = spawn(lease_cr, false);
     }
     while (!lease_cr->is_locked()) {
       if (lease_cr->is_done()) {
-        ldout(cct, 0) << "ERROR: lease cr failed, done early " << dendl;
+        ldout(cct, 5) << "lease cr failed, done early " << dendl;
         set_status("lease lock failed, early abort");
         return set_cr_error(lease_cr->get_ret_status());
       }
@@ -2107,8 +2252,8 @@ int RGWBucketShardFullSyncCR::operate()
     do {
       set_status("listing remote bucket");
       ldout(sync_env->cct, 20) << __func__ << "(): listing bucket for full sync" << dendl;
-      yield call(new RGWListBucketShardCR(sync_env, bucket_name, bucket_id, shard_id,
-                                          list_marker, &list_result));
+      yield call(new RGWListBucketShardCR(sync_env, bs, list_marker,
+                                          &list_result));
       if (retcode < 0 && retcode != -ENOENT) {
         set_status("failed bucket listing, going down");
         yield lease_cr->go_down();
@@ -2117,7 +2262,8 @@ int RGWBucketShardFullSyncCR::operate()
       }
       entries_iter = list_result.entries.begin();
       for (; entries_iter != list_result.entries.end(); ++entries_iter) {
-        ldout(sync_env->cct, 20) << "[full sync] syncing object: " << bucket_name << ":" << bucket_id << ":" << shard_id << "/" << entries_iter->key << dendl;
+        ldout(sync_env->cct, 20) << "[full sync] syncing object: "
+            << bucket_shard_str{bs} << "/" << entries_iter->key << dendl;
         entry = &(*entries_iter);
         total_entries++;
         list_marker = entries_iter->key;
@@ -2127,7 +2273,7 @@ int RGWBucketShardFullSyncCR::operate()
           op = (entry->key.instance.empty() || entry->key.instance == "null" ? CLS_RGW_OP_ADD : CLS_RGW_OP_LINK_OLH);
 
           yield {
-            spawn(new RGWBucketSyncSingleEntryCR<rgw_obj_key, rgw_obj_key>(sync_env, bucket_info, shard_id,
+            spawn(new RGWBucketSyncSingleEntryCR<rgw_obj_key, rgw_obj_key>(sync_env, bucket_info, bs,
                                                                            entry->key,
                                                                            false, /* versioned, only matters for object removal */
                                                                            entry->versioned_epoch, entry->mtime,
@@ -2136,7 +2282,7 @@ int RGWBucketShardFullSyncCR::operate()
         }
         while ((int)num_spawned() > spawn_window) {
           yield wait_for_child();
-          while (collect(&ret)) {
+          while (collect(&ret, lease_stack)) {
             if (ret < 0) {
               ldout(sync_env->cct, 0) << "ERROR: a sync operation returned error" << dendl;
               /* we have reported this error */
@@ -2147,14 +2293,14 @@ int RGWBucketShardFullSyncCR::operate()
     } while (list_result.is_truncated);
     set_status("done iterating over all objects");
     /* wait for all operations to complete */
-    drain_all_but(1); /* still need to hold lease cr */
+    drain_all_but_stack(lease_stack); /* still need to hold lease cr */
     /* update sync state to incremental */
     yield {
       rgw_bucket_shard_sync_info sync_status;
       sync_status.state = rgw_bucket_shard_sync_info::StateIncrementalSync;
       map<string, bufferlist> attrs;
       sync_status.encode_state_attr(attrs);
-      string oid = RGWBucketSyncStatusManager::status_oid(sync_env->source_zone, bucket_name, bucket_id, shard_id);
+      string oid = RGWBucketSyncStatusManager::status_oid(sync_env->source_zone, bs);
       RGWRados *store = sync_env->store;
       call(new RGWSimpleRadosWriteAttrsCR(sync_env->async_rados, store, store->get_zone_params().log_pool,
                                           oid, attrs));
@@ -2162,8 +2308,8 @@ int RGWBucketShardFullSyncCR::operate()
     yield lease_cr->go_down();
     drain_all();
     if (retcode < 0) {
-      ldout(sync_env->cct, 0) << "ERROR: failed to set sync state on bucket " << bucket_name << ":" << bucket_id << ":" << shard_id
-        << " retcode=" << retcode << dendl;
+      ldout(sync_env->cct, 0) << "ERROR: failed to set sync state on bucket "
+          << bucket_shard_str{bs} << " retcode=" << retcode << dendl;
       return set_cr_error(retcode);
     }
     return set_cr_done();
@@ -2173,20 +2319,20 @@ int RGWBucketShardFullSyncCR::operate()
 
 class RGWBucketShardIncrementalSyncCR : public RGWCoroutine {
   RGWDataSyncEnv *sync_env;
-  string bucket_name;
-  string bucket_id;
-  int shard_id;
+  const rgw_bucket_shard& bs;
   RGWBucketInfo *bucket_info;
   list<rgw_bi_log_entry> list_result;
   list<rgw_bi_log_entry>::iterator entries_iter;
+  map<string, pair<real_time, RGWModifyOp> > squash_map;
   rgw_bucket_shard_inc_sync_marker inc_marker;
   rgw_obj_key key;
-  rgw_bi_log_entry *entry;
-  RGWBucketIncSyncShardMarkerTrack *marker_tracker;
-  int spawn_window;
-  bool updated_status;
-  RGWContinuousLeaseCR *lease_cr;
-  string status_oid;
+  rgw_bi_log_entry *entry{nullptr};
+  RGWBucketIncSyncShardMarkerTrack *marker_tracker{nullptr};
+  const int spawn_window{BUCKET_SYNC_SPAWN_WINDOW};
+  bool updated_status{false};
+  RGWContinuousLeaseCR *lease_cr{nullptr};
+  RGWCoroutinesStack *lease_stack{nullptr};
+  const string status_oid;
 
   string name;
   string instance;
@@ -2194,22 +2340,20 @@ class RGWBucketShardIncrementalSyncCR : public RGWCoroutine {
 
   string cur_id;
 
-
+  RGWDataSyncDebugLogger logger;
 
 public:
   RGWBucketShardIncrementalSyncCR(RGWDataSyncEnv *_sync_env,
-                           const string& _bucket_name, const string _bucket_id, int _shard_id,
-                           RGWBucketInfo *_bucket_info, rgw_bucket_shard_inc_sync_marker& _inc_marker) : RGWCoroutine(_sync_env->cct),
-                                                                            sync_env(_sync_env),
-                                                                            bucket_name(_bucket_name),
-									    bucket_id(_bucket_id), shard_id(_shard_id),
-                                                                            bucket_info(_bucket_info),
-                                                                            inc_marker(_inc_marker), entry(NULL), marker_tracker(NULL),
-                                                                            spawn_window(BUCKET_SYNC_SPAWN_WINDOW), updated_status(false),
-                                                                            lease_cr(NULL) {
-    status_oid = RGWBucketSyncStatusManager::status_oid(sync_env->source_zone, bucket_name, bucket_id, shard_id);
-    set_description() << "bucket shard incremental sync bucket=" << _bucket_name << ":" << _bucket_id << ":" << _shard_id;
+                                  const rgw_bucket_shard& bs,
+                                  RGWBucketInfo *_bucket_info,
+                                  rgw_bucket_shard_inc_sync_marker& _inc_marker)
+    : RGWCoroutine(_sync_env->cct), sync_env(_sync_env), bs(bs),
+      bucket_info(_bucket_info), inc_marker(_inc_marker),
+      status_oid(RGWBucketSyncStatusManager::status_oid(sync_env->source_zone, bs)) {
+    set_description() << "bucket shard incremental sync bucket="
+        << bucket_shard_str{bs};
     set_status("init");
+    logger.init(sync_env, "BucketInc", bs.get_key());
   }
 
   ~RGWBucketShardIncrementalSyncCR() {
@@ -2229,16 +2373,16 @@ int RGWBucketShardIncrementalSyncCR::operate()
     yield {
       set_status("acquiring sync lock");
       uint32_t lock_duration = cct->_conf->rgw_sync_lease_period;
-      string lock_name = "sync_lock";
+      string lock_name = "sync_lock.incremental"; /* allow concurrent full sync and incremental sync on the same bucket */
       RGWRados *store = sync_env->store;
       lease_cr = new RGWContinuousLeaseCR(sync_env->async_rados, store, store->get_zone_params().log_pool, status_oid,
                                           lock_name, lock_duration, this);
       lease_cr->get();
-      spawn(lease_cr, false);
+      lease_stack = spawn(lease_cr, false);
     }
     while (!lease_cr->is_locked()) {
       if (lease_cr->is_done()) {
-        ldout(cct, 0) << "ERROR: lease cr failed, done early " << dendl;
+        ldout(cct, 5) << "lease cr failed, done early " << dendl;
         set_status("lease lock failed, early abort");
         return set_cr_error(lease_cr->get_ret_status());
       }
@@ -2251,14 +2395,28 @@ int RGWBucketShardIncrementalSyncCR::operate()
     do {
       ldout(sync_env->cct, 20) << __func__ << "(): listing bilog for incremental sync" << dendl;
       set_status() << "listing bilog; position=" << inc_marker.position;
-      yield call(new RGWListBucketIndexLogCR(sync_env, bucket_name, bucket_id, shard_id,
-                                         inc_marker.position, &list_result));
+      yield call(new RGWListBucketIndexLogCR(sync_env, bs, inc_marker.position,
+                                             &list_result));
       if (retcode < 0 && retcode != -ENOENT) {
         /* wait for all operations to complete */
+        drain_all_but_stack(lease_stack);
         lease_cr->go_down();
         drain_all();
         return set_cr_error(retcode);
       }
+      squash_map.clear();
+      for (auto& e : list_result) {
+        if (e.state != CLS_RGW_STATE_COMPLETE) {
+          continue;
+        }
+        auto& squash_entry = squash_map[e.object];
+        if (squash_entry.first == e.timestamp &&
+            e.op == CLS_RGW_OP_DEL) {
+          squash_entry.second = e.op;
+        } else if (squash_entry.first < e.timestamp) {
+          squash_entry = make_pair<>(e.timestamp, e.op);
+        }
+      }
       entries_iter = list_result.begin();
       for (; entries_iter != list_result.end(); ++entries_iter) {
         entry = &(*entries_iter);
@@ -2292,17 +2450,27 @@ int RGWBucketShardIncrementalSyncCR::operate()
         set_status() << "got entry.id=" << cur_id << " key=" << key << " op=" << (int)entry->op;
         if (entry->op == CLS_RGW_OP_CANCEL) {
           set_status() << "canceled operation, skipping";
-          ldout(sync_env->cct, 20) << "[inc sync] skipping object: " << bucket_name << ":" << bucket_id << ":" << shard_id << "/" << key << ": canceled operation" << dendl;
+          ldout(sync_env->cct, 20) << "[inc sync] skipping object: "
+              << bucket_shard_str{bs} << "/" << key << ": canceled operation" << dendl;
           marker_tracker->try_update_high_marker(cur_id, 0, entry->timestamp);
           continue;
         }
         if (entry->state != CLS_RGW_STATE_COMPLETE) {
           set_status() << "non-complete operation, skipping";
-          ldout(sync_env->cct, 20) << "[inc sync] skipping object: " << bucket_name << ":" << bucket_id << ":" << shard_id << "/" << key << ": non-complete operation" << dendl;
+          ldout(sync_env->cct, 20) << "[inc sync] skipping object: "
+              << bucket_shard_str{bs} << "/" << key << ": non-complete operation" << dendl;
           marker_tracker->try_update_high_marker(cur_id, 0, entry->timestamp);
           continue;
         }
-        ldout(sync_env->cct, 20) << "[inc sync] syncing object: " << bucket_name << ":" << bucket_id << ":" << shard_id << "/" << key << dendl;
+        if (make_pair<>(entry->timestamp, entry->op) != squash_map[entry->object]) {
+          set_status() << "squashed operation, skipping";
+          ldout(sync_env->cct, 20) << "[inc sync] skipping object: "
+              << bucket_shard_str{bs} << "/" << key << ": squashed operation" << dendl;
+          /* not updating high marker though */
+          continue;
+        }
+        ldout(sync_env->cct, 20) << "[inc sync] syncing object: "
+            << bucket_shard_str{bs} << "/" << key << dendl;
         updated_status = false;
         while (!marker_tracker->can_do_op(key)) {
           if (!updated_status) {
@@ -2311,7 +2479,7 @@ int RGWBucketShardIncrementalSyncCR::operate()
           }
           ldout(sync_env->cct, 5) << *this << ": [inc sync] can't do op on key=" << key << " need to wait for conflicting operation to complete" << dendl;
           yield wait_for_child();
-          while (collect(&ret)) {
+          while (collect(&ret, lease_stack)) {
             if (ret < 0) {
               ldout(sync_env->cct, 0) << "ERROR: a child operation returned error (ret=" << ret << ")" << dendl;
               /* we have reported this error */
@@ -2335,7 +2503,7 @@ int RGWBucketShardIncrementalSyncCR::operate()
               versioned_epoch = entry->ver.epoch;
             }
             ldout(sync_env->cct, 20) << __func__ << "(): entry->timestamp=" << entry->timestamp << dendl;
-            spawn(new RGWBucketSyncSingleEntryCR<string, rgw_obj_key>(sync_env, bucket_info, shard_id,
+            spawn(new RGWBucketSyncSingleEntryCR<string, rgw_obj_key>(sync_env, bucket_info, bs,
                                                          key, entry->is_versioned(), versioned_epoch, entry->timestamp, owner, entry->op,
                                                          entry->state, cur_id, marker_tracker), false);
           }
@@ -2343,7 +2511,7 @@ int RGWBucketShardIncrementalSyncCR::operate()
         while ((int)num_spawned() > spawn_window) {
           set_status() << "num_spawned() > spawn_window";
           yield wait_for_child();
-          while (collect(&ret)) {
+          while (collect(&ret, lease_stack)) {
             if (ret < 0) {
               ldout(sync_env->cct, 0) << "ERROR: a sync operation returned error" << dendl;
               /* we have reported this error */
@@ -2359,11 +2527,13 @@ int RGWBucketShardIncrementalSyncCR::operate()
     }
     if (retcode < 0) {
       ldout(sync_env->cct, 0) << "ERROR: marker_tracker->flush() returned retcode=" << retcode << dendl;
+      drain_all_but_stack(lease_stack);
       lease_cr->go_down();
       drain_all();
       return set_cr_error(retcode);
     }
 
+    drain_all_but_stack(lease_stack);
     lease_cr->go_down();
     /* wait for all operations to complete */
     drain_all();
@@ -2376,20 +2546,23 @@ int RGWBucketShardIncrementalSyncCR::operate()
 int RGWRunBucketSyncCoroutine::operate()
 {
   reenter(this) {
-    yield call(new RGWReadBucketSyncStatusCoroutine(sync_env, bucket_name, bucket_id, shard_id, &sync_status));
+    yield call(new RGWReadBucketSyncStatusCoroutine(sync_env, bs, &sync_status));
     if (retcode < 0 && retcode != -ENOENT) {
-      ldout(sync_env->cct, 0) << "ERROR: failed to read sync status for bucket=" << bucket_name << " bucket_id=" << bucket_id << " shard_id=" << shard_id << dendl;
+      ldout(sync_env->cct, 0) << "ERROR: failed to read sync status for bucket="
+          << bucket_shard_str{bs} << dendl;
       return set_cr_error(retcode);
     }
 
-    ldout(sync_env->cct, 20) << __func__ << "(): sync status for bucket " << bucket_name << ":" << bucket_id << ":" << shard_id << ": " << sync_status.state << dendl;
+    ldout(sync_env->cct, 20) << __func__ << "(): sync status for bucket "
+        << bucket_shard_str{bs} << ": " << sync_status.state << dendl;
 
-    yield call(new RGWGetBucketInstanceInfoCR(sync_env->async_rados, sync_env->store, bucket_name, bucket_id, &bucket_info));
+    yield call(new RGWGetBucketInstanceInfoCR(sync_env->async_rados, sync_env->store, bs.bucket, &bucket_info));
     if (retcode == -ENOENT) {
       /* bucket instance info has not been synced in yet, fetch it now */
       yield {
-        ldout(sync_env->cct, 10) << "no local info for bucket " << bucket_name << ":" << bucket_id << ": fetching metadata" << dendl;
-        string raw_key = string("bucket.instance:") + bucket_name + ":" + bucket_id;
+        ldout(sync_env->cct, 10) << "no local info for bucket "
+            << bucket_str{bs.bucket} << ": fetching metadata" << dendl;
+        string raw_key = string("bucket.instance:") + bs.bucket.get_key();
 
         meta_sync_env.init(cct, sync_env->store, sync_env->store->rest_master_conn, sync_env->async_rados, sync_env->http_manager, sync_env->error_logger);
 
@@ -2399,48 +2572,51 @@ int RGWRunBucketSyncCoroutine::operate()
                                           NULL /* no marker tracker */));
       }
       if (retcode < 0) {
-        ldout(sync_env->cct, 0) << "ERROR: failed to fetch bucket instance info for " << bucket_name << ":" << bucket_id << dendl;
+        ldout(sync_env->cct, 0) << "ERROR: failed to fetch bucket instance info for " << bucket_str{bs.bucket} << dendl;
         return set_cr_error(retcode);
       }
 
-      yield call(new RGWGetBucketInstanceInfoCR(sync_env->async_rados, sync_env->store, bucket_name, bucket_id, &bucket_info));
+      yield call(new RGWGetBucketInstanceInfoCR(sync_env->async_rados, sync_env->store, bs.bucket, &bucket_info));
     }
     if (retcode < 0) {
-      ldout(sync_env->cct, 0) << "ERROR: failed to retrieve bucket info for bucket=" << bucket_name << " bucket_id=" << bucket_id << dendl;
+      ldout(sync_env->cct, 0) << "ERROR: failed to retrieve bucket info for bucket=" << bucket_str{bs.bucket} << dendl;
       return set_cr_error(retcode);
     }
 
     yield {
       if ((rgw_bucket_shard_sync_info::SyncState)sync_status.state == rgw_bucket_shard_sync_info::StateInit) {
-        call(new RGWInitBucketShardSyncStatusCoroutine(sync_env, bucket_name, bucket_id, shard_id));
+        call(new RGWInitBucketShardSyncStatusCoroutine(sync_env, bs));
         sync_status.state = rgw_bucket_shard_sync_info::StateFullSync;
       }
     }
 
     if (retcode < 0) {
-      ldout(sync_env->cct, 0) << "ERROR: init sync on " << bucket_name << " bucket_id=" << bucket_id << " shard_id=" << shard_id << " failed, retcode=" << retcode << dendl;
+      ldout(sync_env->cct, 0) << "ERROR: init sync on " << bucket_shard_str{bs}
+          << " failed, retcode=" << retcode << dendl;
       return set_cr_error(retcode);
     }
     yield {
       if ((rgw_bucket_shard_sync_info::SyncState)sync_status.state == rgw_bucket_shard_sync_info::StateFullSync) {
-        call(new RGWBucketShardFullSyncCR(sync_env, bucket_name, bucket_id, shard_id,
-                                          &bucket_info, sync_status.full_marker));
+        call(new RGWBucketShardFullSyncCR(sync_env, bs, &bucket_info,
+                                          sync_status.full_marker));
         sync_status.state = rgw_bucket_shard_sync_info::StateIncrementalSync;
       }
     }
     if (retcode < 0) {
-      ldout(sync_env->cct, 0) << "ERROR: full sync on " << bucket_name << " bucket_id=" << bucket_id << " shard_id=" << shard_id << " failed, retcode=" << retcode << dendl;
+      ldout(sync_env->cct, 5) << "full sync on " << bucket_shard_str{bs}
+          << " failed, retcode=" << retcode << dendl;
       return set_cr_error(retcode);
     }
 
     yield {
       if ((rgw_bucket_shard_sync_info::SyncState)sync_status.state == rgw_bucket_shard_sync_info::StateIncrementalSync) {
-        call(new RGWBucketShardIncrementalSyncCR(sync_env, bucket_name, bucket_id, shard_id,
-                                                 &bucket_info, sync_status.inc_marker));
+        call(new RGWBucketShardIncrementalSyncCR(sync_env, bs, &bucket_info,
+                                                 sync_status.inc_marker));
       }
     }
     if (retcode < 0) {
-      ldout(sync_env->cct, 0) << "ERROR: incremental sync on " << bucket_name << " bucket_id=" << bucket_id << " shard_id=" << shard_id << " failed, retcode=" << retcode << dendl;
+      ldout(sync_env->cct, 5) << "incremental sync on " << bucket_shard_str{bs}
+          << " failed, retcode=" << retcode << dendl;
       return set_cr_error(retcode);
     }
 
@@ -2452,7 +2628,7 @@ int RGWRunBucketSyncCoroutine::operate()
 
 RGWCoroutine *RGWRemoteBucketLog::run_sync_cr()
 {
-  return new RGWRunBucketSyncCoroutine(&sync_env, bucket_name, bucket_id, shard_id);
+  return new RGWRunBucketSyncCoroutine(&sync_env, bs);
 }
 
 int RGWBucketSyncStatusManager::init()
@@ -2473,7 +2649,7 @@ int RGWBucketSyncStatusManager::init()
   }
 
 
-  string key = bucket_name + ":" + bucket_id;
+  const string key = bucket.get_key();
 
   rgw_http_param_pair pairs[] = { { "key", key.c_str() },
                                   { NULL, NULL } };
@@ -2496,7 +2672,7 @@ int RGWBucketSyncStatusManager::init()
 
   for (int i = 0; i < effective_num_shards; i++) {
     RGWRemoteBucketLog *l = new RGWRemoteBucketLog(store, this, async_rados, &http_manager);
-    ret = l->init(source_zone, conn, bucket_name, bucket_id, (num_shards ? i : -1), error_logger);
+    ret = l->init(source_zone, conn, bucket, (num_shards ? i : -1), error_logger);
     if (ret < 0) {
       ldout(store->ctx(), 0) << "ERROR: failed to initialize RGWRemoteBucketLog object" << dendl;
       return ret;
@@ -2536,7 +2712,8 @@ int RGWBucketSyncStatusManager::read_sync_status()
 
   int ret = cr_mgr.run(stacks);
   if (ret < 0) {
-    ldout(store->ctx(), 0) << "ERROR: failed to read sync status for " << bucket_name << ":" << bucket_id << dendl;
+    ldout(store->ctx(), 0) << "ERROR: failed to read sync status for "
+        << bucket_str{bucket} << dendl;
     return ret;
   }
 
@@ -2557,21 +2734,17 @@ int RGWBucketSyncStatusManager::run()
 
   int ret = cr_mgr.run(stacks);
   if (ret < 0) {
-    ldout(store->ctx(), 0) << "ERROR: failed to read sync status for " << bucket_name << ":" << bucket_id << dendl;
+    ldout(store->ctx(), 0) << "ERROR: failed to read sync status for "
+        << bucket_str{bucket} << dendl;
     return ret;
   }
 
   return 0;
 }
 
-string RGWBucketSyncStatusManager::status_oid(const string& source_zone, const string& bucket_name, const string& bucket_id, int shard_id)
+string RGWBucketSyncStatusManager::status_oid(const string& source_zone,
+                                              const rgw_bucket_shard& bs)
 {
-  string oid = bucket_status_oid_prefix + "." + source_zone + ":" + bucket_name + ":" + bucket_id;
-  if (shard_id >= 0) {
-    char buf[16];
-    snprintf(buf, sizeof(buf), ":%d", shard_id);
-    oid.append(buf);
-  }
-  return oid;
+  return bucket_status_oid_prefix + "." + source_zone + ":" + bs.get_key();
 }
 
diff --git a/src/rgw/rgw_data_sync.h b/src/rgw/rgw_data_sync.h
index 33b723a..f3fc2f2 100644
--- a/src/rgw/rgw_data_sync.h
+++ b/src/rgw/rgw_data_sync.h
@@ -394,11 +394,9 @@ WRITE_CLASS_ENCODER(rgw_bucket_shard_sync_info)
 
 class RGWRemoteBucketLog : public RGWCoroutinesManager {
   RGWRados *store;
-  RGWRESTConn *conn;
+  RGWRESTConn *conn{nullptr};
   string source_zone;
-  string bucket_name;
-  string bucket_id;
-  int shard_id;
+  rgw_bucket_shard bs;
 
   RGWBucketSyncStatusManager *status_manager;
   RGWAsyncRadosProcessor *async_rados;
@@ -406,16 +404,16 @@ class RGWRemoteBucketLog : public RGWCoroutinesManager {
 
   RGWDataSyncEnv sync_env;
 
-  RGWBucketSyncCR *sync_cr;
+  RGWBucketSyncCR *sync_cr{nullptr};
 
 public:
   RGWRemoteBucketLog(RGWRados *_store, RGWBucketSyncStatusManager *_sm,
                      RGWAsyncRadosProcessor *_async_rados, RGWHTTPManager *_http_manager) : RGWCoroutinesManager(_store->ctx(), _store->get_cr_registry()), store(_store),
-                                       conn(NULL), shard_id(0),
-                                       status_manager(_sm), async_rados(_async_rados), http_manager(_http_manager),
-                                       sync_cr(NULL) {}
+                                       status_manager(_sm), async_rados(_async_rados), http_manager(_http_manager) {}
 
-  int init(const string& _source_zone, RGWRESTConn *_conn, const string& _bucket_name, const string& _bucket_id, int _shard_id, RGWSyncErrorLogger *_error_logger);
+  int init(const string& _source_zone, RGWRESTConn *_conn,
+           const rgw_bucket& bucket, int shard_id,
+           RGWSyncErrorLogger *_error_logger);
   void finish();
 
   RGWCoroutine *read_sync_status_cr(rgw_bucket_shard_sync_info *sync_status);
@@ -438,8 +436,7 @@ class RGWBucketSyncStatusManager {
   RGWRESTConn *conn;
   RGWSyncErrorLogger *error_logger;
 
-  string bucket_name;
-  string bucket_id;
+  rgw_bucket bucket;
 
   map<int, RGWRemoteBucketLog *> source_logs;
 
@@ -454,13 +451,13 @@ class RGWBucketSyncStatusManager {
 
 public:
   RGWBucketSyncStatusManager(RGWRados *_store, const string& _source_zone,
-                             const string& _bucket_name, const string& _bucket_id) : store(_store),
+                             const rgw_bucket& bucket) : store(_store),
                                                                                      cr_mgr(_store->ctx(), _store->get_cr_registry()),
                                                                                      async_rados(NULL),
                                                                                      http_manager(store->ctx(), cr_mgr.get_completion_mgr()),
                                                                                      source_zone(_source_zone),
                                                                                      conn(NULL), error_logger(NULL),
-                                                                                     bucket_name(_bucket_name), bucket_id(_bucket_id),
+                                                                                     bucket(bucket),
                                                                                      num_shards(0) {}
   ~RGWBucketSyncStatusManager();
 
@@ -469,7 +466,7 @@ public:
   map<int, rgw_bucket_shard_sync_info>& get_sync_status() { return sync_status; }
   int init_sync_status();
 
-  static string status_oid(const string& source_zone, const string& bucket_name, const string& bucket_id, int shard_id);
+  static string status_oid(const string& source_zone, const rgw_bucket_shard& bs);
 
   int read_sync_status();
   int run();
diff --git a/src/rgw/rgw_fcgi_process.cc b/src/rgw/rgw_fcgi_process.cc
index fd6cae6..b9246ae 100644
--- a/src/rgw/rgw_fcgi_process.cc
+++ b/src/rgw/rgw_fcgi_process.cc
@@ -20,10 +20,12 @@ void RGWFCGXProcess::run()
   string socket_path;
   string socket_port;
   string socket_host;
+  int socket_backlog;
 
   conf->get_val("socket_path", "", &socket_path);
   conf->get_val("socket_port", g_conf->rgw_port, &socket_port);
   conf->get_val("socket_host", g_conf->rgw_host, &socket_host);
+  socket_backlog = g_conf->rgw_fcgi_socket_backlog;
 
   if (socket_path.empty() && socket_port.empty() && socket_host.empty()) {
     socket_path = g_conf->rgw_socket_path;
@@ -54,7 +56,7 @@ void RGWFCGXProcess::run()
     }
 
     const char *path = path_str.c_str();
-    sock_fd = FCGX_OpenSocket(path, SOCKET_BACKLOG);
+    sock_fd = FCGX_OpenSocket(path, socket_backlog);
     if (sock_fd < 0) {
       dout(0) << "ERROR: FCGX_OpenSocket (" << path << ") returned "
 	      << sock_fd << dendl;
@@ -66,7 +68,7 @@ void RGWFCGXProcess::run()
     }
   } else if (!socket_port.empty()) {
     string bind = socket_host + ":" + socket_port;
-    sock_fd = FCGX_OpenSocket(bind.c_str(), SOCKET_BACKLOG);
+    sock_fd = FCGX_OpenSocket(bind.c_str(), socket_backlog);
     if (sock_fd < 0) {
       dout(0) << "ERROR: FCGX_OpenSocket (" << bind.c_str() << ") returned "
 	      << sock_fd << dendl;
diff --git a/src/rgw/rgw_http_client.cc b/src/rgw/rgw_http_client.cc
index 05d9acd..283886a 100644
--- a/src/rgw/rgw_http_client.cc
+++ b/src/rgw/rgw_http_client.cc
@@ -41,6 +41,7 @@ struct rgw_http_req_data : public RefCountedObject {
     return ret;
   }
 
+
   void finish(int r) {
     Mutex::Locker l(lock);
     ret = r;
@@ -64,6 +65,11 @@ struct rgw_http_req_data : public RefCountedObject {
     Mutex::Locker l(lock);
     return ret;
   }
+
+  RGWHTTPManager *get_manager() {
+    Mutex::Locker l(lock);
+    return mgr;
+  }
 };
 
 /*
@@ -328,7 +334,10 @@ int RGWHTTPClient::wait()
 RGWHTTPClient::~RGWHTTPClient()
 {
   if (req_data) {
-    req_data->mgr->remove_request(this);
+    RGWHTTPManager *http_manager = req_data->get_manager();
+    if (http_manager) {
+      http_manager->remove_request(this);
+    }
 
     req_data->put();
   }
@@ -475,6 +484,10 @@ void RGWHTTPManager::_complete_request(rgw_http_req_data *req_data)
   if (iter != reqs.end()) {
     reqs.erase(iter);
   }
+  {
+    Mutex::Locker l(req_data->lock);
+    req_data->mgr = nullptr;
+  }
   if (completion_mgr) {
     completion_mgr->complete(NULL, req_data->user_info);
   }
@@ -790,7 +803,14 @@ void *RGWHTTPManager::reqs_thread_entry()
     }
   }
 
+
   RWLock::WLocker rl(reqs_lock);
+  for (auto r : unregistered_reqs) {
+    _finish_request(r, -ECANCELED);
+  }
+
+  unregistered_reqs.clear();
+
   auto all_reqs = std::move(reqs);
   for (auto iter : all_reqs) {
     _finish_request(iter.second, -ECANCELED);
diff --git a/src/rgw/rgw_json_enc.cc b/src/rgw/rgw_json_enc.cc
index cd4a0f5..4a4e84b 100644
--- a/src/rgw/rgw_json_enc.cc
+++ b/src/rgw/rgw_json_enc.cc
@@ -506,6 +506,7 @@ void rgw_bucket::dump(Formatter *f) const
   encode_json("index_pool", index_pool, f);
   encode_json("marker", marker, f);
   encode_json("bucket_id", bucket_id, f);
+  encode_json("tenant", tenant, f);
 }
 
 void rgw_bucket::decode_json(JSONObj *obj) {
@@ -515,6 +516,7 @@ void rgw_bucket::decode_json(JSONObj *obj) {
   JSONDecoder::decode_json("index_pool", index_pool, obj);
   JSONDecoder::decode_json("marker", marker, obj);
   JSONDecoder::decode_json("bucket_id", bucket_id, obj);
+  JSONDecoder::decode_json("tenant", tenant, obj);
 }
 
 void RGWBucketEntryPoint::dump(Formatter *f) const
diff --git a/src/rgw/rgw_ldap.cc b/src/rgw/rgw_ldap.cc
index ac420e3..6cca3b8 100644
--- a/src/rgw/rgw_ldap.cc
+++ b/src/rgw/rgw_ldap.cc
@@ -2,3 +2,38 @@
 // vim: ts=8 sw=2 smarttab
 
 #include "rgw_ldap.h"
+
+#include "common/ceph_context.h"
+#include "common/common_init.h"
+#include "common/dout.h"
+#include "common/safe_io.h"
+#include <boost/algorithm/string.hpp>
+
+#include "include/assert.h"
+
+#define dout_subsys ceph_subsys_rgw
+
+std::string parse_rgw_ldap_bindpw(CephContext* ctx)
+{
+  string ldap_bindpw;
+  string ldap_secret = ctx->_conf->rgw_ldap_secret;
+
+  if (ldap_secret.empty()) {
+    ldout(ctx, 10)
+      << __func__ << " LDAP auth no rgw_ldap_secret file found in conf"
+      << dendl;
+    } else {
+      char bindpw[1024];
+      memset(bindpw, 0, 1024);
+      int pwlen = safe_read_file("" /* base */, ldap_secret.c_str(),
+				 bindpw, 1023);
+    if (pwlen) {
+      ldap_bindpw = bindpw;
+      boost::algorithm::trim(ldap_bindpw);
+      if (ldap_bindpw.back() == '\n')
+	ldap_bindpw.pop_back();
+    }
+  }
+
+  return std::move(ldap_bindpw);
+}
diff --git a/src/rgw/rgw_ldap.h b/src/rgw/rgw_ldap.h
index 02eb61e..b29e33a 100644
--- a/src/rgw/rgw_ldap.h
+++ b/src/rgw/rgw_ldap.h
@@ -23,27 +23,38 @@ namespace rgw {
   {
     std::string uri;
     std::string binddn;
+    std::string bindpw;
     std::string searchdn;
     std::string dnattr;
     LDAP *ldap;
+    bool msad = false; /* TODO: possible future specialization */
 
   public:
-    LDAPHelper(std::string _uri, std::string _binddn, std::string _searchdn,
-	      std::string _dnattr)
-      : uri(std::move(_uri)), binddn(std::move(_binddn)), searchdn(_searchdn),
-	dnattr(_dnattr), ldap(nullptr) {
+    LDAPHelper(std::string _uri, std::string _binddn, std::string _bindpw,
+	       std::string _searchdn, std::string _dnattr)
+      : uri(std::move(_uri)), binddn(std::move(_binddn)),
+	bindpw(std::move(_bindpw)), searchdn(_searchdn), dnattr(_dnattr),
+	ldap(nullptr) {
       // nothing
     }
 
     int init() {
       int ret;
       ret = ldap_initialize(&ldap, uri.c_str());
+      if (ret == LDAP_SUCCESS) {
+	unsigned long ldap_ver = LDAP_VERSION3;
+	ret = ldap_set_option(ldap, LDAP_OPT_PROTOCOL_VERSION,
+			      (void*) &ldap_ver);
+      }
+      if (ret == LDAP_SUCCESS) {
+	ret = ldap_set_option(ldap, LDAP_OPT_REFERRALS, LDAP_OPT_OFF); 
+      }
       return (ret == LDAP_SUCCESS) ? ret : -EINVAL;
     }
 
     int bind() {
       int ret;
-      ret = ldap_simple_bind_s(ldap, nullptr, nullptr);
+      ret = ldap_simple_bind_s(ldap, binddn.c_str(), bindpw.c_str());
       return (ret == LDAP_SUCCESS) ? ret : -EINVAL;
     }
 
@@ -60,11 +71,18 @@ namespace rgw {
     int auth(const std::string uid, const std::string pwd) {
       int ret;
       std::string filter;
-      filter = "(";
-      filter += dnattr;
-      filter += "=";
-      filter += uid;
-      filter += ")";
+      if (msad) {
+	filter = "(&(objectClass=user)(sAMAccountName=";
+	filter += uid;
+	filter += "))";
+      } else {
+	/* openldap */
+	filter = "(";
+	filter += dnattr;
+	filter += "=";
+	filter += uid;
+	filter += ")";
+      }
       char *attrs[] = { const_cast<char*>(dnattr.c_str()), nullptr };
       LDAPMessage *answer = nullptr, *entry = nullptr;
       ret = ldap_search_s(ldap, searchdn.c_str(), LDAP_SCOPE_SUBTREE,
@@ -95,8 +113,8 @@ namespace rgw {
   class LDAPHelper
   {
   public:
-    LDAPHelper(std::string _uri, std::string _binddn, std::string _searchdn,
-	      std::string _dnattr)
+    LDAPHelper(std::string _uri, std::string _binddn, std::string _bindpw,
+	       std::string _searchdn, std::string _dnattr)
       {}
 
     int init() {
@@ -117,7 +135,17 @@ namespace rgw {
 
 
 #endif /* HAVE_OPENLDAP */
-
+  
 } /* namespace rgw */
 
+#include "common/ceph_context.h"
+#include "common/common_init.h"
+#include "common/dout.h"
+#include "common/safe_io.h"
+#include <boost/algorithm/string.hpp>
+
+#include "include/assert.h"
+
+std::string parse_rgw_ldap_bindpw(CephContext* ctx);
+
 #endif /* RGW_LDAP_H */
diff --git a/src/rgw/rgw_object_expirer_core.cc b/src/rgw/rgw_object_expirer_core.cc
index a06e267..1480a29 100644
--- a/src/rgw/rgw_object_expirer_core.cc
+++ b/src/rgw/rgw_object_expirer_core.cc
@@ -124,15 +124,19 @@ void RGWObjectExpirer::garbage_chunk(list<cls_timeindex_entry>& entries,      /*
 }
 
 void RGWObjectExpirer::trim_chunk(const string& shard,
-                               const utime_t& from,
-                               const utime_t& to)
+                                  const utime_t& from,
+                                  const utime_t& to,
+                                  const string& from_marker,
+                                  const string& to_marker)
 {
-  ldout(store->ctx(), 20) << "trying to trim removal hints to  " << to << dendl;
+  ldout(store->ctx(), 20) << "trying to trim removal hints to=" << to
+                          << ", to_marker=" << to_marker << dendl;
 
   real_time rt_from = from.to_real_time();
   real_time rt_to = to.to_real_time();
 
-  int ret = store->objexp_hint_trim(shard, rt_from, rt_to);
+  int ret = store->objexp_hint_trim(shard, rt_from, rt_to,
+                                    from_marker, to_marker);
   if (ret < 0) {
     ldout(store->ctx(), 0) << "ERROR during trim: " << ret << dendl;
   }
@@ -140,13 +144,14 @@ void RGWObjectExpirer::trim_chunk(const string& shard,
   return;
 }
 
-void RGWObjectExpirer::process_single_shard(const string& shard,
-                                         const utime_t& last_run,
-                                         const utime_t& round_start)
+bool RGWObjectExpirer::process_single_shard(const string& shard,
+                                            const utime_t& last_run,
+                                            const utime_t& round_start)
 {
   string marker;
   string out_marker;
   bool truncated = false;
+  bool done = true;
 
   CephContext *cct = store->ctx();
   int num_entries = cct->_conf->rgw_objexp_chunk_size;
@@ -163,18 +168,20 @@ void RGWObjectExpirer::process_single_shard(const string& shard,
   int ret = l.lock_exclusive(&store->objexp_pool_ctx, shard);
   if (ret == -EBUSY) { /* already locked by another processor */
     dout(5) << __func__ << "(): failed to acquire lock on " << shard << dendl;
-    return;
+    return false;
   }
+
   do {
     real_time rt_last = last_run.to_real_time();
     real_time rt_start = round_start.to_real_time();
 
     list<cls_timeindex_entry> entries;
     ret = store->objexp_hint_list(shard, rt_last, rt_start,
-                                      num_entries, marker, entries,
-                                      &out_marker, &truncated);
+                                  num_entries, marker, entries,
+                                  &out_marker, &truncated);
     if (ret < 0) {
-      ldout(cct, 10) << "cannot get removal hints from shard: " << shard << dendl;
+      ldout(cct, 10) << "cannot get removal hints from shard: " << shard
+                     << dendl;
       continue;
     }
 
@@ -182,11 +189,12 @@ void RGWObjectExpirer::process_single_shard(const string& shard,
     garbage_chunk(entries, need_trim);
 
     if (need_trim) {
-      trim_chunk(shard, last_run, round_start);
+      trim_chunk(shard, last_run, round_start, marker, out_marker);
     }
 
     utime_t now = ceph_clock_now(g_ceph_context);
     if (now >= end) {
+      done = false;
       break;
     }
 
@@ -194,15 +202,16 @@ void RGWObjectExpirer::process_single_shard(const string& shard,
   } while (truncated);
 
   l.unlock(&store->objexp_pool_ctx, shard);
-  return;
+  return done;
 }
 
-void RGWObjectExpirer::inspect_all_shards(const utime_t& last_run, const utime_t& round_start)
+/* Returns true if all shards have been processed successfully. */
+bool RGWObjectExpirer::inspect_all_shards(const utime_t& last_run,
+                                          const utime_t& round_start)
 {
-  utime_t shard_marker;
-
-  CephContext *cct = store->ctx();
+  CephContext * const cct = store->ctx();
   int num_shards = cct->_conf->rgw_objexp_hints_num_shards;
+  bool all_done = true;
 
   for (int i = 0; i < num_shards; i++) {
     string shard;
@@ -210,10 +219,12 @@ void RGWObjectExpirer::inspect_all_shards(const utime_t& last_run, const utime_t
 
     ldout(store->ctx(), 20) << "proceeding shard = " << shard << dendl;
 
-    process_single_shard(shard, last_run, round_start);
+    if (! process_single_shard(shard, last_run, round_start)) {
+      all_done = false;
+    }
   }
 
-  return;
+  return all_done;
 }
 
 bool RGWObjectExpirer::going_down()
@@ -243,10 +254,13 @@ void *RGWObjectExpirer::OEWorker::entry() {
   do {
     utime_t start = ceph_clock_now(cct);
     ldout(cct, 2) << "object expiration: start" << dendl;
-    oe->inspect_all_shards(last_run, start);
+    if (oe->inspect_all_shards(last_run, start)) {
+      /* All shards have been processed properly. Next time we can start
+       * from this moment. */
+      last_run = start;
+    }
     ldout(cct, 2) << "object expiration: stop" << dendl;
 
-    last_run = start;
 
     if (oe->going_down())
       break;
diff --git a/src/rgw/rgw_object_expirer_core.h b/src/rgw/rgw_object_expirer_core.h
index 284dc14..f8170f3 100644
--- a/src/rgw/rgw_object_expirer_core.h
+++ b/src/rgw/rgw_object_expirer_core.h
@@ -41,9 +41,9 @@ class RGWObjectExpirer {
 protected:
   RGWRados *store;
 
-  int init_bucket_info(const string& tenant_name,
-                       const string& bucket_name,
-                       const string& bucket_id,
+  int init_bucket_info(const std::string& tenant_name,
+                       const std::string& bucket_name,
+                       const std::string& bucket_id,
                        RGWBucketInfo& bucket_info);
 
   class OEWorker : public Thread {
@@ -53,7 +53,13 @@ protected:
     Cond cond;
 
   public:
-    OEWorker(CephContext *_cct, RGWObjectExpirer *_oe) : cct(_cct), oe(_oe), lock("OEWorker") {}
+    OEWorker(CephContext * const cct,
+             RGWObjectExpirer * const oe)
+      : cct(cct),
+        oe(oe),
+        lock("OEWorker") {
+    }
+
     void *entry();
     void stop();
   };
@@ -63,23 +69,25 @@ protected:
 
 public:
   explicit RGWObjectExpirer(RGWRados *_store)
-    : store(_store)
-  {}
+    : store(_store) {
+  }
 
   int garbage_single_object(objexp_hint_entry& hint);
 
-  void garbage_chunk(list<cls_timeindex_entry>& entries,      /* in  */
+  void garbage_chunk(std::list<cls_timeindex_entry>& entries, /* in  */
                      bool& need_trim);                        /* out */
 
-  void trim_chunk(const string& shard,
+  void trim_chunk(const std::string& shard,
                   const utime_t& from,
-                  const utime_t& to);
+                  const utime_t& to,
+                  const string& from_marker,
+                  const string& to_marker);
 
-  void process_single_shard(const string& shard,
+  bool process_single_shard(const std::string& shard,
                             const utime_t& last_run,
                             const utime_t& round_start);
 
-  void inspect_all_shards(const utime_t& last_run,
+  bool inspect_all_shards(const utime_t& last_run,
                           const utime_t& round_start);
 
   bool going_down();
diff --git a/src/rgw/rgw_op.cc b/src/rgw/rgw_op.cc
index a2202ca..1e2e052 100644
--- a/src/rgw/rgw_op.cc
+++ b/src/rgw/rgw_op.cc
@@ -637,6 +637,18 @@ bool RGWOp::generate_cors_headers(string& origin, string& method, string& header
   if (!rule)
     return false;
 
+  /*
+   * Set the Allowed-Origin header to a asterisk if this is allowed in the rule
+   * and no Authorization was send by the client
+   *
+   * The origin parameter specifies a URI that may access the resource.  The browser must enforce this.
+   * For requests without credentials, the server may specify "*" as a wildcard,
+   * thereby allowing any origin to access the resource.
+   */
+  const char *authorization = s->info.env->get("HTTP_AUTHORIZATION");
+  if (!authorization && rule->has_wildcard_origin())
+    origin = "*";
+
   /* CORS 6.2.3. */
   const char *req_meth = s->info.env->get("HTTP_ACCESS_CONTROL_REQUEST_METHOD");
   if (!req_meth) {
@@ -1015,13 +1027,26 @@ int RGWGetObj::handle_slo_manifest(bufferlist& bl)
 
   for (const auto& entry : slo_info.entries) {
     const string& path = entry.path;
-    const size_t pos = path.find('/', 1); /* skip first / */
-    if (pos == string::npos) {
+
+    /* If the path starts with slashes, strip them all. */
+    const size_t pos_init = path.find_first_not_of('/');
+    /* According to the documentation of std::string::find following check
+     * is not necessary as we should get the std::string::npos propagation
+     * here. This might be true with the accuracy to implementation's bugs.
+     * See following question on SO:
+     * http://stackoverflow.com/questions/1011790/why-does-stdstring-findtext-stdstringnpos-not-return-npos
+     */
+    if (pos_init == string::npos) {
+      return -EINVAL;
+    }
+
+    const size_t pos_sep = path.find('/', pos_init);
+    if (pos_sep == string::npos) {
       return -EINVAL;
     }
 
-    string bucket_name = path.substr(1, pos - 1);
-    string obj_name = path.substr(pos + 1);
+    string bucket_name = path.substr(pos_init, pos_sep - pos_init);
+    string obj_name = path.substr(pos_sep + 1);
 
     rgw_bucket bucket;
     RGWAccessControlPolicy *bucket_policy;
@@ -1225,6 +1250,7 @@ void RGWGetObj::execute()
     if (op_ret < 0) {
       ldout(s->cct, 0) << "ERROR: failed to handle user manifest ret="
 		       << op_ret << dendl;
+      goto done_err;
     }
     return;
   }
@@ -1769,6 +1795,9 @@ int RGWCreateBucket::verify_permission()
       << dendl;
     return -EACCES;
   }
+  if (s->user->max_buckets < 0) {
+    return -EPERM;
+  }
 
   if (s->user->max_buckets) {
     RGWUserBuckets buckets;
@@ -1838,6 +1867,14 @@ static void prepare_add_del_attrs(const map<string, bufferlist>& orig_attrs,
   }
 }
 
+/* Fuse resource metadata basing on original attributes in @orig_attrs, set
+ * of _custom_ attribute names to remove in @rmattr_names and attributes in
+ * @out_attrs. Place results in @out_attrs.
+ *
+ * NOTE: it's supposed that all special attrs already present in @out_attrs
+ * will be preserved without any change. Special attributes are those which
+ * names start with RGW_ATTR_META_PREFIX. They're complement to custom ones
+ * used for X-Account-Meta-*, X-Container-Meta-*, X-Amz-Meta and so on.  */
 static void prepare_add_del_attrs(const map<string, bufferlist>& orig_attrs,
                                   const set<string>& rmattr_names,
                                   map<string, bufferlist>& out_attrs)
@@ -1856,6 +1893,10 @@ static void prepare_add_del_attrs(const map<string, bufferlist>& orig_attrs,
         if (aiter != std::end(out_attrs)) {
           out_attrs.erase(aiter);
         }
+      } else {
+        /* emplace() won't alter the map if the key is already present.
+         * This behaviour is fully intensional here. */
+        out_attrs.emplace(kv);
       }
     } else if (out_attrs.find(name) == std::end(out_attrs)) {
       out_attrs[name] = kv.second;
@@ -1966,12 +2007,9 @@ void RGWCreateBucket::execute()
     }
   }
 
-  if (need_metadata_upload()) {
-    rgw_get_request_metadata(s->cct, s->info, attrs, false);
-    prepare_add_del_attrs(s->bucket_attrs, rmattr_names, attrs);
-    populate_with_generic_attrs(s, attrs);
-  }
-
+  /* Encode special metadata first as we're using std::map::emplace under
+   * the hood. This method will add the new items only if the map doesn't
+   * contain such keys yet. */
   policy.encode(aclbl);
   emplace_attr(RGW_ATTR_ACL, std::move(aclbl));
 
@@ -1979,12 +2017,28 @@ void RGWCreateBucket::execute()
     cors_config.encode(corsbl);
     emplace_attr(RGW_ATTR_CORS, std::move(corsbl));
   }
+
+  if (need_metadata_upload()) {
+    /* It's supposed that following functions WILL NOT change any special
+     * attributes (like RGW_ATTR_ACL) if they are already present in attrs. */
+    rgw_get_request_metadata(s->cct, s->info, attrs, false);
+    prepare_add_del_attrs(s->bucket_attrs, rmattr_names, attrs);
+    populate_with_generic_attrs(s, attrs);
+  }
+
   s->bucket.tenant = s->bucket_tenant; /* ignored if bucket exists */
   s->bucket.name = s->bucket_name;
+
+  /* Handle updates of the metadata for Swift's object versioning. */
+  if (swift_ver_location) {
+    s->bucket_info.swift_ver_location = *swift_ver_location;
+    s->bucket_info.swift_versioning = (! swift_ver_location->empty());
+  }
+
   op_ret = store->create_bucket(*(s->user), s->bucket, zonegroup_id,
-				placement_rule, swift_ver_location, attrs,
-				info, pobjv, &ep_objv, creation_time,
-				pmaster_bucket, true);
+                                placement_rule, s->bucket_info.swift_ver_location,
+                                attrs, info, pobjv, &ep_objv, creation_time,
+                                pmaster_bucket, true);
   /* continue if EEXIST and create_bucket will fail below.  this way we can
    * recover from a partial create by retrying it. */
   ldout(s->cct, 20) << "rgw_create_bucket returned ret=" << op_ret << " bucket=" << s->bucket << dendl;
@@ -2052,6 +2106,12 @@ void RGWCreateBucket::execute()
       prepare_add_del_attrs(s->bucket_attrs, rmattr_names, attrs);
       populate_with_generic_attrs(s, attrs);
 
+      /* Handle updates of the metadata for Swift's object versioning. */
+      if (swift_ver_location) {
+        s->bucket_info.swift_ver_location = *swift_ver_location;
+        s->bucket_info.swift_versioning = (! swift_ver_location->empty());
+      }
+
       op_ret = rgw_bucket_set_attrs(store, s->bucket_info, attrs,
                                     &s->bucket_info.objv_tracker);
     } while (op_ret == -ECANCELED && tries++ < 20);
@@ -2395,6 +2455,18 @@ void RGWPutObj::execute()
 
   processor = select_processor(*static_cast<RGWObjectCtx *>(s->obj_ctx), &multipart);
 
+  /* Handle object versioning of Swift API. */
+  if (! multipart) {
+    rgw_obj obj(s->bucket, s->object);
+    op_ret = store->swift_versioning_copy(*static_cast<RGWObjectCtx *>(s->obj_ctx),
+                                          s->bucket_owner.get_id(),
+                                          s->bucket_info,
+                                          obj);
+    if (op_ret < 0) {
+      return;
+    }
+  }
+
   op_ret = processor->prepare(store, NULL);
   if (op_ret < 0) {
     ldout(s->cct, 20) << "processor->prepare() returned ret=" << op_ret
@@ -2403,8 +2475,8 @@ void RGWPutObj::execute()
   }
 
   do {
-    bufferlist data;
-    len = get_data(data);
+    bufferlist data_in;
+    len = get_data(data_in);
     if (len < 0) {
       op_ret = len;
       goto done;
@@ -2412,6 +2484,13 @@ void RGWPutObj::execute()
     if (!len)
       break;
 
+    bufferlist &data = data_in;
+    if (s->aws4_auth_streaming_mode) {
+      /* use unwrapped data */
+      data = s->aws4_auth->bl;
+      len = data.length();
+    }
+
     /* do we need this operation to be synchronous? if we're dealing with an object with immutable
      * head, e.g., multipart object we need to make sure we're the first one writing to this object
      */
@@ -2463,7 +2542,9 @@ void RGWPutObj::execute()
     ofs += len;
   } while (len > 0);
 
-  if (!chunked_upload && ofs != s->content_length) {
+  if (!chunked_upload &&
+      ofs != s->content_length &&
+      !s->aws4_auth_streaming_mode) {
     op_ret = -ERR_REQUEST_TIMEOUT;
     goto done;
   }
@@ -2509,6 +2590,7 @@ void RGWPutObj::execute()
   hash.Final(m);
 
   buf_to_hex(m, CEPH_CRYPTO_MD5_DIGESTSIZE, calc_md5);
+
   etag = calc_md5;
 
   if (supplied_md5_b64 && strcmp(calc_md5, supplied_md5)) {
@@ -2773,11 +2855,23 @@ void RGWPutMetadataAccount::execute()
     return;
   }
 
+  op_ret = rgw_get_user_attrs_by_uid(store, s->user->user_id, orig_attrs,
+                                     &acct_op_tracker);
+  if (op_ret < 0) {
+    return;
+  }
+
   rgw_get_request_metadata(s->cct, s->info, attrs, false);
-  RGWUserInfo orig_uinfo;
-  rgw_get_user_info_by_uid(store, s->user->user_id, orig_uinfo, &acct_op_tracker);
+  prepare_add_del_attrs(orig_attrs, rmattr_names, attrs);
   populate_with_generic_attrs(s, attrs);
 
+  RGWUserInfo orig_uinfo;
+  op_ret = rgw_get_user_info_by_uid(store, s->user->user_id, orig_uinfo,
+                                    &acct_op_tracker);
+  if (op_ret < 0) {
+    return;
+  }
+
   /* Handle the TempURL-related stuff. */
   map<int, string> temp_url_keys;
   filter_out_temp_url(attrs, rmattr_names, temp_url_keys);
@@ -2819,8 +2913,6 @@ void RGWPutMetadataBucket::pre_exec()
 
 void RGWPutMetadataBucket::execute()
 {
-  map<string, buffer::list> orig_attrs;
-
   op_ret = get_params();
   if (op_ret < 0) {
     return;
@@ -2834,10 +2926,9 @@ void RGWPutMetadataBucket::execute()
     return;
   }
 
-  orig_attrs = s->bucket_attrs; /* XXX map copy */
-  prepare_add_del_attrs(orig_attrs, rmattr_names, attrs);
-  populate_with_generic_attrs(s, attrs);
-
+  /* Encode special metadata first as we're using std::map::emplace under
+   * the hood. This method will add the new items only if the map doesn't
+   * contain such keys yet. */
   if (has_policy) {
     buffer::list bl;
     policy.encode(bl);
@@ -2850,8 +2941,15 @@ void RGWPutMetadataBucket::execute()
     emplace_attr(RGW_ATTR_CORS, std::move(bl));
   }
 
-  s->bucket_info.swift_ver_location = swift_ver_location;
-  s->bucket_info.swift_versioning = (!swift_ver_location.empty());
+  /* It's supposed that following functions WILL NOT change any special
+   * attributes (like RGW_ATTR_ACL) if they are already present in attrs. */
+  prepare_add_del_attrs(s->bucket_attrs, rmattr_names, attrs);
+  populate_with_generic_attrs(s, attrs);
+
+  if (swift_ver_location) {
+    s->bucket_info.swift_ver_location = *swift_ver_location;
+    s->bucket_info.swift_versioning = (! swift_ver_location->empty());
+  }
 
   op_ret = rgw_bucket_set_attrs(store, s->bucket_info, attrs,
 				&s->bucket_info.objv_tracker);
@@ -3023,35 +3121,46 @@ void RGWDeleteObj::execute()
     }
 
     RGWObjectCtx *obj_ctx = static_cast<RGWObjectCtx *>(s->obj_ctx);
-
     obj_ctx->set_atomic(obj);
 
-    RGWRados::Object del_target(store, s->bucket_info, *obj_ctx, obj);
-    RGWRados::Object::Delete del_op(&del_target);
-
-    op_ret = get_system_versioning_params(s, &del_op.params.olh_epoch,
-					  &del_op.params.marker_version_id);
+    bool ver_restored = false;
+    op_ret = store->swift_versioning_restore(*obj_ctx, s->bucket_owner.get_id(),
+                                             s->bucket_info, obj, ver_restored);
     if (op_ret < 0) {
       return;
     }
 
-    del_op.params.bucket_owner = s->bucket_owner.get_id();
-    del_op.params.versioning_status = s->bucket_info.versioning_status();
-    del_op.params.obj_owner = s->owner;
-    del_op.params.unmod_since = unmod_since;
-    del_op.params.high_precision_time = s->system_request; /* system request uses high precision time */
+    if (!ver_restored) {
+      /* Swift's versioning mechanism hasn't found any previous version of
+       * the object that could be restored. This means we should proceed
+       * with the regular delete path. */
+      RGWRados::Object del_target(store, s->bucket_info, *obj_ctx, obj);
+      RGWRados::Object::Delete del_op(&del_target);
 
-    op_ret = del_op.delete_obj();
-    if (op_ret >= 0) {
-      delete_marker = del_op.result.delete_marker;
-      version_id = del_op.result.version_id;
-    }
+      op_ret = get_system_versioning_params(s, &del_op.params.olh_epoch,
+                                            &del_op.params.marker_version_id);
+      if (op_ret < 0) {
+        return;
+      }
 
-    /* Check whether the object has expired. Swift API documentation
-     * stands that we should return 404 Not Found in such case. */
-    if (need_object_expiration() && object_is_expired(attrs)) {
-      op_ret = -ENOENT;
-      return;
+      del_op.params.bucket_owner = s->bucket_owner.get_id();
+      del_op.params.versioning_status = s->bucket_info.versioning_status();
+      del_op.params.obj_owner = s->owner;
+      del_op.params.unmod_since = unmod_since;
+      del_op.params.high_precision_time = s->system_request; /* system request uses high precision time */
+
+      op_ret = del_op.delete_obj();
+      if (op_ret >= 0) {
+        delete_marker = del_op.result.delete_marker;
+        version_id = del_op.result.version_id;
+      }
+
+      /* Check whether the object has expired. Swift API documentation
+       * stands that we should return 404 Not Found in such case. */
+      if (need_object_expiration() && object_is_expired(attrs)) {
+        op_ret = -ENOENT;
+        return;
+      }
     }
 
     if (op_ret == -ERR_PRECONDITION_FAILED && no_precondition_error) {
@@ -3273,6 +3382,16 @@ void RGWCopyObj::execute()
 
   bool high_precision_time = (s->system_request);
 
+  /* Handle object versioning of Swift API. In case of copying to remote this
+   * should fail gently (op_ret == 0) as the dst_obj will not exist here. */
+  op_ret = store->swift_versioning_copy(obj_ctx,
+                                        dest_bucket_info.owner,
+                                        dest_bucket_info,
+                                        dst_obj);
+  if (op_ret < 0) {
+    return;
+  }
+
   op_ret = store->copy_obj(obj_ctx,
 			   s->user->user_id,
 			   client_id,
diff --git a/src/rgw/rgw_op.h b/src/rgw/rgw_op.h
index d0884ea..f66d877 100644
--- a/src/rgw/rgw_op.h
+++ b/src/rgw/rgw_op.h
@@ -18,6 +18,8 @@
 #include <set>
 #include <map>
 
+#include <boost/optional.hpp>
+
 #include "common/armor.h"
 #include "common/mime.h"
 #include "common/utf8.h"
@@ -535,7 +537,7 @@ protected:
   obj_version ep_objv;
   bool has_cors;
   RGWCORSConfiguration cors_config;
-  string swift_ver_location;
+  boost::optional<std::string> swift_ver_location;
   map<string, buffer::list> attrs;
   set<string> rmattr_names;
 
@@ -781,7 +783,7 @@ protected:
   RGWAccessControlPolicy policy;
   RGWCORSConfiguration cors_config;
   string placement_rule;
-  string swift_ver_location;
+  boost::optional<std::string> swift_ver_location;
 
 public:
   RGWPutMetadataBucket()
diff --git a/src/rgw/rgw_period_puller.cc b/src/rgw/rgw_period_puller.cc
index a35591c..b76f73e 100644
--- a/src/rgw/rgw_period_puller.cc
+++ b/src/rgw/rgw_period_puller.cc
@@ -57,8 +57,15 @@ int RGWPeriodPuller::pull(const std::string& period_id, RGWPeriod& period)
 {
   // try to read the period from rados
   period.set_id(period_id);
+  period.set_epoch(0);
   int r = period.init(store->ctx(), store);
   if (r < 0) {
+    if (store->is_meta_master()) {
+      // can't pull if we're the master
+      ldout(store->ctx(), 1) << "metadata master failed to read period "
+          << period_id << " from local storage: " << cpp_strerror(r) << dendl;
+      return r;
+    }
     ldout(store->ctx(), 14) << "pulling period " << period_id
         << " from master" << dendl;
     // request the period from the master zone
diff --git a/src/rgw/rgw_process.h b/src/rgw/rgw_process.h
index faf942c..61e1870 100644
--- a/src/rgw/rgw_process.h
+++ b/src/rgw/rgw_process.h
@@ -21,7 +21,6 @@
 #define def_dout_subsys
 #endif
 
-#define SOCKET_BACKLOG 1024
 
 extern void signal_shutdown();
 
diff --git a/src/rgw/rgw_rados.cc b/src/rgw/rgw_rados.cc
index e6cb001..15544c8 100644
--- a/src/rgw/rgw_rados.cc
+++ b/src/rgw/rgw_rados.cc
@@ -6,6 +6,9 @@
 #include <stdlib.h>
 #include <sys/types.h>
 
+#include <boost/format.hpp>
+#include <boost/optional.hpp>
+
 #include "common/ceph_json.h"
 #include "common/utf8.h"
 
@@ -238,13 +241,15 @@ int RGWZoneGroup::equals(const string& other_zonegroup) const
 
 int RGWZoneGroup::add_zone(const RGWZoneParams& zone_params, bool *is_master, bool *read_only, const list<string>& endpoints)
 {
-  if (is_master && *is_master) {
-    if (!master_zone.empty() && master_zone != zone_params.get_id()) {
-      ldout(cct, 0) << "NOTICE: overriding master zone: " << master_zone  << dendl;
+  if (is_master) {
+    if (*is_master) {
+      if (!master_zone.empty() && master_zone != zone_params.get_id()) {
+        ldout(cct, 0) << "NOTICE: overriding master zone: " << master_zone << dendl;
+      }
+      master_zone = zone_params.get_id();
+    } else if (master_zone == zone_params.get_id()) {
+      master_zone.clear();
     }
-    master_zone = zone_params.get_id();
-  } else if (master_zone == zone_params.get_id()) {
-    master_zone ="";
   }
 
   RGWZone& zone = zones[zone_params.get_id()];
@@ -949,7 +954,7 @@ int RGWPeriod::read_latest_epoch(RGWPeriodLatestEpochInfo& info)
   RGWObjectCtx obj_ctx(store);
   int ret = rgw_get_system_obj(store, obj_ctx, pool, oid, bl, NULL, NULL);
   if (ret < 0) {
-    ldout(cct, 0) << "error read_lastest_epoch " << pool << ":" << oid << dendl;
+    ldout(cct, 1) << "error read_lastest_epoch " << pool << ":" << oid << dendl;
     return ret;
   }
   try {
@@ -1507,7 +1512,6 @@ int RGWZoneParams::create(bool exclusive)
 
   r = RGWSystemMetaObj::create(exclusive);
   if (r < 0) {
-    ldout(cct, 0) << "RGWZoneParams::create(): error creating default zone params: " << cpp_strerror(-r) << dendl;
     return r;
   }
 
@@ -2520,7 +2524,7 @@ int RGWRados::unwatch(uint64_t watch_handle)
     ldout(cct, 0) << "ERROR: rados->unwatch2() returned r=" << r << dendl;
     return r;
   }
-  r = rados[0]->watch_flush();
+  r = rados[0].watch_flush();
   if (r < 0) {
     ldout(cct, 0) << "ERROR: rados->watch_flush() returned r=" << r << dendl;
     return r;
@@ -3161,6 +3165,7 @@ void RGWRados::finalize()
     cr_registry->put();
   }
   delete binfo_cache;
+  delete obj_tombstone_cache;
 }
 
 /** 
@@ -3170,58 +3175,32 @@ void RGWRados::finalize()
 int RGWRados::init_rados()
 {
   int ret = 0;
+  auto handles = std::vector<librados::Rados>{cct->_conf->rgw_num_rados_handles};
 
-  num_rados_handles = cct->_conf->rgw_num_rados_handles;
-
-  rados = new librados::Rados *[num_rados_handles];
-  if (!rados) {
-    ret = -ENOMEM;
-    return ret;
-  }
-
-  for (uint32_t i=0; i < num_rados_handles; i++) {
-
-    rados[i] = new Rados();
-    if (!rados[i]) {
-      ret = -ENOMEM;
-      goto fail;
-    }
-
-    ret = rados[i]->init_with_context(cct);
+  for (auto& r : handles) {
+    ret = r.init_with_context(cct);
     if (ret < 0) {
-      goto fail;
+      return ret;
     }
 
-    ret = rados[i]->connect();
+    ret = r.connect();
     if (ret < 0) {
-      goto fail;
+      return ret;
     }
   }
 
-  cr_registry = new RGWCoroutinesManagerRegistry(cct);
-  ret =  cr_registry->hook_to_admin_command("cr dump");
+  auto crs = std::unique_ptr<RGWCoroutinesManagerRegistry>{
+    new RGWCoroutinesManagerRegistry(cct)};
+  ret = crs->hook_to_admin_command("cr dump");
   if (ret < 0) {
-    goto fail;
+    return ret;
   }
 
   meta_mgr = new RGWMetadataManager(cct, this);
   data_log = new RGWDataChangesLog(cct, this);
+  cr_registry = crs.release();
 
-  return ret;
-
-fail:
-  for (uint32_t i=0; i < num_rados_handles; i++) {
-    if (rados[i]) {
-      delete rados[i];
-      rados[i] = NULL;
-    }
-  }
-  num_rados_handles = 0;
-  if (rados) {
-    delete[] rados;
-    rados = NULL;
-  }
-
+  std::swap(handles, rados);
   return ret;
 }
 
@@ -3339,6 +3318,28 @@ int RGWRados::replace_region_with_zonegroup()
     ldout(cct, 0) << "failed to list regions: ret "<< ret << " " << cpp_strerror(-ret) << dendl;
     return ret;
   } else if (ret == -ENOENT || regions.empty()) {
+    RGWZoneParams zoneparams(default_zone_name);
+    int ret = zoneparams.init(cct, this);
+    if (ret < 0 && ret != -ENOENT) {
+      ldout(cct, 0) << __func__ << ": error initializing default zone params: " << cpp_strerror(-ret) << dendl;
+      return ret;
+    }
+    /* default zone is missing meta_heap */
+    if (ret != -ENOENT && zoneparams.metadata_heap.name.empty()) {
+      zoneparams.metadata_heap = ".rgw.meta";
+      return zoneparams.update();
+    }
+    /* update master zone */
+    RGWZoneGroup default_zg(default_zonegroup_name);
+    ret = default_zg.init(cct, this);
+    if (ret < 0 && ret != -ENOENT) {
+      ldout(cct, 0) << __func__ << ": error in initializing default zonegroup: " << cpp_strerror(-ret) << dendl;
+      return ret;
+    }
+    if (ret != -ENOENT && default_zg.master_zone.empty()) {
+      default_zg.master_zone = zoneparams.get_id();
+      return default_zg.update();
+    }
     return 0;
   }
 
@@ -3439,6 +3440,9 @@ int RGWRados::replace_region_with_zonegroup()
         ldout(cct, 0) << "failed to init zoneparams  " << iter->first <<  ": " << cpp_strerror(-ret) << dendl;
         return ret;
       }
+      if (zoneparams.metadata_heap.name.empty()) {
+	zoneparams.metadata_heap = ".rgw.meta";
+      }
       zonegroup.realm_id = realm.get_id();
       ret = zoneparams.update();
       if (ret < 0 && ret != -EEXIST) {
@@ -3510,10 +3514,18 @@ int RGWRados::init_zg_from_period(bool *initialized)
   for (iter = current_period.get_map().zonegroups.begin();
        iter != current_period.get_map().zonegroups.end(); ++iter){
     const RGWZoneGroup& zg = iter->second;
-    add_new_connection_to_map(zonegroup_conn_map, zg, new RGWRESTConn(cct, this, zg.get_id(), zg.endpoints));
+    // use endpoints from the zonegroup's master zone
+    auto master = zg.zones.find(zg.master_zone);
+    if (master == zg.zones.end()) {
+      ldout(cct, 0) << "zonegroup " << zg.get_name() << " missing zone for "
+          "master_zone=" << zg.master_zone << dendl;
+      return -EINVAL;
+    }
+    const auto& endpoints = master->second.endpoints;
+    add_new_connection_to_map(zonegroup_conn_map, zg, new RGWRESTConn(cct, this, zg.get_id(), endpoints));
     if (!current_period.get_master_zonegroup().empty() &&
         zg.get_id() == current_period.get_master_zonegroup()) {
-      rest_master_conn = new RGWRESTConn(cct, this, zg.get_id(), zg.endpoints);
+      rest_master_conn = new RGWRESTConn(cct, this, zg.get_id(), endpoints);
     }
   }
 
@@ -3546,7 +3558,15 @@ int RGWRados::init_zg_from_local(bool *creating_defaults)
   }
   ldout(cct, 20) << "zonegroup " << zonegroup.get_name() << dendl;
   if (zonegroup.is_master) {
-    rest_master_conn = new RGWRESTConn(cct, this, zonegroup.get_id(), zonegroup.endpoints);
+    // use endpoints from the zonegroup's master zone
+    auto master = zonegroup.zones.find(zonegroup.master_zone);
+    if (master == zonegroup.zones.end()) {
+      ldout(cct, 0) << "zonegroup " << zonegroup.get_name() << " missing zone for "
+          "master_zone=" << zonegroup.master_zone << dendl;
+      return -EINVAL;
+    }
+    const auto& endpoints = master->second.endpoints;
+    rest_master_conn = new RGWRESTConn(cct, this, zonegroup.get_id(), endpoints);
   }
 
   return 0;
@@ -3620,7 +3640,7 @@ int RGWRados::init_complete()
       lderr(cct) << "Cannot find zone id=" << zone_params.get_id() << " (name=" << zone_params.get_name() << ")" << dendl;
       return -EINVAL;
     }
-    ldout(cct, 0) << "Cannot find zone id=" << zone_params.get_id() << " (name=" << zone_params.get_name() << "), switching to local zonegroup configuration" << dendl;
+    ldout(cct, 1) << "Cannot find zone id=" << zone_params.get_id() << " (name=" << zone_params.get_name() << "), switching to local zonegroup configuration" << dendl;
     ret = init_zg_from_local(&creating_defaults);
     if (ret < 0) {
       return ret;
@@ -3759,6 +3779,12 @@ int RGWRados::init_complete()
   binfo_cache = new RGWChainedCacheImpl<bucket_info_entry>;
   binfo_cache->init(this);
 
+  bool need_tombstone_cache = !zone_conn_map.empty();
+
+  if (need_tombstone_cache) {
+    obj_tombstone_cache = new tombstone_cache_t(cct->_conf->rgw_obj_tombstone_cache_size);
+  }
+
   return ret;
 }
 
@@ -3950,7 +3976,7 @@ int RGWRados::init_watch()
 {
   const char *control_pool = get_zone_params().control_pool.name.c_str();
 
-  librados::Rados *rad = rados[0];
+  librados::Rados *rad = &rados[0];
   int r = rad->ioctx_create(control_pool, control_pool_ctx);
 
   if (r == -ENOENT) {
@@ -5082,8 +5108,7 @@ int RGWRados::create_bucket(RGWUserInfo& owner, rgw_bucket& bucket,
           return r;
 
         /* remove bucket meta instance */
-        string entry;
-        get_bucket_instance_entry(bucket, entry);
+        string entry = bucket.get_key();
         r = rgw_bucket_instance_remove_entry(this, entry, &instance_ver);
         if (r < 0)
           return r;
@@ -5697,13 +5722,69 @@ int RGWRados::BucketShard::init(rgw_bucket& _bucket, rgw_obj& obj)
 }
 
 
-int RGWRados::swift_versioning_copy(RGWBucketInfo& bucket_info, RGWRados::Object *source, RGWObjState *state,
-                                    rgw_user& user)
+/* Execute @handler on last item in bucket listing for bucket specified
+ * in @bucket_info. @obj_prefix and @obj_delim narrow down the listing
+ * to objects matching these criterias. */
+int RGWRados::on_last_entry_in_listing(RGWBucketInfo& bucket_info,
+                                       const std::string& obj_prefix,
+                                       const std::string& obj_delim,
+                                       std::function<int(const RGWObjEnt&)> handler)
 {
-  if (!bucket_info.has_swift_versioning() || bucket_info.swift_ver_location.empty()) {
+  RGWRados::Bucket target(this, bucket_info);
+  RGWRados::Bucket::List list_op(&target);
+
+  list_op.params.prefix = obj_prefix;
+  list_op.params.delim = obj_delim;
+
+  ldout(cct, 20) << "iterating listing for bucket=" << bucket_info.bucket.name
+                 << ", obj_prefix=" << obj_prefix
+                 << ", obj_delim=" << obj_delim
+                 << dendl;
+
+  bool is_truncated = false;
+
+  boost::optional<RGWObjEnt> last_entry;
+  /* We need to rewind to the last object in a listing. */
+  do {
+    /* List bucket entries in chunks. */
+    static constexpr int MAX_LIST_OBJS = 100;
+    std::vector<RGWObjEnt> entries(MAX_LIST_OBJS);
+
+    int ret = list_op.list_objects(MAX_LIST_OBJS, &entries, nullptr,
+                                   &is_truncated);
+    if (ret < 0) {
+      return ret;
+    } else if (!entries.empty()) {
+      last_entry = last_entry = entries.back();
+    }
+  } while (is_truncated);
+
+  if (last_entry) {
+    return handler(*last_entry);
+  }
+
+  /* Empty listing - no items we can run handler on. */
+  return 0;
+}
+
+
+int RGWRados::swift_versioning_copy(RGWObjectCtx& obj_ctx,
+                                    const rgw_user& user,
+                                    RGWBucketInfo& bucket_info,
+                                    rgw_obj& obj)
+{
+  if (! swift_versioning_enabled(bucket_info)) {
     return 0;
   }
 
+  obj_ctx.set_atomic(obj);
+
+  RGWObjState * state = nullptr;
+  int r = get_obj_state(&obj_ctx, obj, &state, false);
+  if (r < 0) {
+    return r;
+  }
+
   if (!state->exists) {
     return 0;
   }
@@ -5711,16 +5792,15 @@ int RGWRados::swift_versioning_copy(RGWBucketInfo& bucket_info, RGWRados::Object
   string client_id;
   string op_id;
 
-  rgw_obj& obj = source->get_obj();
   const string& src_name = obj.get_object();
   char buf[src_name.size() + 32];
   struct timespec ts = ceph::real_clock::to_timespec(state->mtime);
-  snprintf(buf, sizeof(buf), "%03d%s/%lld.%06ld", (int)src_name.size(),
+  snprintf(buf, sizeof(buf), "%03x%s/%lld.%06ld", (int)src_name.size(),
            src_name.c_str(), (long long)ts.tv_sec, ts.tv_nsec / 1000);
 
   RGWBucketInfo dest_bucket_info;
 
-  int r = get_bucket_info(source->get_ctx(), bucket_info.bucket.tenant, bucket_info.swift_ver_location, dest_bucket_info, NULL, NULL);
+  r = get_bucket_info(obj_ctx, bucket_info.bucket.tenant, bucket_info.swift_ver_location, dest_bucket_info, NULL, NULL);
   if (r < 0) {
     ldout(cct, 10) << "failed to read dest bucket info: r=" << r << dendl;
     return r;
@@ -5731,10 +5811,11 @@ int RGWRados::swift_versioning_copy(RGWBucketInfo& bucket_info, RGWRados::Object
   }
 
   rgw_obj dest_obj(dest_bucket_info.bucket, buf);
+  obj_ctx.set_atomic(dest_obj);
 
   string no_zone;
 
-  r = copy_obj(source->get_ctx(),
+  r = copy_obj(obj_ctx,
                user,
                client_id,
                op_id,
@@ -5763,13 +5844,121 @@ int RGWRados::swift_versioning_copy(RGWBucketInfo& bucket_info, RGWRados::Object
                NULL, /* struct rgw_err *err */
                NULL, /* void (*progress_cb)(off_t, void *) */
                NULL); /* void *progress_data */
-  if (r == -ECANCELED || r == -ENOENT) { /* has already been overwritten, meaning another rgw process already copied it out */
+  if (r == -ECANCELED || r == -ENOENT) {
+    /* Has already been overwritten, meaning another rgw process already
+     * copied it out */
     return 0;
   }
 
   return r;
 }
 
+int RGWRados::swift_versioning_restore(RGWObjectCtx& obj_ctx,
+                                       const rgw_user& user,
+                                       RGWBucketInfo& bucket_info,
+                                       rgw_obj& obj,
+                                       bool& restored)             /* out */
+{
+  if (! swift_versioning_enabled(bucket_info)) {
+    return 0;
+  }
+
+  /* Bucket info of the bucket that stores previous versions of our object. */
+  RGWBucketInfo archive_binfo;
+
+  int ret = get_bucket_info(obj_ctx, bucket_info.bucket.tenant,
+                            bucket_info.swift_ver_location, archive_binfo,
+                            nullptr, nullptr);
+  if (ret < 0) {
+    return ret;
+  }
+
+  /* Abort the operation if the bucket storing our archive belongs to someone
+   * else. This is a limitation in comparison to Swift as we aren't taking ACLs
+   * into consideration. For we can live with that.
+   *
+   * TODO: delegate this check to un upper layer and compare with ACLs. */
+  if (bucket_info.owner != archive_binfo.owner) {
+    return -EPERM;
+  }
+
+  /* This code will be executed on latest version of the object. */
+  const auto handler = [&](const RGWObjEnt& entry) -> int {
+    std::string no_client_id;
+    std::string no_op_id;
+    std::string no_zone;
+
+    /* We don't support object versioning of Swift API on those buckets that
+     * are already versioned using the S3 mechanism. This affects also bucket
+     * storing archived objects. Otherwise the delete operation would create
+     * a deletion marker. */
+    if (archive_binfo.versioned()) {
+      restored = false;
+      return -ERR_PRECONDITION_FAILED;
+    }
+
+    /* We are requesting ATTRSMOD_NONE so the attr attribute is perfectly
+     * irrelevant and may be safely skipped. */
+    std::map<std::string, ceph::bufferlist> no_attrs;
+
+    rgw_obj archive_obj(archive_binfo.bucket, entry.key);
+    obj_ctx.set_atomic(archive_obj);
+    obj_ctx.set_atomic(obj);
+
+    int ret = copy_obj(obj_ctx,
+                       user,
+                       no_client_id,
+                       no_op_id,
+                       nullptr,       /* req_info *info */
+                       no_zone,
+                       obj,           /* dest obj */
+                       archive_obj,   /* src obj */
+                       bucket_info,   /* dest bucket info */
+                       archive_binfo, /* src bucket info */
+                       nullptr,       /* time_t *src_mtime */
+                       nullptr,       /* time_t *mtime */
+                       nullptr,       /* const time_t *mod_ptr */
+                       nullptr,       /* const time_t *unmod_ptr */
+                       false,         /* bool high_precision_time */
+                       nullptr,       /* const char *if_match */
+                       nullptr,       /* const char *if_nomatch */
+                       RGWRados::ATTRSMOD_NONE,
+                       true,          /* bool copy_if_newer */
+                       no_attrs,
+                       RGW_OBJ_CATEGORY_MAIN,
+                       0,             /* uint64_t olh_epoch */
+                       real_time(),   /* time_t delete_at */
+                       nullptr,       /* string *version_id */
+                       nullptr,       /* string *ptag */
+                       nullptr,       /* string *petag */
+                       nullptr,       /* struct rgw_err *err */
+                       nullptr,       /* void (*progress_cb)(off_t, void *) */
+                       nullptr);      /* void *progress_data */
+    if (ret == -ECANCELED || ret == -ENOENT) {
+      /* Has already been overwritten, meaning another rgw process already
+       * copied it out */
+      return 0;
+    } else if (ret < 0) {
+      return ret;
+    } else {
+      restored = true;
+    }
+
+    /* Need to remove the archived copy. */
+    ret = delete_obj(obj_ctx, archive_binfo, archive_obj,
+                     archive_binfo.versioning_status());
+
+    return ret;
+  };
+
+  const std::string& obj_name = obj.get_object();
+  const auto prefix = boost::str(boost::format("%03x%s") % obj_name.size()
+                                                         % obj_name);
+
+  return on_last_entry_in_listing(archive_binfo, prefix, std::string(),
+                                  handler);
+}
+
 /**
  * Write/overwrite an object to the bucket storage.
  * bucket: the bucket to store the object in
@@ -5905,10 +6094,6 @@ int RGWRados::Object::Write::write_meta(uint64_t size,
     index_op.set_bilog_flags(RGW_BILOG_FLAG_VERSIONED_OP);
   }
 
-  r = store->swift_versioning_copy(bucket_info, target, state, meta.owner);
-  if (r < 0) {
-    goto done_cancel;
-  }
 
   r = index_op.prepare(CLS_RGW_OP_ADD);
   if (r < 0)
@@ -6478,7 +6663,7 @@ int RGWRados::fetch_remote_obj(RGWObjectCtx& obj_ctx,
     if (ret < 0)
       return ret;
 
-    if (dest_state->exists) {
+    if (!real_clock::is_zero(dest_state->mtime)) {
       dest_mtime_weight.init(dest_state);
       pmod = &dest_mtime_weight.mtime;
     }
@@ -7048,8 +7233,8 @@ bool RGWRados::is_syncing_bucket_meta(rgw_bucket& bucket)
 int RGWRados::delete_bucket(rgw_bucket& bucket, RGWObjVersionTracker& objv_tracker)
 {
   librados::IoCtx index_ctx;
-  string oid;
-  int r = open_bucket_index(bucket, index_ctx, oid);
+  map<int, string> bucket_objs;
+  int r = open_bucket_index(bucket, index_ctx, bucket_objs);
   if (r < 0)
     return r;
 
@@ -7084,12 +7269,16 @@ int RGWRados::delete_bucket(rgw_bucket& bucket, RGWObjVersionTracker& objv_track
   /* if the bucket is not synced we can remove the meta file */
   if (!is_syncing_bucket_meta(bucket)) {
     RGWObjVersionTracker objv_tracker;
-    string entry;
-    get_bucket_instance_entry(bucket, entry);
+    string entry = bucket.get_key();
     r= rgw_bucket_instance_remove_entry(this, entry, &objv_tracker);
     if (r < 0) {
       return r;
     }
+    /* remove bucket index objects*/
+    map<int, string>::const_iterator biter;
+    for (biter = bucket_objs.begin(); biter != bucket_objs.end(); ++biter) {
+      index_ctx.remove(biter->second);
+    }
   }
   return 0;
 }
@@ -7550,10 +7739,6 @@ int RGWRados::Object::Delete::delete_obj()
 
   index_op.set_bilog_flags(params.bilog_flags);
 
-  r = store->swift_versioning_copy(bucket_info, target, state, params.bucket_owner);
-  if (r < 0) {
-    return r;
-  }
 
   r = index_op.prepare(CLS_RGW_OP_DEL);
   if (r < 0)
@@ -7571,6 +7756,11 @@ int RGWRados::Object::Delete::delete_obj()
 
   int64_t poolid = ref.ioctx.get_id();
   if (r >= 0) {
+    tombstone_cache_t *obj_tombstone_cache = store->get_tombstone_cache();
+    if (obj_tombstone_cache) {
+      tombstone_entry entry{*state};
+      obj_tombstone_cache->add(obj, entry);
+    }
     r = index_op.complete_del(poolid, ref.ioctx.get_last_version(), state->mtime, params.remove_objs);
   } else {
     int ret = index_op.cancel();
@@ -7791,7 +7981,16 @@ int RGWRados::get_obj_state_impl(RGWObjectCtx *rctx, rgw_obj& obj, RGWObjState *
   if (r == -ENOENT) {
     s->exists = false;
     s->has_attrs = true;
-    s->mtime = real_time();
+    tombstone_entry entry;
+    if (obj_tombstone_cache && obj_tombstone_cache->find(obj, entry)) {
+      s->mtime = entry.mtime;
+      s->zone_short_id = entry.zone_short_id;
+      s->pg_ver = entry.pg_ver;
+      ldout(cct, 20) << __func__ << "(): found obj in tombstone cache: obj=" << obj
+          << " mtime=" << s->mtime << " pgv=" << s->pg_ver << dendl;
+    } else {
+      s->mtime = real_time();
+    }
     return 0;
   }
   if (r < 0)
@@ -10266,23 +10465,12 @@ int RGWRados::get_user_stats_async(const rgw_user& user, RGWGetUserStats_CB *ctx
   return 0;
 }
 
-void RGWRados::get_bucket_instance_entry(rgw_bucket& bucket, string& entry)
+void RGWRados::get_bucket_meta_oid(const rgw_bucket& bucket, string& oid)
 {
-  if (bucket.tenant.empty()) {
-    entry = bucket.name + ":" + bucket.bucket_id;
-  } else {
-    entry = bucket.tenant + ":" + bucket.name + ":" + bucket.bucket_id;
-  }
+  oid = RGW_BUCKET_INSTANCE_MD_PREFIX + bucket.get_key(':');
 }
 
-void RGWRados::get_bucket_meta_oid(rgw_bucket& bucket, string& oid)
-{
-  string entry;
-  get_bucket_instance_entry(bucket, entry);
-  oid = RGW_BUCKET_INSTANCE_MD_PREFIX + entry;
-}
-
-void RGWRados::get_bucket_instance_obj(rgw_bucket& bucket, rgw_obj& obj)
+void RGWRados::get_bucket_instance_obj(const rgw_bucket& bucket, rgw_obj& obj)
 {
   if (!bucket.oid.empty()) {
     obj.init(get_zone_params().domain_root, bucket.oid);
@@ -10301,6 +10489,7 @@ int RGWRados::get_bucket_instance_info(RGWObjectCtx& obj_ctx, const string& meta
     return -EINVAL;
   }
   string oid = RGW_BUCKET_INSTANCE_MD_PREFIX + meta_key;
+  rgw_bucket_instance_key_to_oid(oid);
 
   return get_bucket_instance_from_oid(obj_ctx, oid, info, pmtime, pattrs);
 }
@@ -10511,8 +10700,7 @@ int RGWRados::put_bucket_instance_info(RGWBucketInfo& info, bool exclusive,
 
   ::encode(info, bl);
 
-  string key;
-  get_bucket_instance_entry(info.bucket, key); /* when we go through meta api, we don't use oid directly */
+  string key = info.bucket.get_key(); /* when we go through meta api, we don't use oid directly */
   int ret = rgw_bucket_instance_store_info(this, key, bl, exclusive, pattrs, &info.objv_tracker, mtime);
   if (ret == -EEXIST) {
     /* well, if it's exclusive we shouldn't overwrite it, because we might race with another
@@ -10642,6 +10830,9 @@ int RGWRados::update_containers_stats(map<string, RGWBucketEnt>& m)
   for (iter = m.begin(); iter != m.end(); ++iter) {
     RGWBucketEnt& ent = iter->second;
     rgw_bucket& bucket = ent.bucket;
+    ent.count = 0;
+    ent.size = 0;
+    ent.size_rounded = 0;
 
     map<string, rgw_bucket_dir_header> headers;
     int r = cls_bucket_head(bucket, RGW_NO_SHARD, headers);
@@ -11295,6 +11486,8 @@ int RGWRados::remove_objs_from_index(rgw_bucket& bucket, list<rgw_obj_key>& oid_
   librados::IoCtx index_ctx;
   string dir_oid;
 
+  uint8_t suggest_flag = (get_zone().log_data ? CEPH_RGW_DIR_SUGGEST_LOG_OP : 0);
+
   int r = open_bucket_index(bucket, index_ctx, dir_oid);
   if (r < 0)
     return r;
@@ -11309,7 +11502,7 @@ int RGWRados::remove_objs_from_index(rgw_bucket& bucket, list<rgw_obj_key>& oid_
     rgw_bucket_dir_entry entry;
     entry.ver.epoch = (uint64_t)-1; // ULLONG_MAX, needed to that objclass doesn't skip out request
     key.transform(&entry.key);
-    updates.append(CEPH_RGW_REMOVE);
+    updates.append(CEPH_RGW_REMOVE | suggest_flag);
     ::encode(entry, updates);
   }
 
@@ -11326,6 +11519,8 @@ int RGWRados::check_disk_state(librados::IoCtx io_ctx,
                                RGWObjEnt& object,
                                bufferlist& suggested_updates)
 {
+  uint8_t suggest_flag = (get_zone().log_data ? CEPH_RGW_DIR_SUGGEST_LOG_OP : 0);
+
   rgw_obj obj;
   std::string oid, instance, loc, ns;
   rgw_obj_key key;
@@ -11422,7 +11617,7 @@ int RGWRados::check_disk_state(librados::IoCtx io_ctx,
   list_state.meta.owner_display_name = owner.get_display_name();
 
   list_state.exists = true;
-  cls_rgw_encode_suggestion(CEPH_RGW_UPDATE, list_state, suggested_updates);
+  cls_rgw_encode_suggestion(CEPH_RGW_UPDATE | suggest_flag, list_state, suggested_updates);
   return 0;
 }
 
@@ -12073,8 +12268,8 @@ void RGWStoreManager::close_storage(RGWRados *store)
 
 librados::Rados* RGWRados::get_rados_handle()
 {
-  if (num_rados_handles == 1) {
-    return rados[0];
+  if (rados.size() == 1) {
+    return &rados[0];
   } else {
     handle_lock.get_read();
     pthread_t id = pthread_self();
@@ -12082,19 +12277,17 @@ librados::Rados* RGWRados::get_rados_handle()
 
     if (it != rados_map.end()) {
       handle_lock.put_read();
-      return rados[it->second];
+      return &rados[it->second];
     } else {
       handle_lock.put_read();
       handle_lock.get_write();
-      uint32_t handle = next_rados_handle.read();
-      if (handle == num_rados_handles) {
-        next_rados_handle.set(0);
-        handle = 0;
-      }
+      const uint32_t handle = next_rados_handle;
       rados_map[id] = handle;
-      next_rados_handle.inc();
+      if (++next_rados_handle == rados.size()) {
+        next_rados_handle = 0;
+      }
       handle_lock.put_write();
-      return rados[handle];
+      return &rados[handle];
     }
   }
 }
diff --git a/src/rgw/rgw_rados.h b/src/rgw/rgw_rados.h
index af290cb..c1450e8 100644
--- a/src/rgw/rgw_rados.h
+++ b/src/rgw/rgw_rados.h
@@ -4,11 +4,14 @@
 #ifndef CEPH_RGWRADOS_H
 #define CEPH_RGWRADOS_H
 
+#include <functional>
+
 #include "include/rados/librados.hpp"
 #include "include/Context.h"
 #include "common/RefCountedObj.h"
 #include "common/RWLock.h"
 #include "common/ceph_time.h"
+#include "common/lru_map.h"
 #include "rgw_common.h"
 #include "cls/rgw/cls_rgw_types.h"
 #include "cls/version/cls_version_types.h"
@@ -1333,6 +1336,8 @@ public:
   const string& get_info_oid_prefix(bool old_format = false);
   const string& get_predefined_name(CephContext *cct);
 
+  using RGWSystemMetaObj::read_id; // expose as public for radosgw-admin
+
   void dump(Formatter *f) const;
   void decode_json(JSONObj *obj);
 
@@ -1699,6 +1704,17 @@ struct bucket_info_entry {
   map<string, bufferlist> attrs;
 };
 
+struct tombstone_entry {
+  ceph::real_time mtime;
+  uint32_t zone_short_id;
+  uint64_t pg_ver;
+
+  tombstone_entry() = default;
+  tombstone_entry(const RGWObjState& state)
+    : mtime(state.mtime), zone_short_id(state.zone_short_id),
+      pg_ver(state.pg_ver) {}
+};
+
 class RGWRados
 {
   friend class RGWGC;
@@ -1800,15 +1816,17 @@ class RGWRados
 protected:
   CephContext *cct;
 
-  librados::Rados **rados;
-  atomic_t next_rados_handle;
-  uint32_t num_rados_handles;
+  std::vector<librados::Rados> rados;
+  uint32_t next_rados_handle;
   RWLock handle_lock;
   std::map<pthread_t, int> rados_map;
 
   using RGWChainedCacheImpl_bucket_info_entry = RGWChainedCacheImpl<bucket_info_entry>;
   RGWChainedCacheImpl_bucket_info_entry *binfo_cache;
 
+  using tombstone_cache_t = lru_map<rgw_obj, tombstone_entry>;
+  tombstone_cache_t *obj_tombstone_cache;
+
   librados::IoCtx gc_pool_ctx;        // .rgw.gc
   librados::IoCtx objexp_pool_ctx;
 
@@ -1841,9 +1859,9 @@ public:
                bucket_id_lock("rados_bucket_id"),
                bucket_index_max_shards(0),
                max_bucket_id(0), cct(NULL),
-               rados(NULL), next_rados_handle(0),
-               num_rados_handles(0), handle_lock("rados_handle_lock"),
-               binfo_cache(NULL),
+               next_rados_handle(0),
+               handle_lock("rados_handle_lock"),
+               binfo_cache(NULL), obj_tombstone_cache(nullptr),
                pools_initialized(false),
                quota_handler(NULL),
                finisher(NULL),
@@ -1956,16 +1974,10 @@ public:
 
   RGWDataChangesLog *data_log;
 
-  virtual ~RGWRados() {
-    for (uint32_t i=0; i < num_rados_handles; i++) {
-      if (rados[i]) {
-        rados[i]->shutdown();
-        delete rados[i];
-      }
-    }
-    if (rados) {
-      delete[] rados;
-    }
+  virtual ~RGWRados() = default;
+
+  tombstone_cache_t *get_tombstone_cache() {
+    return obj_tombstone_cache;
   }
 
   int get_required_alignment(rgw_bucket& bucket, uint64_t *alignment);
@@ -2434,6 +2446,32 @@ public:
   virtual int aio_wait(void *handle);
   virtual bool aio_completed(void *handle);
 
+  int on_last_entry_in_listing(RGWBucketInfo& bucket_info,
+                               const std::string& obj_prefix,
+                               const std::string& obj_delim,
+                               std::function<int(const RGWObjEnt&)> handler);
+
+  bool swift_versioning_enabled(const RGWBucketInfo& bucket_info) const {
+    return bucket_info.has_swift_versioning() &&
+        bucket_info.swift_ver_location.size();
+  }
+
+  int swift_versioning_copy(RGWObjectCtx& obj_ctx,              /* in/out */
+                            const rgw_user& user,               /* in */
+                            RGWBucketInfo& bucket_info,         /* in */
+                            rgw_obj& obj);                      /* in */
+  int swift_versioning_restore(RGWObjectCtx& obj_ctx,           /* in/out */
+                               const rgw_user& user,            /* in */
+                               RGWBucketInfo& bucket_info,      /* in */
+                               rgw_obj& obj,                    /* in */
+                               bool& restored);                 /* out */
+  int copy_obj_to_remote_dest(RGWObjState *astate,
+                              map<string, bufferlist>& src_attrs,
+                              RGWRados::Object::Read& read_op,
+                              const rgw_user& user_id,
+                              rgw_obj& dest_obj,
+                              ceph::real_time *mtime);
+
   enum AttrsMod {
     ATTRSMOD_NONE    = 0,
     ATTRSMOD_REPLACE = 1,
@@ -2471,14 +2509,6 @@ public:
                        struct rgw_err *err,
                        void (*progress_cb)(off_t, void *),
                        void *progress_data);
-  int swift_versioning_copy(RGWBucketInfo& bucket_info, RGWRados::Object *source, RGWObjState *state,
-                            rgw_user& user);
-  int copy_obj_to_remote_dest(RGWObjState *astate,
-                              map<string, bufferlist>& src_attrs,
-                              RGWRados::Object::Read& read_op,
-                              const rgw_user& user_id,
-                              rgw_obj& dest_obj,
-                              ceph::real_time *mtime);
   /**
    * Copy an object.
    * dest_obj: the object to copy into
@@ -2722,9 +2752,8 @@ public:
   int get_bucket_stats_async(rgw_bucket& bucket, int shard_id, RGWGetBucketStats_CB *cb);
   int get_user_stats(const rgw_user& user, RGWStorageStats& stats);
   int get_user_stats_async(const rgw_user& user, RGWGetUserStats_CB *cb);
-  void get_bucket_instance_obj(rgw_bucket& bucket, rgw_obj& obj);
-  void get_bucket_instance_entry(rgw_bucket& bucket, string& entry);
-  void get_bucket_meta_oid(rgw_bucket& bucket, string& oid);
+  void get_bucket_instance_obj(const rgw_bucket& bucket, rgw_obj& obj);
+  void get_bucket_meta_oid(const rgw_bucket& bucket, string& oid);
 
   int put_bucket_entrypoint_info(const string& tenant_name, const string& bucket_name, RGWBucketEntryPoint& entry_point,
                                  bool exclusive, RGWObjVersionTracker& objv_tracker, ceph::real_time mtime,
diff --git a/src/rgw/rgw_realm_watcher.cc b/src/rgw/rgw_realm_watcher.cc
index d9383d6..0ed332c 100644
--- a/src/rgw/rgw_realm_watcher.cc
+++ b/src/rgw/rgw_realm_watcher.cc
@@ -130,9 +130,12 @@ int RGWRealmWatcher::watch_restart()
         << " with " << cpp_strerror(-r) << dendl;
   }
   r = pool_ctx.watch2(watch_oid, &watch_handle, this);
-  if (r < 0)
+  if (r < 0) {
     lderr(cct) << "Failed to restart watch on " << watch_oid
         << " with " << cpp_strerror(-r) << dendl;
+    pool_ctx.close();
+    watch_oid.clear();
+  }
   return r;
 }
 
diff --git a/src/rgw/rgw_rest.cc b/src/rgw/rgw_rest.cc
index 538d5c7..e485d99 100644
--- a/src/rgw/rgw_rest.cc
+++ b/src/rgw/rgw_rest.cc
@@ -330,11 +330,7 @@ void set_req_state_err(struct rgw_err& err,     /* out */
 
   r = search_err(err_no, RGW_HTTP_ERRORS, ARRAY_LEN(RGW_HTTP_ERRORS));
   if (r) {
-    if (prot_flags & RGW_REST_WEBSITE && err_no == ERR_WEBSITE_REDIRECT && err.is_clear()) {
-      // http_ret was custom set, so don't change it!
-    } else {
-      err.http_ret = r->http_ret;
-    }
+    err.http_ret = r->http_ret;
     err.s3_code = r->s3_code;
     return;
   }
@@ -543,6 +539,14 @@ void dump_access_control(struct req_state *s, const char *origin,
 			 uint32_t max_age) {
   if (origin && (origin[0] != '\0')) {
     STREAM_IO(s)->print("Access-Control-Allow-Origin: %s\r\n", origin);
+    /* If the server specifies an origin host rather than "*",
+     * then it must also include Origin in the Vary response header
+     * to indicate to clients that server responses will differ
+     * based on the value of the Origin request header.
+     */
+    if (strcmp(origin, "*") != 0)
+      STREAM_IO(s)->print("Vary: Origin\r\n");
+
     if (meth && (meth[0] != '\0'))
       STREAM_IO(s)->print("Access-Control-Allow-Methods: %s\r\n", meth);
     if (hdr && (hdr[0] != '\0'))
@@ -699,49 +703,56 @@ void abort_early(struct req_state *s, RGWOp *op, int err_no,
 		      << " new_err_no=" << new_err_no << dendl;
     err_no = new_err_no;
   }
-  set_req_state_err(s, err_no);
-  dump_errno(s);
-  dump_bucket_from_state(s);
-  if (err_no == -ERR_PERMANENT_REDIRECT || err_no == -ERR_WEBSITE_REDIRECT) {
-    string dest_uri;
-    if (!s->redirect.empty()) {
-      dest_uri = s->redirect;
-    } else if (!s->zonegroup_endpoint.empty()) {
-      string dest_uri = s->zonegroup_endpoint;
-      /*
-       * reqest_uri is always start with slash, so we need to remove
-       * the unnecessary slash at the end of dest_uri.
-       */
-      if (dest_uri[dest_uri.size() - 1] == '/') {
-        dest_uri = dest_uri.substr(0, dest_uri.size() - 1);
+
+  // If the error handler(s) above dealt with it completely, they should have
+  // returned 0. If non-zero, we need to continue here.
+  if (err_no) {
+    // Watch out, we might have a custom error state already set!
+    if (s->err.http_ret && s->err.http_ret != 200) {
+      dump_errno(s);
+    } else {
+      set_req_state_err(s, err_no);
+      dump_errno(s);
+    }
+    dump_bucket_from_state(s);
+    if (err_no == -ERR_PERMANENT_REDIRECT || err_no == -ERR_WEBSITE_REDIRECT) {
+      string dest_uri;
+      if (!s->redirect.empty()) {
+        dest_uri = s->redirect;
+      } else if (!s->zonegroup_endpoint.empty()) {
+        string dest_uri = s->zonegroup_endpoint;
+        /*
+         * reqest_uri is always start with slash, so we need to remove
+         * the unnecessary slash at the end of dest_uri.
+         */
+        if (dest_uri[dest_uri.size() - 1] == '/') {
+          dest_uri = dest_uri.substr(0, dest_uri.size() - 1);
+        }
+        dest_uri += s->info.request_uri;
+        dest_uri += "?";
+        dest_uri += s->info.request_params;
+      }
+
+      if (!dest_uri.empty()) {
+        dump_redirect(s, dest_uri);
       }
-      dest_uri += s->info.request_uri;
-      dest_uri += "?";
-      dest_uri += s->info.request_params;
     }
 
-    if (!dest_uri.empty()) {
-      dump_redirect(s, dest_uri);
+    if (!error_content.empty()) {
+      /*
+       * TODO we must add all error entries as headers here:
+       * when having a working errordoc, then the s3 error fields are
+       * rendered as HTTP headers, e.g.:
+       *   x-amz-error-code: NoSuchKey
+       *   x-amz-error-message: The specified key does not exist.
+       *   x-amz-error-detail-Key: foo
+       */
+      end_header(s, op, NULL, error_content.size(), false, true);
+      STREAM_IO(s)->write(error_content.c_str(), error_content.size());
+    } else {
+      end_header(s, op);
     }
-  }
-  if (!error_content.empty()) {
-    ldout(s->cct, 20) << "error_content is set, we need to serve it INSTEAD"
-      " of firing the formatter" << dendl;
-    /*
-     * FIXME we must add all error entries as headers here:
-     * when having a working errordoc, then the s3 error fields are
-     * rendered as HTTP headers, e.g.:
-     *
-     *   x-amz-error-code: NoSuchKey
-     *   x-amz-error-message: The specified key does not exist.
-     *   x-amz-error-detail-Key: foo
-     */
-    end_header(s, op, NULL, NO_CONTENT_LENGTH, false, true);
-    STREAM_IO(s)->write(error_content.c_str(), error_content.size());
-    s->formatter->reset();
-  } else {
-    end_header(s, op);
-    rgw_flush_formatter_and_reset(s, s->formatter);
+    rgw_flush_formatter(s, s->formatter);
   }
   perfcounter->inc(l_rgw_failed_req);
 }
@@ -994,6 +1005,49 @@ int RGWPutObj_ObjStore::get_params()
   return 0;
 }
 
+int RGWPutObj_ObjStore::get_padding_last_aws4_chunk_encoded(bufferlist &bl, uint64_t chunk_size) {
+
+  const int chunk_str_min_len = 1 + 17 + 64 + 2; /* len('0') = 1 */
+
+  char *chunk_str = bl.c_str();
+  int budget = bl.length();
+
+  unsigned int chunk_data_size;
+  unsigned int chunk_offset = 0;
+
+  while (1) {
+
+    /* check available metadata */
+    if (budget < chunk_str_min_len) {
+      return -ERR_SIGNATURE_NO_MATCH;
+    }
+
+    chunk_offset = 0;
+
+    /* grab chunk size */
+    while ((*(chunk_str+chunk_offset) != ';') && (chunk_offset < chunk_str_min_len))
+      chunk_offset++;
+    string str = string(chunk_str, chunk_offset);
+    stringstream ss;
+    ss << std::hex << str;
+    ss >> chunk_data_size;
+
+    /* next chunk */
+    chunk_offset += 17 + 64 + 2 + chunk_data_size;
+
+    /* last chunk? */
+    budget -= chunk_offset;
+    if (budget < 0) {
+      budget *= -1;
+      break;
+    }
+
+    chunk_str += chunk_offset;
+  }
+
+  return budget;
+}
+
 int RGWPutObj_ObjStore::get_data(bufferlist& bl)
 {
   size_t cl;
@@ -1019,6 +1073,30 @@ int RGWPutObj_ObjStore::get_data(bufferlist& bl)
 
     len = read_len;
     bl.append(bp, 0, len);
+
+    /* read last aws4 chunk padding */
+    if (s->aws4_auth_streaming_mode && len == (int)chunk_size) {
+      int ret_auth = get_padding_last_aws4_chunk_encoded(bl, chunk_size);
+      if (ret_auth < 0) {
+        return ret_auth;
+      }
+      int len_padding = ret_auth;
+      if (len_padding) {
+        int read_len;
+        bufferptr bp_extra(len_padding);
+        int r = STREAM_IO(s)->read(bp_extra.c_str(), len_padding, &read_len,
+                                   s->aws4_auth_needs_complete);
+        if (r < 0) {
+          return r;
+        }
+        if (read_len != len_padding) {
+          return -ERR_SIGNATURE_NO_MATCH;
+        }
+        bl.append(bp_extra.c_str(), len_padding);
+        bl.rebuild();
+      }
+    }
+
   }
 
   if ((uint64_t)ofs + len > s->cct->_conf->rgw_max_put_size) {
@@ -1470,7 +1548,7 @@ int RGWHandler_REST::read_permissions(RGWOp* op_obj)
   case OP_POST:
   case OP_COPY:
     /* is it a 'multi-object delete' request? */
-    if (s->info.request_params == "delete") {
+    if (s->info.args.exists("delete")) {
       only_bucket = true;
       break;
     }
@@ -1593,6 +1671,27 @@ int RGWREST::preprocess(struct req_state *s, RGWClientIO* cio)
   s->info.request_uri_aws4 = s->info.request_uri;
 
   s->cio = cio;
+
+  // We need to know if this RGW instance is running the s3website API with a
+  // higher priority than regular S3 API, or possibly in place of the regular
+  // S3 API.
+  // Map the listing of rgw_enable_apis in REVERSE order, so that items near
+  // the front of the list have a higher number assigned (and -1 for items not in the list).
+  list<string> apis;
+  get_str_list(g_conf->rgw_enable_apis, apis);
+  int api_priority_s3 = -1;
+  int api_priority_s3website = -1;
+  auto api_s3website_priority_rawpos = std::find(apis.begin(), apis.end(), "s3website");
+  auto api_s3_priority_rawpos = std::find(apis.begin(), apis.end(), "s3");
+  if (api_s3_priority_rawpos != apis.end()) {
+    api_priority_s3 = apis.size() - std::distance(apis.begin(), api_s3_priority_rawpos);
+  }
+  if (api_s3website_priority_rawpos != apis.end()) {
+    api_priority_s3website = apis.size() - std::distance(apis.begin(), api_s3website_priority_rawpos);
+  }
+  ldout(s->cct, 10) << "rgw api priority: s3=" << api_priority_s3 << " s3website=" << api_priority_s3website << dendl;
+  bool s3website_enabled = api_priority_s3website >= 0;
+
   if (info.host.size()) {
     ldout(s->cct, 10) << "host=" << info.host << dendl;
     string domain;
@@ -1600,7 +1699,6 @@ int RGWREST::preprocess(struct req_state *s, RGWClientIO* cio)
     bool in_hosted_domain_s3website = false;
     bool in_hosted_domain = rgw_find_host_in_domains(info.host, &domain, &subdomain, hostnames_set);
 
-    bool s3website_enabled = g_conf->rgw_enable_apis.find("s3website") != std::string::npos;
     string s3website_domain;
     string s3website_subdomain;
 
@@ -1610,7 +1708,6 @@ int RGWREST::preprocess(struct req_state *s, RGWClientIO* cio)
 	in_hosted_domain = true; // TODO: should hostnames be a strict superset of hostnames_s3website?
         domain = s3website_domain;
         subdomain = s3website_subdomain;
-        s->prot_flags |= RGW_REST_WEBSITE;
       }
     }
 
@@ -1650,7 +1747,6 @@ int RGWREST::preprocess(struct req_state *s, RGWClientIO* cio)
 				     // strict superset of hostnames_s3website?
 	    domain = s3website_domain;
 	    subdomain = s3website_subdomain;
-	    s->prot_flags |= RGW_REST_WEBSITE;
 	  }
         }
 
@@ -1663,6 +1759,31 @@ int RGWREST::preprocess(struct req_state *s, RGWClientIO* cio)
       }
     }
 
+    // Handle A/CNAME records that point to the RGW storage, but do match the
+    // CNAME test above, per issue http://tracker.ceph.com/issues/15975
+    // If BOTH domain & subdomain variables are empty, then none of the above
+    // cases matched anything, and we should fall back to using the Host header
+    // directly as the bucket name.
+    // As additional checks:
+    // - if the Host header is an IP, we're using path-style access without DNS
+    // - Also check that the Host header is a valid bucket name before using it.
+    if (subdomain.empty()
+        && (domain.empty() || domain != info.host)
+        && !looks_like_ip_address(info.host.c_str())
+        && RGWHandler_REST::validate_bucket_name(info.host)) {
+      subdomain.append(info.host);
+      in_hosted_domain = 1;
+    }
+
+    if (s3website_enabled && api_priority_s3website > api_priority_s3) {
+      in_hosted_domain_s3website = 1;
+    }
+
+    if (in_hosted_domain_s3website) {
+      s->prot_flags |= RGW_REST_WEBSITE;
+    }
+
+
     if (in_hosted_domain && !subdomain.empty()) {
       string encoded_bucket = "/";
       encoded_bucket.append(subdomain);
@@ -1675,6 +1796,16 @@ int RGWREST::preprocess(struct req_state *s, RGWClientIO* cio)
     if (!domain.empty()) {
       s->info.domain = domain;
     }
+
+   ldout(s->cct, 20)
+      << "final domain/bucket"
+      << " subdomain=" << subdomain
+      << " domain=" << domain
+      << " in_hosted_domain=" << in_hosted_domain
+      << " in_hosted_domain_s3website=" << in_hosted_domain_s3website
+      << " s->info.domain=" << s->info.domain
+      << " s->info.request_uri=" << s->info.request_uri
+      << dendl;
   }
 
   if (s->info.domain.empty()) {
diff --git a/src/rgw/rgw_rest.h b/src/rgw/rgw_rest.h
index bb288ca..96947b3 100644
--- a/src/rgw/rgw_rest.h
+++ b/src/rgw/rgw_rest.h
@@ -14,6 +14,7 @@
 
 extern std::map<std::string, std::string> rgw_to_http_attrs;
 
+extern string camelcase_dash_http_attr(const string& orig);
 extern string lowercase_dash_http_attr(const string& orig);
 
 extern void rgw_rest_init(CephContext *cct, RGWRados *store, RGWZoneGroup& zone_group);
@@ -216,6 +217,8 @@ public:
   virtual int verify_params();
   virtual int get_params();
   virtual int get_data(bufferlist& bl);
+
+  int get_padding_last_aws4_chunk_encoded(bufferlist &bl, uint64_t chunk_size);
 };
 
 class RGWPostObj_ObjStore : public RGWPostObj
@@ -376,16 +379,16 @@ protected:
   virtual RGWOp *op_copy() { return NULL; }
   virtual RGWOp *op_options() { return NULL; }
 
-  virtual int validate_tenant_name(const string& bucket);
-  virtual int validate_bucket_name(const string& bucket);
-  virtual int validate_object_name(const string& object);
-
   static int allocate_formatter(struct req_state *s, int default_formatter,
 				bool configurable);
 public:
   RGWHandler_REST() {}
   virtual ~RGWHandler_REST() {}
 
+  static int validate_tenant_name(const string& bucket);
+  static int validate_bucket_name(const string& bucket);
+  static int validate_object_name(const string& object);
+
   int init_permissions(RGWOp* op);
   int read_permissions(RGWOp* op);
 
diff --git a/src/rgw/rgw_rest_client.cc b/src/rgw/rgw_rest_client.cc
index b219e6f..e29b160 100644
--- a/src/rgw/rgw_rest_client.cc
+++ b/src/rgw/rgw_rest_client.cc
@@ -607,7 +607,7 @@ int RGWRESTStreamWriteRequest::complete(string& etag, real_time *mtime)
 int RGWRESTStreamRWRequest::get_obj(RGWAccessKey& key, map<string, string>& extra_headers, rgw_obj& obj)
 {
   string urlsafe_bucket, urlsafe_object;
-  url_encode(obj.bucket.name, urlsafe_bucket);
+  url_encode(obj.bucket.get_key(':', 0), urlsafe_bucket);
   url_encode(obj.get_orig_obj(), urlsafe_object);
   string resource = urlsafe_bucket + "/" + urlsafe_object;
 
diff --git a/src/rgw/rgw_rest_realm.cc b/src/rgw/rgw_rest_realm.cc
index 652735f..008ab10 100644
--- a/src/rgw/rgw_rest_realm.cc
+++ b/src/rgw/rgw_rest_realm.cc
@@ -30,17 +30,18 @@ void RGWOp_Period_Base::send_response()
 
   set_req_state_err(s, http_ret);
   dump_errno(s);
-  end_header(s);
 
   if (http_ret < 0) {
     if (!s->err.message.empty()) {
       ldout(s->cct, 4) << "Request failed with " << http_ret
           << ": " << s->err.message << dendl;
     }
+    end_header(s);
     return;
   }
 
   encode_json("period", period, s->formatter);
+  end_header(s, NULL, "application/json", s->formatter->get_len());
   flusher.flush();
 }
 
@@ -262,12 +263,14 @@ void RGWOp_Realm_Get::send_response()
 {
   set_req_state_err(s, http_ret);
   dump_errno(s);
-  end_header(s);
 
-  if (http_ret < 0)
+  if (http_ret < 0) {
+    end_header(s);
     return;
+  }
 
   encode_json("realm", *realm, s->formatter);
+  end_header(s, NULL, "application/json", s->formatter->get_len());
   flusher.flush();
 }
 
diff --git a/src/rgw/rgw_rest_s3.cc b/src/rgw/rgw_rest_s3.cc
index dc4e970..e9f24f3 100644
--- a/src/rgw/rgw_rest_s3.cc
+++ b/src/rgw/rgw_rest_s3.cc
@@ -8,6 +8,8 @@
 #include "common/Formatter.h"
 #include "common/utf8.h"
 #include "common/ceph_json.h"
+#include "common/safe_io.h"
+#include <boost/algorithm/string.hpp>
 
 #include "rgw_rest.h"
 #include "rgw_rest_s3.h"
@@ -92,8 +94,13 @@ int RGWGetObj_ObjStore_S3Website::send_response_data(bufferlist& bl, off_t bl_of
     bufferlist &bl = iter->second;
     s->redirect = string(bl.c_str(), bl.length());
     s->err.http_ret = 301;
-    ldout(s->cct, 20) << __CEPH_ASSERT_FUNCTION << " redirectng per x-amz-website-redirect-location=" << s->redirect << dendl;
+    ldout(s->cct, 20) << __CEPH_ASSERT_FUNCTION << " redirecting per x-amz-website-redirect-location=" << s->redirect << dendl;
     op_ret = -ERR_WEBSITE_REDIRECT;
+    set_req_state_err(s, op_ret);
+    dump_errno(s);
+    dump_content_length(s, 0);
+    dump_redirect(s, s->redirect);
+    end_header(s, this);
     return op_ret;
   } else {
     return RGWGetObj_ObjStore_S3::send_response_data(bl, bl_ofs, bl_len);
@@ -246,9 +253,14 @@ int RGWGetObj_ObjStore_S3::send_response_data(bufferlist& bl, off_t bl_ofs,
   }
 
 done:
-  set_req_state_err(s, (partial_content && !op_ret) ? STATUS_PARTIAL_CONTENT
-		    : op_ret);
-  dump_errno(s);
+  if (custom_http_ret) {
+    set_req_state_err(s, 0);
+    dump_errno(s, custom_http_ret);
+  } else {
+    set_req_state_err(s, (partial_content && !op_ret) ? STATUS_PARTIAL_CONTENT
+          	  : op_ret);
+    dump_errno(s);
+  }
 
   for (riter = response_attrs.begin(); riter != response_attrs.end();
        ++riter) {
@@ -256,7 +268,7 @@ done:
 			riter->second.c_str());
   }
 
-  if (op_ret == ERR_NOT_MODIFIED) {
+  if (op_ret == -ERR_NOT_MODIFIED) {
       end_header(s, this);
   } else {
       if (!content_type)
@@ -1039,17 +1051,163 @@ int RGWPutObj_ObjStore_S3::get_params()
   return RGWPutObj_ObjStore::get_params();
 }
 
+int RGWPutObj_ObjStore_S3::validate_aws4_single_chunk(char *chunk_str,
+                                                      char *chunk_data_str,
+                                                      unsigned int chunk_data_size,
+                                                      string chunk_signature)
+{
+
+  /* string to sign */
+
+  string hash_empty_str;
+  rgw_hash_s3_string_sha256("", 0, hash_empty_str);
+
+  string hash_chunk_data;
+  rgw_hash_s3_string_sha256(chunk_data_str, chunk_data_size, hash_chunk_data);
+
+  string string_to_sign = "AWS4-HMAC-SHA256-PAYLOAD\n";
+  string_to_sign.append(s->aws4_auth->date + "\n");
+  string_to_sign.append(s->aws4_auth->credential_scope + "\n");
+  string_to_sign.append(s->aws4_auth->seed_signature + "\n");
+  string_to_sign.append(hash_empty_str + "\n");
+  string_to_sign.append(hash_chunk_data);
+
+  /* new chunk signature */
+
+  char signature_k[CEPH_CRYPTO_HMACSHA256_DIGESTSIZE];
+  calc_hmac_sha256(s->aws4_auth->signing_k, CEPH_CRYPTO_HMACSHA256_DIGESTSIZE,
+      string_to_sign.c_str(), string_to_sign.size(), signature_k);
+
+  char aux[CEPH_CRYPTO_HMACSHA256_DIGESTSIZE * 2 + 1];
+  buf_to_hex((unsigned char *) signature_k, CEPH_CRYPTO_HMACSHA256_DIGESTSIZE, aux);
+
+  string new_chunk_signature = string(aux);
+
+  ldout(s->cct, 20) << "--------------- aws4 chunk validation" << dendl;
+  ldout(s->cct, 20) << "chunk_signature     = " << chunk_signature << dendl;
+  ldout(s->cct, 20) << "new_chunk_signature = " << new_chunk_signature << dendl;
+  ldout(s->cct, 20) << "aws4 chunk signing_key    = " << s->aws4_auth->signing_key << dendl;
+  ldout(s->cct, 20) << "aws4 chunk string_to_sign = " << string_to_sign << dendl;
+
+  /* chunk auth ok? */
+
+  if (new_chunk_signature != chunk_signature) {
+    ldout(s->cct, 20) << "ERROR: AWS4 chunk signature does NOT match (new_chunk_signature != chunk_signature)" << dendl;
+    return -ERR_SIGNATURE_NO_MATCH;
+  }
+
+  /* update seed signature */
+
+  s->aws4_auth->seed_signature = new_chunk_signature;
+
+  return 0;
+}
+
+int RGWPutObj_ObjStore_S3::validate_and_unwrap_available_aws4_chunked_data(bufferlist& bl_in,
+                                                                           bufferlist& bl_out)
+{
+
+  /* string(IntHexBase(chunk-size)) + ";chunk-signature=" + signature + \r\n + chunk-data + \r\n */
+
+  const unsigned int chunk_str_min_len = 1 + 17 + 64 + 2; /* len('0') = 1 */
+
+  char *chunk_str = bl_in.c_str();
+  unsigned int budget = bl_in.length();
+
+  bl_out.clear();
+
+  while (true) {
+
+    /* check available metadata */
+
+    if (budget < chunk_str_min_len) {
+      return -ERR_SIGNATURE_NO_MATCH;
+    }
+
+    unsigned int chunk_offset = 0;
+
+    /* grab chunk size */
+
+    while ((*(chunk_str+chunk_offset) != ';') && (chunk_offset < chunk_str_min_len))
+      chunk_offset++;
+    string str = string(chunk_str, chunk_offset);
+    unsigned int chunk_data_size;
+    stringstream ss;
+    ss << std::hex << str;
+    ss >> chunk_data_size;
+    if (ss.fail()) {
+      return -ERR_SIGNATURE_NO_MATCH;
+    }
+
+    /* grab chunk signature */
+
+    chunk_offset += 17;
+    string chunk_signature = string(chunk_str, chunk_offset, 64);
+
+    /* get chunk data */
+
+    chunk_offset += 64 + 2;
+    char *chunk_data_str = chunk_str + chunk_offset;
+
+    /* handle budget */
+
+    budget -= chunk_offset;
+    if (budget < chunk_data_size) {
+      return -ERR_SIGNATURE_NO_MATCH;
+    } else {
+      budget -= chunk_data_size;
+    }
+
+    /* auth single chunk */
+
+    if (validate_aws4_single_chunk(chunk_str, chunk_data_str, chunk_data_size, chunk_signature) < 0) {
+      ldout(s->cct, 20) << "ERROR AWS4 single chunk validation" << dendl;
+      return -ERR_SIGNATURE_NO_MATCH;
+    }
+
+    /* aggregate single chunk */
+
+    bl_out.append(chunk_data_str, chunk_data_size);
+
+    /* last chunk or no more budget? */
+
+    if ((chunk_data_size == 0) || (budget == 0))
+      break;
+
+    /* next chunk */
+
+    chunk_offset += chunk_data_size;
+    chunk_str += chunk_offset;
+  }
+
+  /* authorization ok */
+
+  return 0;
+
+}
+
 int RGWPutObj_ObjStore_S3::get_data(bufferlist& bl)
 {
   int ret = RGWPutObj_ObjStore::get_data(bl);
   if (ret < 0)
     s->aws4_auth_needs_complete = false;
+
+  int ret_auth;
+
+  if (s->aws4_auth_streaming_mode && ret > 0) {
+    ret_auth = validate_and_unwrap_available_aws4_chunked_data(bl, s->aws4_auth->bl);
+    if (ret_auth < 0) {
+      return ret_auth;
+    }
+  }
+
   if ((ret == 0) && s->aws4_auth_needs_complete) {
-    int ret_auth = do_aws4_auth_completion();
+    ret_auth = do_aws4_auth_completion();
     if (ret_auth < 0) {
       return ret_auth;
     }
   }
+
   return ret;
 }
 
@@ -1589,10 +1747,32 @@ int RGWPostObj_ObjStore_S3::get_policy()
 	  s->perm_mask = RGW_PERM_FULL_CONTROL;
 	}
       } else if (store->ctx()->_conf->rgw_s3_auth_use_ldap &&
-		store->ctx()->_conf->rgw_ldap_uri.empty()) {
+		 (! store->ctx()->_conf->rgw_ldap_uri.empty())) {
+
+	ldout(store->ctx(), 15)
+	  << __func__ << " LDAP auth uri="
+	  << store->ctx()->_conf->rgw_ldap_uri
+	  << dendl;
+
 	RGWToken token{from_base64(s3_access_key)};
+	if (! token.valid())
+	  return -EACCES;
+
 	rgw::LDAPHelper *ldh = RGW_Auth_S3::get_ldap_ctx(store);
-	if ((! token.valid()) || ldh->auth(token.id, token.key) != 0)
+	if (unlikely(!ldh)) {
+	  ldout(store->ctx(), 0)
+	    << __func__ << " RGW_Auth_S3::get_ldap_ctx() failed"
+	    << dendl;
+	  return -EACCES;
+	}
+
+	ldout(store->ctx(), 10)
+	  << __func__ << " try LDAP auth uri="
+	  << store->ctx()->_conf->rgw_ldap_uri
+	  << " token.id=" << token.id
+	  << dendl;
+
+	if (ldh->auth(token.id, token.key) != 0)
 	  return -EACCES;
 
 	/* ok, succeeded */
@@ -2680,7 +2860,7 @@ RGWOp *RGWHandler_REST_Bucket_S3::op_delete()
 
 RGWOp *RGWHandler_REST_Bucket_S3::op_post()
 {
-  if ( s->info.request_params == "delete" ) {
+  if (s->info.args.exists("delete")) {
     return new RGWDeleteMultiObj_ObjStore_S3;
   }
 
@@ -2911,9 +3091,10 @@ void RGW_Auth_S3::init_impl(RGWRados* store)
   const string& ldap_searchdn = store->ctx()->_conf->rgw_ldap_searchdn;
   const string& ldap_dnattr =
     store->ctx()->_conf->rgw_ldap_dnattr;
+  std::string ldap_bindpw = parse_rgw_ldap_bindpw(store->ctx());
 
-  ldh = new rgw::LDAPHelper(ldap_uri, ldap_binddn, ldap_searchdn,
-			    ldap_dnattr);
+  ldh = new rgw::LDAPHelper(ldap_uri, ldap_binddn, ldap_bindpw,
+			    ldap_searchdn, ldap_dnattr);
 
   ldh->init();
   ldh->bind();
@@ -3107,7 +3288,7 @@ int RGW_Auth_S3::authorize_v4_complete(RGWRados *store, struct req_state *s, con
   if (s->aws4_auth_needs_complete) {
     const char *expected_request_payload_hash = s->info.env->get("HTTP_X_AMZ_CONTENT_SHA256");
     if (expected_request_payload_hash &&
-	s->aws4_auth->payload_hash.compare(expected_request_payload_hash) != 0) {
+        s->aws4_auth->payload_hash.compare(expected_request_payload_hash) != 0) {
       ldout(s->cct, 10) << "ERROR: x-amz-content-sha256 does not match" << dendl;
       return -ERR_AMZ_CONTENT_SHA256_MISMATCH;
     }
@@ -3158,6 +3339,8 @@ int RGW_Auth_S3::authorize_v4_complete(RGWRados *store, struct req_state *s, con
     return err;
   }
 
+  s->aws4_auth->seed_signature = s->aws4_auth->new_signature;
+
   return 0;
 
 }
@@ -3197,6 +3380,8 @@ static void aws4_uri_encode(const string& src, string& dst)
   }
 }
 
+static std::array<string, 3> aws4_presigned_required_keys = { "Credential", "SignedHeaders", "Signature" };
+
 /*
  * handle v4 signatures (rados auth only)
  */
@@ -3205,8 +3390,8 @@ int RGW_Auth_S3::authorize_v4(RGWRados *store, struct req_state *s)
   string::size_type pos;
   bool using_qs;
 
-  time_t now, now_req=0;
-  time(&now);
+  uint64_t now_req = 0;
+  uint64_t now = ceph_clock_now(s->cct);
 
   /* v4 requires rados auth */
   if (!store->ctx()->_conf->rgw_s3_auth_use_rados) {
@@ -3215,7 +3400,11 @@ int RGW_Auth_S3::authorize_v4(RGWRados *store, struct req_state *s)
 
   string algorithm = "AWS4-HMAC-SHA256";
 
-  s->aws4_auth = new rgw_aws4_auth;
+  try {
+    s->aws4_auth = std::unique_ptr<rgw_aws4_auth>(new rgw_aws4_auth);
+  } catch (std::bad_alloc&) {
+    return -ENOMEM;
+  }
 
   if ((!s->http_auth) || !(*s->http_auth)) {
 
@@ -3245,7 +3434,7 @@ int RGW_Auth_S3::authorize_v4(RGWRados *store, struct req_state *s)
         return -EPERM;
       }
       /* handle expiration in epoch time */
-      now_req = mktime(&date_t);
+      now_req = (uint64_t)timegm(&date_t);
       if (now >= now_req + exp) {
         dout(10) << "NOTICE: now = " << now << ", now_req = " << now_req << ", exp = " << exp << dendl;
         return -EPERM;
@@ -3276,72 +3465,42 @@ int RGW_Auth_S3::authorize_v4(RGWRados *store, struct req_state *s)
     /* ------------------------- handle Credential header */
 
     using_qs = false;
-    s->aws4_auth->credential = s->http_auth;
 
-    s->aws4_auth->credential = s->aws4_auth->credential.substr(17, s->aws4_auth->credential.length());
+    string auth_str = s->http_auth;
 
-    pos = s->aws4_auth->credential.find("Credential");
-    if (pos == std::string::npos) {
+#define AWS4_HMAC_SHA256_STR "AWS4-HMAC-SHA256"
+#define CREDENTIALS_PREFIX_LEN (sizeof(AWS4_HMAC_SHA256_STR) - 1)
+    uint64_t min_len = CREDENTIALS_PREFIX_LEN + 1;
+    if (auth_str.length() < min_len) {
+      ldout(store->ctx(), 10) << "credentials string is too short" << dendl;
       return -EINVAL;
     }
 
-    s->aws4_auth->credential = s->aws4_auth->credential.substr(pos, s->aws4_auth->credential.find(","));
-
-    s->aws4_auth->credential = s->aws4_auth->credential.substr(pos + 1, s->aws4_auth->credential.length());
-
-    pos = s->aws4_auth->credential.find("=");
-
-    s->aws4_auth->credential = s->aws4_auth->credential.substr(pos + 1, s->aws4_auth->credential.length());
+    list<string> auth_list;
+    get_str_list(auth_str.substr(min_len), ",", auth_list);
 
-    /* ------------------------- handle SignedHeaders header */
+    map<string, string> kv;
 
-    s->aws4_auth->signedheaders = s->http_auth;
-
-    s->aws4_auth->signedheaders = s->aws4_auth->signedheaders.substr(17, s->aws4_auth->signedheaders.length());
-
-    pos = s->aws4_auth->signedheaders.find("SignedHeaders");
-    if (pos == std::string::npos) {
-      return -EINVAL;
-    }
-
-    s->aws4_auth->signedheaders = s->aws4_auth->signedheaders.substr(pos, s->aws4_auth->signedheaders.length());
-
-    pos = s->aws4_auth->signedheaders.find(",");
-    if (pos == std::string::npos) {
-      return -EINVAL;
-    }
-
-    s->aws4_auth->signedheaders = s->aws4_auth->signedheaders.substr(0, pos);
-
-    pos = s->aws4_auth->signedheaders.find("=");
-    if (pos == std::string::npos) {
-      return -EINVAL;
-    }
-
-    s->aws4_auth->signedheaders = s->aws4_auth->signedheaders.substr(pos + 1, s->aws4_auth->signedheaders.length());
-
-    /* host;user-agent;x-amz-content-sha256;x-amz-date */
-    dout(10) << "v4 signedheaders format = " << s->aws4_auth->signedheaders << dendl;
-
-    /* ------------------------- handle Signature header */
-
-    s->aws4_auth->signature = s->http_auth;
-
-    s->aws4_auth->signature = s->aws4_auth->signature.substr(17, s->aws4_auth->signature.length());
-
-    pos = s->aws4_auth->signature.find("Signature");
-    if (pos == std::string::npos) {
-      return -EINVAL;
+    for (string& s : auth_list) {
+      string key, val;
+      int ret = parse_key_value(s, key, val);
+      if (ret < 0) {
+        ldout(store->ctx(), 10) << "NOTICE: failed to parse auth header (s=" << s << ")" << dendl;
+        return -EINVAL;
+      }
+      kv[key] = std::move(val);
     }
 
-    s->aws4_auth->signature = s->aws4_auth->signature.substr(pos, s->aws4_auth->signature.length());
-
-    pos = s->aws4_auth->signature.find("=");
-    if (pos == std::string::npos) {
-      return -EINVAL;
+    for (string& k : aws4_presigned_required_keys) {
+      if (kv.find(k) == kv.end()) {
+        ldout(store->ctx(), 10) << "NOTICE: auth header missing key: " << k << dendl;
+        return -EINVAL;
+      }
     }
 
-    s->aws4_auth->signature = s->aws4_auth->signature.substr(pos + 1, s->aws4_auth->signature.length());
+    s->aws4_auth->credential = std::move(kv["Credential"]);
+    s->aws4_auth->signedheaders = std::move(kv["SignedHeaders"]);
+    s->aws4_auth->signature = std::move(kv["Signature"]);
 
     /* sig hex str */
     dout(10) << "v4 signature format = " << s->aws4_auth->signature << dendl;
@@ -3474,7 +3633,8 @@ int RGW_Auth_S3::authorize_v4(RGWRados *store, struct req_state *s)
   map<string, string> canonical_hdrs_map;
   istringstream sh(s->aws4_auth->signedheaders);
   string token;
-  string port = s->info.env->get("SERVER_PORT");
+  string port = s->info.env->get("SERVER_PORT", "");
+  string secure_port = s->info.env->get("SERVER_PORT_SECURE", "");
 
   while (getline(sh, token, ';')) {
     string token_env = "HTTP_" + token;
@@ -3500,8 +3660,13 @@ int RGW_Auth_S3::authorize_v4(RGWRados *store, struct req_state *s)
       }
     }
     string token_value = string(t);
-    if (using_qs && (token == "host"))
-      token_value = token_value + ":" + port;
+    if (using_qs && (token == "host")) {
+      if (!port.empty() && port != "80" && port != "0") {
+        token_value = token_value + ":" + port;
+      } else if (!secure_port.empty() && secure_port != "443") {
+        token_value = token_value + ":" + secure_port;
+      }
+    }
     canonical_hdrs_map[token] = rgw_trim_whitespace(token_value);
   }
 
@@ -3523,6 +3688,7 @@ int RGW_Auth_S3::authorize_v4(RGWRados *store, struct req_state *s)
   string request_payload;
 
   bool unsigned_payload = false;
+  s->aws4_auth_streaming_mode = false;
 
   if (using_qs) {
     /* query parameters auth */
@@ -3530,8 +3696,11 @@ int RGW_Auth_S3::authorize_v4(RGWRados *store, struct req_state *s)
   } else {
     /* header auth */
     const char *request_payload_hash = s->info.env->get("HTTP_X_AMZ_CONTENT_SHA256");
-    if (request_payload_hash && string("UNSIGNED-PAYLOAD").compare(request_payload_hash) == 0) {
-      unsigned_payload = true;
+    if (request_payload_hash) {
+      unsigned_payload = string("UNSIGNED-PAYLOAD").compare(request_payload_hash) == 0;
+      if (!unsigned_payload) {
+        s->aws4_auth_streaming_mode = string("STREAMING-AWS4-HMAC-SHA256-PAYLOAD").compare(request_payload_hash) == 0;
+      }
     }
   }
 
@@ -3571,26 +3740,65 @@ int RGW_Auth_S3::authorize_v4(RGWRados *store, struct req_state *s)
 
     /* aws4 auth not completed... delay aws4 auth */
 
-    dout(10) << "body content detected... delaying v4 auth" << dendl;
-
-    switch (s->op_type)
-    {
-      case RGW_OP_CREATE_BUCKET:
-      case RGW_OP_PUT_OBJ:
-      case RGW_OP_PUT_ACLS:
-      case RGW_OP_PUT_CORS:
-      case RGW_OP_COMPLETE_MULTIPART:
-      case RGW_OP_SET_BUCKET_VERSIONING:
-      case RGW_OP_DELETE_MULTI_OBJ:
-      case RGW_OP_ADMIN_SET_METADATA:
-      case RGW_OP_SET_BUCKET_WEBSITE:
-        break;
-      default:
-        dout(10) << "ERROR: AWS4 completion for this operation NOT IMPLEMENTED" << dendl;
-        return -ERR_NOT_IMPLEMENTED;
-    }
+    if (!s->aws4_auth_streaming_mode) {
+
+      dout(10) << "delaying v4 auth" << dendl;
+
+      /* payload in a single chunk */
+
+      switch (s->op_type)
+      {
+        case RGW_OP_CREATE_BUCKET:
+        case RGW_OP_PUT_OBJ:
+        case RGW_OP_PUT_ACLS:
+        case RGW_OP_PUT_CORS:
+        case RGW_OP_COMPLETE_MULTIPART:
+        case RGW_OP_SET_BUCKET_VERSIONING:
+        case RGW_OP_DELETE_MULTI_OBJ:
+        case RGW_OP_ADMIN_SET_METADATA:
+        case RGW_OP_SET_BUCKET_WEBSITE:
+          break;
+        default:
+          dout(10) << "ERROR: AWS4 completion for this operation NOT IMPLEMENTED" << dendl;
+          return -ERR_NOT_IMPLEMENTED;
+      }
+
+      s->aws4_auth_needs_complete = true;
+
+    } else {
+
+      dout(10) << "body content detected in multiple chunks" << dendl;
+
+      /* payload in multiple chunks */
+
+      switch(s->op_type)
+      {
+        case RGW_OP_PUT_OBJ:
+          break;
+        default:
+          dout(10) << "ERROR: AWS4 completion for this operation NOT IMPLEMENTED (streaming mode)" << dendl;
+          return -ERR_NOT_IMPLEMENTED;
+      }
+
+      /* calculate seed */
+
+      int err = authorize_v4_complete(store, s, "", unsigned_payload);
+      if (err) {
+        return err;
+      }
 
-    s->aws4_auth_needs_complete = true;
+      /* verify seed signature */
+
+      if (s->aws4_auth->signature != s->aws4_auth->new_signature) {
+        dout(10) << "ERROR: AWS4 seed signature does NOT match!" << dendl;
+        return -ERR_SIGNATURE_NO_MATCH;
+      }
+
+      dout(10) << "aws4 seed signature ok... delaying v4 auth" << dendl;
+
+      s->aws4_auth_needs_complete = false;
+
+    }
 
   }
 
@@ -3727,29 +3935,45 @@ int RGW_Auth_S3::authorize_v2(RGWRados *store, struct req_state *s)
 
     RGW_Auth_S3::init(store);
 
+    ldout(store->ctx(), 15)
+      << __func__ << " LDAP auth uri="
+      << store->ctx()->_conf->rgw_ldap_uri
+      << dendl;
+
     RGWToken token{from_base64(auth_id)};
-    if ((! token.valid()) || ldh->auth(token.id, token.key) != 0)
+
+    if (! token.valid())
       external_auth_result = -EACCES;
     else {
-      /* ok, succeeded */
-      external_auth_result = 0;
+      ldout(store->ctx(), 10)
+	<< __func__ << " try LDAP auth uri="
+	<< store->ctx()->_conf->rgw_ldap_uri
+	<< " token.id=" << token.id
+	<< dendl;
+
+      if (ldh->auth(token.id, token.key) != 0)
+	external_auth_result = -EACCES;
+      else {
+	/* ok, succeeded */
+	external_auth_result = 0;
 
-      /* create local account, if none exists */
-      s->user->user_id = token.id;
-      s->user->display_name = token.id; // cn?
-      int ret = rgw_get_user_info_by_uid(store, s->user->user_id, *(s->user));
-      if (ret < 0) {
-	ret = rgw_store_user_info(store, *(s->user), nullptr, nullptr,
-				  real_time(), true);
+	/* create local account, if none exists */
+	s->user->user_id = token.id;
+	s->user->display_name = token.id; // cn?
+	int ret = rgw_get_user_info_by_uid(store, s->user->user_id, *(s->user));
 	if (ret < 0) {
-	  dout(10) << "NOTICE: failed to store new user's info: ret=" << ret
-		   << dendl;
+	  ret = rgw_store_user_info(store, *(s->user), nullptr, nullptr,
+				    real_time(), true);
+	  if (ret < 0) {
+	    dout(10) << "NOTICE: failed to store new user's info: ret=" << ret
+		     << dendl;
+	  }
 	}
-      }
 
       /* set request perms */
       s->perm_mask = RGW_PERM_FULL_CONTROL;
-    } /* success */
+      } /* success */
+    } /* token */
   } /* ldap */
 
   /* keystone failed (or not enabled); check if we want to use rados backend */
@@ -3959,52 +4183,87 @@ RGWOp* RGWHandler_REST_S3Website::op_head()
   return get_obj_op(false);
 }
 
-int RGWHandler_REST_S3Website::get_errordoc(const string& errordoc_key,
-					    std::string* error_content) {
-  ldout(s->cct, 20) << "TODO Serve Custom error page here if bucket has "
-    "<Error>" << dendl;
-  *error_content = errordoc_key;
-  // 1. Check if errordoc exists
-  // 2. Check if errordoc is public
-  // 3. Fetch errordoc content
-  /*
-   * FIXME maybe:  need to make sure all of the fields for conditional
-   * requests are cleared
-   */
-  RGWGetObj_ObjStore_S3Website* getop =
-    new RGWGetObj_ObjStore_S3Website(true);
-  getop->set_get_data(true);
+int RGWHandler_REST_S3Website::serve_errordoc(int http_ret, const string& errordoc_key) {
+  int ret = 0;
+  s->formatter->reset(); /* Try to throw it all away */
+
+  std::shared_ptr<RGWGetObj_ObjStore_S3Website> getop( (RGWGetObj_ObjStore_S3Website*) op_get() );
+  if (getop.get() == NULL) {
+    return -1; // Trigger double error handler
+  }
   getop->init(store, s, this);
+  getop->range_str = NULL;
+  getop->if_mod = NULL;
+  getop->if_unmod = NULL;
+  getop->if_match = NULL;
+  getop->if_nomatch = NULL;
+  s->object = errordoc_key;
+
+  ret = init_permissions(getop.get());
+  if (ret < 0) {
+    ldout(s->cct, 20) << "serve_errordoc failed, init_permissions ret=" << ret << dendl;
+    return -1; // Trigger double error handler
+  }
 
-  RGWGetObj_CB cb(getop);
-  rgw_obj obj(s->bucket, errordoc_key);
-  RGWObjectCtx rctx(store);
-  //RGWRados::Object op_target(store, s->bucket_info, *static_cast<RGWObjectCtx *>(s->obj_ctx), obj);
-  RGWRados::Object op_target(store, s->bucket_info, rctx, obj);
-  RGWRados::Object::Read read_op(&op_target);
+  ret = read_permissions(getop.get());
+  if (ret < 0) {
+    ldout(s->cct, 20) << "serve_errordoc failed, read_permissions ret=" << ret << dendl;
+    return -1; // Trigger double error handler
+  }
 
-  int ret;
-  int64_t ofs = 0; 
-  int64_t end = -1;
-  ret = read_op.prepare(&ofs, &end);
+  if (http_ret) {
+     getop->set_custom_http_response(http_ret);
+  }
+
+  ret = getop->init_processing();
   if (ret < 0) {
-    goto done;
+    ldout(s->cct, 20) << "serve_errordoc failed, init_processing ret=" << ret << dendl;
+    return -1; // Trigger double error handler
   }
 
-  ret = read_op.iterate(ofs, end, &cb); // FIXME: need to know the final size?
-done:
-  delete getop;
-  return ret;
+  ret = getop->verify_op_mask();
+  if (ret < 0) {
+    ldout(s->cct, 20) << "serve_errordoc failed, verify_op_mask ret=" << ret << dendl;
+    return -1; // Trigger double error handler
+  }
+
+  ret = getop->verify_permission();
+  if (ret < 0) {
+    ldout(s->cct, 20) << "serve_errordoc failed, verify_permission ret=" << ret << dendl;
+    return -1; // Trigger double error handler
+  }
+
+  ret = getop->verify_params();
+  if (ret < 0) {
+    ldout(s->cct, 20) << "serve_errordoc failed, verify_params ret=" << ret << dendl;
+    return -1; // Trigger double error handler
+  }
+
+  // No going back now
+  getop->pre_exec();
+  /*
+   * FIXME Missing headers:
+   * With a working errordoc, the s3 error fields are rendered as HTTP headers,
+   *   x-amz-error-code: NoSuchKey
+   *   x-amz-error-message: The specified key does not exist.
+   *   x-amz-error-detail-Key: foo
+   */
+  getop->execute();
+  getop->complete();
+  return 0;
+
 }
-  
+
 int RGWHandler_REST_S3Website::error_handler(int err_no,
 					    string* error_content) {
+  int new_err_no = -1;
   const struct rgw_http_errors* r;
   int http_error_code = -1;
-  r = search_err(err_no, RGW_HTTP_ERRORS, ARRAY_LEN(RGW_HTTP_ERRORS));
+  r = search_err(err_no > 0 ? err_no : -err_no, RGW_HTTP_ERRORS, ARRAY_LEN(RGW_HTTP_ERRORS));
   if (r) {
     http_error_code = r->http_ret;
   }
+  ldout(s->cct, 10) << "RGWHandler_REST_S3Website::error_handler err_no=" << err_no << " http_ret=" << http_error_code << dendl;
 
   RGWBWRoutingRule rrule;
   bool should_redirect =
@@ -4025,9 +4284,18 @@ int RGWHandler_REST_S3Website::error_handler(int err_no,
 		      << " proto+host:" << protocol << "://" << hostname
 		      << " -> " << s->redirect << dendl;
     return -ERR_WEBSITE_REDIRECT;
+  } else if (err_no == -ERR_WEBSITE_REDIRECT) {
+    // Do nothing here, this redirect will be handled in abort_early's ERR_WEBSITE_REDIRECT block
+    // Do NOT fire the ErrorDoc handler
   } else if (!s->bucket_info.website_conf.error_doc.empty()) {
-    RGWHandler_REST_S3Website::get_errordoc(
-      s->bucket_info.website_conf.error_doc, error_content);
+    /* This serves an entire page!
+       On success, it will return zero, and no further content should be sent to the socket
+       On failure, we need the double-error handler
+     */
+    new_err_no = RGWHandler_REST_S3Website::serve_errordoc(http_error_code, s->bucket_info.website_conf.error_doc);
+    if (new_err_no && new_err_no != -1) {
+      err_no = new_err_no;
+    }
   } else {
     ldout(s->cct, 20) << "No special error handling today!" << dendl;
   }
diff --git a/src/rgw/rgw_rest_s3.h b/src/rgw/rgw_rest_s3.h
index 9278964..0f42466 100644
--- a/src/rgw/rgw_rest_s3.h
+++ b/src/rgw/rgw_rest_s3.h
@@ -22,12 +22,17 @@ void rgw_get_errno_s3(struct rgw_http_errors *e, int err_no);
 
 class RGWGetObj_ObjStore_S3 : public RGWGetObj_ObjStore
 {
+protected:
+  // Serving a custom error page from an object is really a 200 response with
+  // just the status line altered.
+  int custom_http_ret = 0;
 public:
   RGWGetObj_ObjStore_S3() {}
   ~RGWGetObj_ObjStore_S3() {}
 
   int send_response_data_error();
   int send_response_data(bufferlist& bl, off_t ofs, off_t len);
+  void set_custom_http_response(int http_ret) { custom_http_ret = http_ret; }
 };
 
 class RGWListBuckets_ObjStore_S3 : public RGWListBuckets_ObjStore {
@@ -157,6 +162,13 @@ public:
   int get_params();
   int get_data(bufferlist& bl);
   void send_response();
+
+  int validate_aws4_single_chunk(char *chunk_str,
+                                 char *chunk_data_str,
+                                 unsigned int chunk_data_size,
+                                 string chunk_signature);
+  int validate_and_unwrap_available_aws4_chunked_data(bufferlist& bl_in,
+                                                      bufferlist& bl_out);
 };
 
 struct post_part_field {
@@ -440,11 +452,8 @@ public:
   RGWHandler_Auth_S3() : RGWHandler_REST() {}
   virtual ~RGWHandler_Auth_S3() {}
 
-  virtual int validate_bucket_name(const string& bucket) {
-    return 0;
-  }
-
-  virtual int validate_object_name(const string& bucket) { return 0; }
+  static int validate_bucket_name(const string& bucket);
+  static int validate_object_name(const string& bucket);
 
   virtual int init(RGWRados *store, struct req_state *s, RGWClientIO *cio);
   virtual int authorize() {
@@ -461,8 +470,6 @@ public:
   RGWHandler_REST_S3() : RGWHandler_REST() {}
   virtual ~RGWHandler_REST_S3() {}
 
-  int get_errordoc(const string& errordoc_key, string* error_content);  
-
   virtual int init(RGWRados *store, struct req_state *s, RGWClientIO *cio);
   virtual int authorize() {
     return RGW_Auth_S3::authorize(store, s);
diff --git a/src/rgw/rgw_rest_s3website.h b/src/rgw/rgw_rest_s3website.h
index b14942b..943eabe 100644
--- a/src/rgw/rgw_rest_s3website.h
+++ b/src/rgw/rgw_rest_s3website.h
@@ -32,7 +32,7 @@ protected:
   RGWOp *op_copy() { return NULL; }
   RGWOp *op_options() { return NULL; }
 
-  int get_errordoc(const string& errordoc_key, string *error_content);
+  int serve_errordoc(int http_ret, const string &errordoc_key);
 public:
   RGWHandler_REST_S3Website() : RGWHandler_REST_S3() {}
   virtual ~RGWHandler_REST_S3Website() {}
@@ -69,6 +69,7 @@ public:
 // TODO: do we actually need this?
 class  RGWGetObj_ObjStore_S3Website : public RGWGetObj_ObjStore_S3
 {
+  friend class RGWHandler_REST_S3Website;
 private:
    bool is_errordoc_request;
 public:
@@ -86,7 +87,7 @@ public:
         if_unmod = NULL;
         if_match = NULL;
         if_nomatch = NULL;
-               return 0;
+        return 0;
       } else {
         return RGWGetObj_ObjStore_S3::get_params();
       }
diff --git a/src/rgw/rgw_rest_swift.cc b/src/rgw/rgw_rest_swift.cc
index a9bf03e..a6d3623 100644
--- a/src/rgw/rgw_rest_swift.cc
+++ b/src/rgw/rgw_rest_swift.cc
@@ -1,6 +1,9 @@
 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
 // vim: ts=8 sw=2 smarttab
 
+#include <boost/optional.hpp>
+#include <boost/utility/in_place_factory.hpp>
+
 #include "include/assert.h"
 
 #include "common/Formatter.h"
@@ -102,8 +105,9 @@ static void dump_account_metadata(struct req_state * const s,
       STREAM_IO(s)->print("%s: %s\r\n", geniter->second.c_str(),
 			  iter->second.c_str());
     } else if (strncmp(name, RGW_ATTR_META_PREFIX, PREFIX_LEN) == 0) {
-      STREAM_IO(s)->print("X-Account-Meta-%s: %s\r\n", name + PREFIX_LEN,
-			  iter->second.c_str());
+      STREAM_IO(s)->print("X-Account-Meta-%s: %s\r\n",
+                          camelcase_dash_http_attr(name + PREFIX_LEN).c_str(),
+                          iter->second.c_str());
     }
   }
 }
@@ -366,8 +370,9 @@ static void dump_container_metadata(struct req_state *s, RGWBucketEnt& bucket)
         STREAM_IO(s)->print("%s: %s\r\n", geniter->second.c_str(),
 			    iter->second.c_str());
       } else if (strncmp(name, RGW_ATTR_META_PREFIX, PREFIX_LEN) == 0) {
-        STREAM_IO(s)->print("X-Container-Meta-%s: %s\r\n", name + PREFIX_LEN,
-			    iter->second.c_str());
+        STREAM_IO(s)->print("X-Container-Meta-%s: %s\r\n",
+                            camelcase_dash_http_attr(name + PREFIX_LEN).c_str(),
+                            iter->second.c_str());
       }
     }
   }
@@ -503,6 +508,33 @@ static void get_rmattrs_from_headers(const req_state * const s,
   }
 }
 
+static int get_swift_versioning_settings(
+  req_state * const s,
+  boost::optional<std::string>& swift_ver_location)
+{
+  /* Removing the Swift's versions location has lower priority than setting
+   * a new one. That's the reason why we're handling it first. */
+  const std::string vlocdel =
+    s->info.env->get("HTTP_X_REMOVE_VERSIONS_LOCATION", "");
+  if (vlocdel.size()) {
+    swift_ver_location = boost::in_place(std::string());
+  }
+
+  std::string vloc = s->info.env->get("HTTP_X_VERSIONS_LOCATION", "");
+  if (vloc.size()) {
+    /* If the Swift's versioning is globally disabled but someone wants to
+     * enable it for a given container, new version of Swift will generate
+     * the precondition failed error. */
+    if (! s->cct->_conf->rgw_swift_versioning_enabled) {
+      return -ERR_PRECONDITION_FAILED;
+    }
+
+    swift_ver_location = std::move(vloc);
+  }
+
+  return 0;
+}
+
 int RGWCreateBucket_ObjStore_SWIFT::get_params()
 {
   bool has_policy;
@@ -521,11 +553,7 @@ int RGWCreateBucket_ObjStore_SWIFT::get_params()
                            CONT_REMOVE_ATTR_PREFIX, rmattr_names);
   placement_rule = s->info.env->get("HTTP_X_STORAGE_POLICY", "");
 
-  if (s->cct->_conf->rgw_swift_versioning_enabled) {
-    swift_ver_location = s->info.env->get("HTTP_X_VERSIONS_LOCATION", "");
-  }
-
-  return 0;
+  return get_swift_versioning_settings(s, swift_ver_location);
 }
 
 void RGWCreateBucket_ObjStore_SWIFT::send_response()
@@ -739,10 +767,7 @@ int RGWPutMetadataBucket_ObjStore_SWIFT::get_params()
 			   rmattr_names);
   placement_rule = s->info.env->get("HTTP_X_STORAGE_POLICY", "");
 
-  if (s->cct->_conf->rgw_swift_versioning_enabled) {
-    swift_ver_location = s->info.env->get("HTTP_X_VERSIONS_LOCATION", "");
-  }
-  return 0;
+  return get_swift_versioning_settings(s, swift_ver_location);
 }
 
 void RGWPutMetadataBucket_ObjStore_SWIFT::send_response()
@@ -919,7 +944,8 @@ static void dump_object_metadata(struct req_state * const s,
     } else if (strncmp(name, RGW_ATTR_META_PREFIX,
 		       sizeof(RGW_ATTR_META_PREFIX)-1) == 0) {
       name += sizeof(RGW_ATTR_META_PREFIX) - 1;
-      STREAM_IO(s)->print("X-Object-Meta-%s: %s\r\n", name,
+      STREAM_IO(s)->print("X-Object-Meta-%s: %s\r\n",
+                          camelcase_dash_http_attr(name).c_str(),
                           kv.second.c_str());
     }
   }
@@ -1173,21 +1199,31 @@ int RGWBulkDelete_ObjStore_SWIFT::get_data(
 
     RGWBulkDelete::acct_path_t path;
 
-    const size_t sep_pos = path_str.find('/');
-    if (string::npos == sep_pos) {
-      url_decode(path_str, path.bucket_name);
-    } else {
-      string bucket_name;
-      url_decode(path_str.substr(0, sep_pos), bucket_name);
+    /* We need to skip all slashes at the beginning in order to preserve
+     * compliance with Swift. */
+    const size_t start_pos = path_str.find_first_not_of('/');
 
-      string obj_name;
-      url_decode(path_str.substr(sep_pos + 1), obj_name);
+    if (string::npos != start_pos) {
+      /* Seperator is the first slash after the leading ones. */
+      const size_t sep_pos = path_str.find('/', start_pos);
 
-      path.bucket_name = bucket_name;
-      path.obj_key = obj_name;
-    }
+      if (string::npos != sep_pos) {
+        string bucket_name;
+        url_decode(path_str.substr(start_pos, sep_pos - start_pos), bucket_name);
+
+        string obj_name;
+        url_decode(path_str.substr(sep_pos + 1), obj_name);
 
-    items.push_back(path);
+        path.bucket_name = bucket_name;
+        path.obj_key = obj_name;
+      } else {
+        /* It's guaranteed here that bucket name is at least one character
+         * long and is different than slash. */
+        url_decode(path_str.substr(start_pos), path.bucket_name);
+      }
+
+      items.push_back(path);
+    }
 
     if (items.size() == MAX_CHUNK_ENTRIES) {
       *is_truncated = true;
diff --git a/src/rgw/rgw_rest_swift.h b/src/rgw/rgw_rest_swift.h
index bbef15b..0c4b1e2 100644
--- a/src/rgw/rgw_rest_swift.h
+++ b/src/rgw/rgw_rest_swift.h
@@ -197,7 +197,7 @@ public:
   RGWHandler_REST_SWIFT() {}
   virtual ~RGWHandler_REST_SWIFT() {}
 
-  int validate_bucket_name(const string& bucket);
+  static int validate_bucket_name(const string& bucket);
 
   int init(RGWRados *store, struct req_state *s, RGWClientIO *cio);
   int authorize();
diff --git a/src/rgw/rgw_rest_user.cc b/src/rgw/rgw_rest_user.cc
index 587e6d0..915c4ea 100644
--- a/src/rgw/rgw_rest_user.cc
+++ b/src/rgw/rgw_rest_user.cc
@@ -73,8 +73,8 @@ void RGWOp_User_Create::execute()
   bool system;
   bool exclusive;
 
-  uint32_t max_buckets;
-  uint32_t default_max_buckets = s->cct->_conf->rgw_user_max_buckets;
+  int32_t max_buckets;
+  int32_t default_max_buckets = s->cct->_conf->rgw_user_max_buckets;
 
   RGWUserAdminOpState op_state;
 
@@ -89,7 +89,7 @@ void RGWOp_User_Create::execute()
   RESTArgs::get_string(s, "user-caps", caps, &caps);
   RESTArgs::get_bool(s, "generate-key", true, &gen_key);
   RESTArgs::get_bool(s, "suspended", false, &suspended);
-  RESTArgs::get_uint32(s, "max-buckets", default_max_buckets, &max_buckets);
+  RESTArgs::get_int32(s, "max-buckets", default_max_buckets, &max_buckets);
   RESTArgs::get_bool(s, "system", false, &system);
   RESTArgs::get_bool(s, "exclusive", false, &exclusive);
 
@@ -174,7 +174,7 @@ void RGWOp_User_Modify::execute()
   bool suspended;
   bool system;
 
-  uint32_t max_buckets;
+  int32_t max_buckets;
 
   RGWUserAdminOpState op_state;
 
@@ -188,7 +188,7 @@ void RGWOp_User_Modify::execute()
   RESTArgs::get_string(s, "user-caps", caps, &caps);
   RESTArgs::get_bool(s, "generate-key", false, &gen_key);
   RESTArgs::get_bool(s, "suspended", false, &suspended);
-  RESTArgs::get_uint32(s, "max-buckets", RGW_DEFAULT_MAX_BUCKETS, &max_buckets);
+  RESTArgs::get_int32(s, "max-buckets", RGW_DEFAULT_MAX_BUCKETS, &max_buckets);
   RESTArgs::get_string(s, "key-type", key_type_str, &key_type_str);
 
   RESTArgs::get_bool(s, "system", false, &system);
diff --git a/src/rgw/rgw_sync.cc b/src/rgw/rgw_sync.cc
index dab98fc..3d494ef 100644
--- a/src/rgw/rgw_sync.cc
+++ b/src/rgw/rgw_sync.cc
@@ -591,12 +591,13 @@ class RGWInitSyncStatusCoroutine : public RGWCoroutine {
   rgw_meta_sync_info status;
   vector<RGWMetadataLogInfo> shards_info;
   RGWContinuousLeaseCR *lease_cr;
+  RGWCoroutinesStack *lease_stack;
 public:
   RGWInitSyncStatusCoroutine(RGWMetaSyncEnv *_sync_env,
                              const rgw_meta_sync_info &status)
     : RGWCoroutine(_sync_env->store->ctx()), sync_env(_sync_env),
       status(status), shards_info(status.num_shards),
-      lease_cr(NULL) {}
+      lease_cr(nullptr), lease_stack(nullptr) {}
 
   ~RGWInitSyncStatusCoroutine() {
     if (lease_cr) {
@@ -616,11 +617,11 @@ public:
 	lease_cr = new RGWContinuousLeaseCR(sync_env->async_rados, store, store->get_zone_params().log_pool, sync_env->status_oid(),
                                             lock_name, lock_duration, this);
         lease_cr->get();
-        spawn(lease_cr, false);
+        lease_stack = spawn(lease_cr, false);
       }
       while (!lease_cr->is_locked()) {
         if (lease_cr->is_done()) {
-          ldout(cct, 0) << "ERROR: lease cr failed, done early " << dendl;
+          ldout(cct, 5) << "lease cr failed, done early " << dendl;
           set_status("lease lock failed, early abort");
           return set_cr_error(lease_cr->get_ret_status());
         }
@@ -649,7 +650,7 @@ public:
 	}
       }
 
-      drain_all_but(1); /* the lease cr still needs to run */
+      drain_all_but_stack(lease_stack); /* the lease cr still needs to run */
 
       yield {
         set_status("updating sync status");
@@ -672,7 +673,7 @@ public:
       }
       set_status("drop lock lease");
       yield lease_cr->go_down();
-      while (collect(&ret)) {
+      while (collect(&ret, NULL)) {
 	if (ret < 0) {
 	  return set_cr_error(ret);
 	}
@@ -735,6 +736,7 @@ class RGWFetchAllMetaCR : public RGWCoroutine {
   RGWShardedOmapCRManager *entries_index;
 
   RGWContinuousLeaseCR *lease_cr;
+  RGWCoroutinesStack *lease_stack;
   bool lost_lock;
   bool failed;
 
@@ -744,7 +746,8 @@ public:
   RGWFetchAllMetaCR(RGWMetaSyncEnv *_sync_env, int _num_shards,
                     map<uint32_t, rgw_meta_sync_marker>& _markers) : RGWCoroutine(_sync_env->cct), sync_env(_sync_env),
 						      num_shards(_num_shards),
-						      ret_status(0), entries_index(NULL), lease_cr(NULL), lost_lock(false), failed(false), markers(_markers) {
+						      ret_status(0), entries_index(NULL), lease_cr(nullptr), lease_stack(nullptr),
+                                                      lost_lock(false), failed(false), markers(_markers) {
   }
 
   ~RGWFetchAllMetaCR() {
@@ -789,11 +792,11 @@ public:
 	lease_cr = new RGWContinuousLeaseCR(sync_env->async_rados, sync_env->store, sync_env->store->get_zone_params().log_pool, sync_env->status_oid(),
                                             lock_name, lock_duration, this);
         lease_cr->get();
-        spawn(lease_cr, false);
+        lease_stack = spawn(lease_cr, false);
       }
       while (!lease_cr->is_locked()) {
         if (lease_cr->is_done()) {
-          ldout(cct, 0) << "ERROR: lease cr failed, done early " << dendl;
+          ldout(cct, 5) << "lease cr failed, done early " << dendl;
           set_status("failed acquiring lock");
           return set_cr_error(lease_cr->get_ret_status());
         }
@@ -868,12 +871,12 @@ public:
         }
       }
 
-      drain_all_but(1); /* the lease cr still needs to run */
+      drain_all_but_stack(lease_stack); /* the lease cr still needs to run */
 
       yield lease_cr->go_down();
 
       int ret;
-      while (collect(&ret)) {
+      while (collect(&ret, NULL)) {
 	if (ret < 0) {
 	  return set_cr_error(ret);
 	}
@@ -1250,6 +1253,7 @@ class RGWMetaSyncShardCR : public RGWCoroutine {
   boost::asio::coroutine full_cr;
 
   RGWContinuousLeaseCR *lease_cr = nullptr;
+  RGWCoroutinesStack *lease_stack = nullptr;
   bool lost_lock = false;
 
   bool *reset_backoff;
@@ -1380,12 +1384,12 @@ public:
                                             sync_env->shard_obj_name(shard_id),
                                             lock_name, lock_duration, this);
         lease_cr->get();
-        spawn(lease_cr, false);
+        lease_stack = spawn(lease_cr, false);
         lost_lock = false;
       }
       while (!lease_cr->is_locked()) {
         if (lease_cr->is_done()) {
-          ldout(cct, 0) << "ERROR: lease cr failed, done early " << dendl;
+          ldout(cct, 5) << "lease cr failed, done early " << dendl;
           drain_all();
           return lease_cr->get_ret_status();
         }
@@ -1502,12 +1506,12 @@ public:
                                               sync_env->shard_obj_name(shard_id),
                                               lock_name, lock_duration, this);
           lease_cr->get();
-          spawn(lease_cr, false);
+          lease_stack = spawn(lease_cr, false);
           lost_lock = false;
         }
         while (!lease_cr->is_locked()) {
           if (lease_cr->is_done()) {
-            ldout(cct, 0) << "ERROR: lease cr failed, done early " << dendl;
+            ldout(cct, 5) << "lease cr failed, done early " << dendl;
             drain_all();
             return lease_cr->get_ret_status();
           }
@@ -1661,8 +1665,13 @@ class RGWMetaSyncCR : public RGWCoroutine {
 
   std::mutex mutex; //< protect access to shard_crs
 
+  // TODO: it should be enough to hold a reference on the stack only, as calling
+  // RGWCoroutinesStack::wakeup() doesn't refer to the RGWCoroutine if it has
+  // already completed
   using ControlCRRef = boost::intrusive_ptr<RGWMetaSyncShardControlCR>;
-  map<int, ControlCRRef> shard_crs;
+  using StackRef = boost::intrusive_ptr<RGWCoroutinesStack>;
+  using RefPair = std::pair<ControlCRRef, StackRef>;
+  map<int, RefPair> shard_crs;
 
 public:
   RGWMetaSyncCR(RGWMetaSyncEnv *_sync_env, RGWPeriodHistory::Cursor cursor,
@@ -1720,12 +1729,12 @@ public:
             auto cr = new RGWMetaSyncShardControlCR(sync_env, pool, period_id,
                                                     mdlog, shard_id, marker,
                                                     std::move(period_marker));
-            shard_crs[shard_id] = cr;
-            spawn(cr, false);
+            auto stack = spawn(cr, false);
+            shard_crs[shard_id] = RefPair{cr, stack};
           }
         }
         // wait for each shard to complete
-        collect(&ret);
+        collect(&ret, NULL);
         drain_all();
         {
           // drop shard cr refs under lock
@@ -1757,7 +1766,7 @@ public:
     if (iter == shard_crs.end()) {
       return;
     }
-    iter->second->wakeup();
+    iter->second.first->wakeup();
   }
 };
 
@@ -2096,7 +2105,7 @@ int RGWCloneMetaLogCoroutine::state_receive_rest_response()
   int ret = http_op->wait(&data);
   if (ret < 0) {
     error_stream << "http operation failed: " << http_op->to_str() << " status=" << http_op->get_http_status() << std::endl;
-    ldout(cct, 0) << "ERROR: failed to wait for op, ret=" << ret << dendl;
+    ldout(cct, 5) << "failed to wait for op, ret=" << ret << dendl;
     http_op->put();
     http_op = NULL;
     return set_cr_error(ret);
diff --git a/src/rgw/rgw_user.cc b/src/rgw/rgw_user.cc
index 53f4a75..6002aec 100644
--- a/src/rgw/rgw_user.cc
+++ b/src/rgw/rgw_user.cc
@@ -5,6 +5,7 @@
 
 #include <string>
 #include <map>
+#include <boost/algorithm/string.hpp>
 
 #include "common/errno.h"
 #include "common/Formatter.h"
@@ -1877,13 +1878,15 @@ int RGWUser::execute_add(RGWUserAdminOpState& op_state, std::string *err_msg)
   // fail if the user exists already
   if (op_state.has_existing_user()) {
     if (!op_state.exclusive &&
-        (user_email.empty() || old_info.user_email == user_email) &&
+        (user_email.empty() ||
+	 boost::iequals(user_email, old_info.user_email)) &&
         old_info.display_name == display_name) {
       return execute_modify(op_state, err_msg);
     }
 
     if (op_state.found_by_email) {
-      set_err_msg(err_msg, "email: " + user_email + " exists");
+      set_err_msg(err_msg, "email: " + user_email +
+		  " is the email address an existing user");
       ret = -ERR_EMAIL_EXIST;
     } else if (op_state.found_by_key) {
       set_err_msg(err_msg, "duplicate key provided");
diff --git a/src/rgw/rgw_user.h b/src/rgw/rgw_user.h
index 6335fb6..269ae90 100644
--- a/src/rgw/rgw_user.h
+++ b/src/rgw/rgw_user.h
@@ -5,6 +5,8 @@
 #define CEPH_RGW_USER_H
 
 #include <string>
+#include <boost/algorithm/string.hpp>
+#include "include/assert.h"
 
 #include "include/types.h"
 #include "rgw_common.h"
@@ -155,7 +157,7 @@ struct RGWUserAdminOpState {
   rgw_user user_id;
   std::string user_email;
   std::string display_name;
-  uint32_t max_buckets;
+  int32_t max_buckets;
   __u8 suspended;
   __u8 system;
   __u8 exclusive;
@@ -225,6 +227,7 @@ struct RGWUserAdminOpState {
     gen_access = false;
     key_op = true;
   }
+
   void set_secret_key(std::string& secret_key) {
     if (secret_key.empty())
       return;
@@ -234,19 +237,24 @@ struct RGWUserAdminOpState {
     gen_secret = false;
     key_op = true;
   }
+
   void set_user_id(rgw_user& id) {
     if (id.empty())
       return;
 
     user_id = id;
   }
+
   void set_user_email(std::string& email) {
     if (email.empty())
       return;
 
+    /* always lowercase email address */
+    boost::algorithm::to_lower(email);
     user_email = email;
     user_email_specified = true;
   }
+
   void set_display_name(std::string& name) {
     if (name.empty())
       return;
@@ -254,6 +262,7 @@ struct RGWUserAdminOpState {
     display_name = name;
     display_name_specified = true;
   }
+
   void set_subuser(std::string& _subuser) {
     if (_subuser.empty())
       return;
@@ -274,6 +283,7 @@ struct RGWUserAdminOpState {
 
     subuser_specified = true;
   }
+
   void set_caps(std::string& _caps) {
     if (_caps.empty())
       return;
@@ -281,52 +291,65 @@ struct RGWUserAdminOpState {
     caps = _caps;
     caps_specified = true;
   }
+
   void set_perm(uint32_t perm) {
     perm_mask = perm;
     perm_specified = true;
   }
+
   void set_op_mask(uint32_t mask) {
     op_mask = mask;
     op_mask_specified = true;
   }
+
   void set_temp_url_key(const string& key, int index) {
     temp_url_keys[index] = key;
     temp_url_key_specified = true;
   }
+
   void set_key_type(int32_t type) {
     key_type = type;
     type_specified = true;
   }
+
   void set_suspension(__u8 is_suspended) {
     suspended = is_suspended;
     suspension_op = true;
   }
+
   void set_system(__u8 is_system) {
     system = is_system;
     system_specified = true;
   }
+
   void set_exclusive(__u8 is_exclusive) {
     exclusive = is_exclusive;
   }
+
   void set_fetch_stats(__u8 is_fetch_stats) {
     fetch_stats = is_fetch_stats;
   }
+
   void set_user_info(RGWUserInfo& user_info) {
     user_id = user_info.user_id;
     info = user_info;
   }
-  void set_max_buckets(uint32_t mb) {
+
+  void set_max_buckets(int32_t mb) {
     max_buckets = mb;
     max_buckets_specified = true;
   }
+
   void set_gen_access() {
     gen_access = true;
     key_op = true;
   }
+
   void set_gen_secret() {
     gen_secret = true;
     key_op = true;
   }
+
   void set_generate_key() {
     if (id.empty())
       gen_access = true;
@@ -334,10 +357,12 @@ struct RGWUserAdminOpState {
       gen_secret = true;
     key_op = true;
   }
+
   void clear_generate_key() {
     gen_access = false;
     gen_secret = false;
   }
+
   void set_purge_keys() {
     purge_keys = true;
     key_op = true;
@@ -385,7 +410,7 @@ struct RGWUserAdminOpState {
   __u8 get_suspension_status() { return suspended; }
   int32_t get_key_type() {return key_type; }
   uint32_t get_subuser_perm() { return perm_mask; }
-  uint32_t get_max_buckets() { return max_buckets; }
+  int32_t get_max_buckets() { return max_buckets; }
   uint32_t get_op_mask() { return op_mask; }
   RGWQuotaInfo& get_bucket_quota() { return bucket_quota; }
   RGWQuotaInfo& get_user_quota() { return user_quota; }
diff --git a/src/rocksdb/Makefile b/src/rocksdb/Makefile
index c64ea36..31ac650 100644
--- a/src/rocksdb/Makefile
+++ b/src/rocksdb/Makefile
@@ -84,7 +84,8 @@ endif
 # compile with -O2 if debug level is not 2
 ifneq ($(DEBUG_LEVEL), 2)
 OPT += -O2 -fno-omit-frame-pointer
-ifneq ($(MACHINE),ppc64) # ppc64 doesn't support -momit-leaf-frame-pointer
+# Skip for archs that don't support -momit-leaf-frame-pointer
+ifeq (,$(shell $(CXX) -fsyntax-only -momit-leaf-frame-pointer -xc /dev/null 2>&1))
 OPT += -momit-leaf-frame-pointer
 endif
 endif
diff --git a/src/script/subman b/src/script/subman
index 129e507..8ad7d76 100755
--- a/src/script/subman
+++ b/src/script/subman
@@ -1,6 +1,7 @@
-#!/usr/bin/env python -B
+#!/usr/bin/env python
 
 import json
+import os
 import re
 import subprocess
 
@@ -13,8 +14,9 @@ for disk in disks:
             df = subprocess.check_output("df --output=used " + partition['path'], shell=True)
             used += int(re.findall('\d+', df)[0])
 
-open("/etc/rhsm/facts/ceph_usage.facts", 'w').write("""
+facts_file = os.environ.get("CEPH_FACTS_FILE", "/etc/rhsm/facts/ceph_usage.facts")
+open(facts_file, 'w').write("""\
 {
 "band.storage.usage": {used}
 }
-""".format(used=used/(1024*1024*1024)))
+""".replace('{used}', str(int(used/(1024*1024*1024)))))
diff --git a/src/test/Makefile-client.am b/src/test/Makefile-client.am
index 153f58d..6eade23 100644
--- a/src/test/Makefile-client.am
+++ b/src/test/Makefile-client.am
@@ -476,8 +476,10 @@ unittest_rbd_mirror_SOURCES = \
 	test/rbd_mirror/test_mock_fixture.cc \
 	test/rbd_mirror/test_mock_ImageReplayer.cc \
 	test/rbd_mirror/test_mock_ImageSync.cc \
+	test/rbd_mirror/test_mock_ImageSyncThrottler.cc \
 	test/rbd_mirror/image_replayer/test_mock_BootstrapRequest.cc \
 	test/rbd_mirror/image_replayer/test_mock_CreateImageRequest.cc \
+	test/rbd_mirror/image_replayer/test_mock_EventPreprocessor.cc \
 	test/rbd_mirror/image_sync/test_mock_ImageCopyRequest.cc \
 	test/rbd_mirror/image_sync/test_mock_ObjectCopyRequest.cc \
 	test/rbd_mirror/image_sync/test_mock_SnapshotCopyRequest.cc \
@@ -529,22 +531,6 @@ ceph_test_rbd_mirror_random_write_LDADD = \
 	$(LIBRBD) $(LIBRADOS) $(CEPH_GLOBAL)
 bin_DEBUGPROGRAMS += ceph_test_rbd_mirror_random_write
 
-ceph_test_rbd_mirror_image_replay_SOURCES = \
-	test/rbd_mirror/image_replay.cc
-ceph_test_rbd_mirror_image_replay_LDADD = \
-	librbd_mirror_internal.la \
-	librbd_internal.la \
-	librbd_api.la \
-	$(LIBRBD_TYPES) \
-	libjournal.la \
-	$(LIBRADOS) $(LIBOSDC) \
-	librados_internal.la \
-	libcls_rbd_client.la \
-	libcls_lock_client.la \
-	libcls_journal_client.la \
-	$(CEPH_GLOBAL)
-bin_DEBUGPROGRAMS += ceph_test_rbd_mirror_image_replay
-
 if LINUX
 ceph_test_librbd_fsx_SOURCES = test/librbd/fsx.cc
 ceph_test_librbd_fsx_LDADD = \
diff --git a/src/test/Makefile.am b/src/test/Makefile.am
index 536edbf..c865b68 100644
--- a/src/test/Makefile.am
+++ b/src/test/Makefile.am
@@ -93,7 +93,8 @@ check_SCRIPTS += \
 	test/mon/mon-handle-forward.sh \
 	test/libradosstriper/rados-striper.sh \
 	test/test_objectstore_memstore.sh \
-        test/test_pidfile.sh
+        test/test_pidfile.sh \
+	test/test_subman.sh
 
 EXTRA_DIST += \
 	$(srcdir)/test/python/brag-client/setup.py \
@@ -452,7 +453,8 @@ check_SCRIPTS += test/pybind/test_ceph_daemon.py
 
 ceph_test_objectcacher_stress_SOURCES = \
 	test/osdc/object_cacher_stress.cc \
-	test/osdc/FakeWriteback.cc
+	test/osdc/FakeWriteback.cc \
+	test/osdc/MemWriteback.cc
 ceph_test_objectcacher_stress_LDADD = $(LIBOSDC) $(CEPH_GLOBAL)
 bin_DEBUGPROGRAMS += ceph_test_objectcacher_stress
 
@@ -489,6 +491,7 @@ noinst_HEADERS += \
 	test/ObjectMap/KeyValueDBMemory.h \
 	test/omap_bench.h \
 	test/osdc/FakeWriteback.h \
+	test/osdc/MemWriteback.h \
 	test/osd/Object.h \
 	test/osd/RadosModel.h \
 	test/osd/TestOpStat.h \
diff --git a/src/test/centos-6/ceph.spec.in b/src/test/centos-6/ceph.spec.in
index 3cf6307..b2e4b12 100644
--- a/src/test/centos-6/ceph.spec.in
+++ b/src/test/centos-6/ceph.spec.in
@@ -18,7 +18,12 @@
 %bcond_without cephfs_java
 %bcond_with tests
 %bcond_with xio
+%ifnarch s390 s390x
 %bcond_without tcmalloc
+%else
+# no gperftools/tcmalloc on s390(x)
+%bcond_with tcmalloc
+%endif
 %bcond_without libs_compat
 %bcond_with lowmem_builder
 %if 0%{?fedora} || 0%{?rhel}
@@ -59,6 +64,13 @@ Group:         System/Filesystems
 %endif
 URL:		http://ceph.com/
 Source0:	http://ceph.com/download/%{name}-%{version}.tar.bz2
+%if 0%{?suse_version}
+%if 0%{?is_opensuse}
+ExclusiveArch:  x86_64 aarch64 ppc64 ppc64le
+%else
+ExclusiveArch:  x86_64 aarch64
+%endif
+%endif
 #################################################################################
 # dependencies that apply across all distro families
 #################################################################################
@@ -81,9 +93,13 @@ BuildRequires:	cryptsetup
 BuildRequires:	fuse-devel
 BuildRequires:	gcc-c++
 BuildRequires:	gdbm
+%if 0%{with tcmalloc}
+BuildRequires:	gperftools-devel
+%endif
 BuildRequires:	hdparm
 BuildRequires:	leveldb-devel > 1.2
 BuildRequires:	libaio-devel
+BuildRequires:	libatomic_ops-devel
 BuildRequires:	libblkid-devel >= 2.17
 BuildRequires:	libcurl-devel
 BuildRequires:	libudev-devel
@@ -118,13 +134,9 @@ BuildRequires:	systemd
 PreReq:		%fillup_prereq
 BuildRequires:	net-tools
 BuildRequires:	libbz2-devel
-%if 0%{with tcmalloc}
-BuildRequires:	gperftools-devel
-%endif
 BuildRequires:  btrfsprogs
 BuildRequires:	mozilla-nss-devel
 BuildRequires:	keyutils-devel
-BuildRequires:	libatomic-ops-devel
 BuildRequires:  libopenssl-devel
 BuildRequires:  lsb-release
 BuildRequires:  openldap2-devel
@@ -136,8 +148,6 @@ BuildRequires:  boost-random
 BuildRequires:	btrfs-progs
 BuildRequires:	nss-devel
 BuildRequires:	keyutils-libs-devel
-BuildRequires:	libatomic_ops-devel
-BuildRequires:	gperftools-devel
 BuildRequires:  openldap-devel
 BuildRequires:  openssl-devel
 BuildRequires:  redhat-lsb-core
@@ -197,7 +207,6 @@ Requires:      python-setuptools
 Requires:      grep
 Requires:      xfsprogs
 Requires:      logrotate
-Requires:      parted
 Requires:      util-linux
 Requires:      hdparm
 Requires:      cryptsetup
@@ -342,6 +351,7 @@ Requires:	gdisk
 %if 0%{?suse_version}
 Requires:	gptfdisk
 %endif
+Requires:       parted
 %description osd
 ceph-osd is the object storage daemon for the Ceph distributed file
 system.  It is responsible for storing objects on a local file system
@@ -660,7 +670,9 @@ export RPM_OPT_FLAGS=`echo $RPM_OPT_FLAGS | sed -e 's/i386/i486/'`
 %endif
 		$CEPH_EXTRA_CONFIGURE_ARGS \
 		%{?_with_ocf} \
-		%{?_with_tcmalloc} \
+%if %{without tcmalloc}
+		--without-tcmalloc \
+%endif
 		CFLAGS="$RPM_OPT_FLAGS" CXXFLAGS="$RPM_OPT_FLAGS"
 
 %if %{with lowmem_builder}
@@ -700,17 +712,18 @@ install -m 0644 -D src/logrotate.conf %{buildroot}%{_sysconfdir}/logrotate.d/cep
 chmod 0644 %{buildroot}%{_docdir}/ceph/sample.ceph.conf
 chmod 0644 %{buildroot}%{_docdir}/ceph/sample.fetch_config
 
-# firewall templates
+# firewall templates and /sbin/mount.ceph symlink
 %if 0%{?suse_version}
 install -m 0644 -D etc/sysconfig/SuSEfirewall2.d/services/ceph-mon %{buildroot}%{_sysconfdir}/sysconfig/SuSEfirewall2.d/services/ceph-mon
 install -m 0644 -D etc/sysconfig/SuSEfirewall2.d/services/ceph-osd-mds %{buildroot}%{_sysconfdir}/sysconfig/SuSEfirewall2.d/services/ceph-osd-mds
+mkdir -p %{buildroot}/sbin
+ln -sf %{_sbindir}/mount.ceph %{buildroot}/sbin/mount.ceph
 %endif
 
 # udev rules
 install -m 0644 -D udev/50-rbd.rules %{buildroot}%{_udevrulesdir}/50-rbd.rules
+install -m 0644 -D udev/60-ceph-by-parttypeuuid.rules %{buildroot}%{_udevrulesdir}/60-ceph-by-parttypeuuid.rules
 install -m 0644 -D udev/95-ceph-osd.rules %{buildroot}%{_udevrulesdir}/95-ceph-osd.rules
-mv %{buildroot}/sbin/mount.ceph %{buildroot}/usr/sbin/mount.ceph
-mv %{buildroot}/sbin/mount.fuse.ceph %{buildroot}/usr/sbin/mount.fuse.ceph
 
 #set up placeholder directories
 mkdir -p %{buildroot}%{_sysconfdir}/ceph
@@ -750,7 +763,6 @@ rm -rf %{buildroot}
 %{_libexecdir}/systemd/system-preset/50-ceph.preset
 %{_sbindir}/ceph-create-keys
 %{_sbindir}/rcceph
-%{_sbindir}/mount.ceph
 %dir %{_libexecdir}/ceph
 %{_libexecdir}/ceph/ceph_common.sh
 %dir %{_libdir}/rados-classes
@@ -785,7 +797,6 @@ rm -rf %{buildroot}
 %{_mandir}/man8/osdmaptool.8*
 %{_mandir}/man8/monmaptool.8*
 %{_mandir}/man8/cephfs.8*
-%{_mandir}/man8/mount.ceph.8*
 #set up placeholder directories
 %attr(750,ceph,ceph) %dir %{_localstatedir}/lib/ceph/tmp
 %attr(750,ceph,ceph) %dir %{_localstatedir}/lib/ceph/bootstrap-osd
@@ -841,6 +852,10 @@ DISABLE_RESTART_ON_UPDATE="yes"
 %{_bindir}/rbd-replay
 %{_bindir}/rbd-replay-many
 %{_bindir}/rbdmap
+%{_sbindir}/mount.ceph
+%if 0%{?suse_version}
+/sbin/mount.ceph
+%endif
 %if %{with lttng}
 %{_bindir}/rbd-replay-prep
 %endif
@@ -854,6 +869,7 @@ DISABLE_RESTART_ON_UPDATE="yes"
 %{_mandir}/man8/ceph-syn.8*
 %{_mandir}/man8/ceph-post-file.8*
 %{_mandir}/man8/ceph.8*
+%{_mandir}/man8/mount.ceph.8*
 %{_mandir}/man8/rados.8*
 %{_mandir}/man8/rbd.8*
 %{_mandir}/man8/rbdmap.8*
@@ -1140,6 +1156,7 @@ fi
 %{_sbindir}/ceph-disk
 %{_sbindir}/ceph-disk-udev
 %{_libexecdir}/ceph/ceph-osd-prestart.sh
+%{_udevrulesdir}/60-ceph-by-parttypeuuid.rules
 %{_udevrulesdir}/95-ceph-osd.rules
 %{_mandir}/man8/ceph-clsinfo.8*
 %{_mandir}/man8/ceph-disk.8*
@@ -1201,10 +1218,6 @@ fi
 %dir %{_prefix}/lib/ocf
 %dir %{_prefix}/lib/ocf/resource.d
 %dir %{_prefix}/lib/ocf/resource.d/ceph
-%exclude %{_prefix}/lib/ocf/resource.d/ceph/ceph
-%exclude %{_prefix}/lib/ocf/resource.d/ceph/mds
-%exclude %{_prefix}/lib/ocf/resource.d/ceph/mon
-%exclude %{_prefix}/lib/ocf/resource.d/ceph/osd
 %{_prefix}/lib/ocf/resource.d/ceph/rbd
 
 %endif
diff --git a/src/test/centos-6/install-deps.sh b/src/test/centos-6/install-deps.sh
index 03ca760..129178f 100755
--- a/src/test/centos-6/install-deps.sh
+++ b/src/test/centos-6/install-deps.sh
@@ -24,7 +24,7 @@ if test -f /etc/redhat-release ; then
 fi
 
 if type apt-get > /dev/null 2>&1 ; then
-    $SUDO apt-get install -y lsb-release
+    $SUDO apt-get install -y lsb-release devscripts equivs
 fi
 
 if type zypper > /dev/null 2>&1 ; then
@@ -39,20 +39,23 @@ Ubuntu|Debian|Devuan)
             exit 1
         fi
         touch $DIR/status
-        packages=$(dpkg-checkbuilddeps --admindir=$DIR debian/control 2>&1 | \
-            perl -p -e 's/.*Unmet build dependencies: *//;' \
-            -e 's/build-essential:native/build-essential/;' \
-            -e 's/\s*\|\s*/\|/g;' \
-            -e 's/\(.*?\)//g;' \
-            -e 's/ +/\n/g;' | sort)
+
+	backports=""
+	control="debian/control"
         case $(lsb_release -sc) in
             squeeze|wheezy)
-                packages=$(echo $packages | perl -pe 's/[-\w]*babeltrace[-\w]*//g')
+		control="/tmp/control.$$"
+		grep -v babeltrace debian/control > $control
                 backports="-t $(lsb_release -sc)-backports"
                 ;;
         esac
-        packages=$(echo $packages) # change newlines into spaces
-        $SUDO env DEBIAN_FRONTEND=noninteractive apt-get install $backports -y $packages || exit 1
+
+	# make a metapackage that expresses the build dependencies,
+	# install it, rm the .deb; then uninstall the package as its
+	# work is done
+	$SUDO env DEBIAN_FRONTEND=noninteractive mk-build-deps --install --remove --tool="apt-get -y --no-install-recommends $backports" $control || exit 1
+	$SUDO env DEBIAN_FRONTEND=noninteractive apt-get -y remove ceph-build-deps
+	if [ -n "$backports" ] ; then rm $control; fi
         ;;
 CentOS|Fedora|RedHatEnterpriseServer)
         case $(lsb_release -si) in
@@ -106,7 +109,14 @@ function activate_virtualenv() {
     local env_dir=$top_srcdir/install-deps-$interpreter
 
     if ! test -d $env_dir ; then
-        virtualenv --python $interpreter $env_dir
+        # Make a temporary virtualenv to get a fresh version of virtualenv
+        # because CentOS 7 has a buggy old version (v1.10.1)
+        # https://github.com/pypa/virtualenv/issues/463
+        virtualenv ${env_dir}_tmp
+        ${env_dir}_tmp/bin/pip install --upgrade virtualenv
+        ${env_dir}_tmp/bin/virtualenv --python $interpreter $env_dir
+        rm -rf ${env_dir}_tmp
+
         . $env_dir/bin/activate
         if ! populate_wheelhouse install ; then
             rm -rf $env_dir
diff --git a/src/test/centos-7/ceph.spec.in b/src/test/centos-7/ceph.spec.in
index 3cf6307..b2e4b12 100644
--- a/src/test/centos-7/ceph.spec.in
+++ b/src/test/centos-7/ceph.spec.in
@@ -18,7 +18,12 @@
 %bcond_without cephfs_java
 %bcond_with tests
 %bcond_with xio
+%ifnarch s390 s390x
 %bcond_without tcmalloc
+%else
+# no gperftools/tcmalloc on s390(x)
+%bcond_with tcmalloc
+%endif
 %bcond_without libs_compat
 %bcond_with lowmem_builder
 %if 0%{?fedora} || 0%{?rhel}
@@ -59,6 +64,13 @@ Group:         System/Filesystems
 %endif
 URL:		http://ceph.com/
 Source0:	http://ceph.com/download/%{name}-%{version}.tar.bz2
+%if 0%{?suse_version}
+%if 0%{?is_opensuse}
+ExclusiveArch:  x86_64 aarch64 ppc64 ppc64le
+%else
+ExclusiveArch:  x86_64 aarch64
+%endif
+%endif
 #################################################################################
 # dependencies that apply across all distro families
 #################################################################################
@@ -81,9 +93,13 @@ BuildRequires:	cryptsetup
 BuildRequires:	fuse-devel
 BuildRequires:	gcc-c++
 BuildRequires:	gdbm
+%if 0%{with tcmalloc}
+BuildRequires:	gperftools-devel
+%endif
 BuildRequires:	hdparm
 BuildRequires:	leveldb-devel > 1.2
 BuildRequires:	libaio-devel
+BuildRequires:	libatomic_ops-devel
 BuildRequires:	libblkid-devel >= 2.17
 BuildRequires:	libcurl-devel
 BuildRequires:	libudev-devel
@@ -118,13 +134,9 @@ BuildRequires:	systemd
 PreReq:		%fillup_prereq
 BuildRequires:	net-tools
 BuildRequires:	libbz2-devel
-%if 0%{with tcmalloc}
-BuildRequires:	gperftools-devel
-%endif
 BuildRequires:  btrfsprogs
 BuildRequires:	mozilla-nss-devel
 BuildRequires:	keyutils-devel
-BuildRequires:	libatomic-ops-devel
 BuildRequires:  libopenssl-devel
 BuildRequires:  lsb-release
 BuildRequires:  openldap2-devel
@@ -136,8 +148,6 @@ BuildRequires:  boost-random
 BuildRequires:	btrfs-progs
 BuildRequires:	nss-devel
 BuildRequires:	keyutils-libs-devel
-BuildRequires:	libatomic_ops-devel
-BuildRequires:	gperftools-devel
 BuildRequires:  openldap-devel
 BuildRequires:  openssl-devel
 BuildRequires:  redhat-lsb-core
@@ -197,7 +207,6 @@ Requires:      python-setuptools
 Requires:      grep
 Requires:      xfsprogs
 Requires:      logrotate
-Requires:      parted
 Requires:      util-linux
 Requires:      hdparm
 Requires:      cryptsetup
@@ -342,6 +351,7 @@ Requires:	gdisk
 %if 0%{?suse_version}
 Requires:	gptfdisk
 %endif
+Requires:       parted
 %description osd
 ceph-osd is the object storage daemon for the Ceph distributed file
 system.  It is responsible for storing objects on a local file system
@@ -660,7 +670,9 @@ export RPM_OPT_FLAGS=`echo $RPM_OPT_FLAGS | sed -e 's/i386/i486/'`
 %endif
 		$CEPH_EXTRA_CONFIGURE_ARGS \
 		%{?_with_ocf} \
-		%{?_with_tcmalloc} \
+%if %{without tcmalloc}
+		--without-tcmalloc \
+%endif
 		CFLAGS="$RPM_OPT_FLAGS" CXXFLAGS="$RPM_OPT_FLAGS"
 
 %if %{with lowmem_builder}
@@ -700,17 +712,18 @@ install -m 0644 -D src/logrotate.conf %{buildroot}%{_sysconfdir}/logrotate.d/cep
 chmod 0644 %{buildroot}%{_docdir}/ceph/sample.ceph.conf
 chmod 0644 %{buildroot}%{_docdir}/ceph/sample.fetch_config
 
-# firewall templates
+# firewall templates and /sbin/mount.ceph symlink
 %if 0%{?suse_version}
 install -m 0644 -D etc/sysconfig/SuSEfirewall2.d/services/ceph-mon %{buildroot}%{_sysconfdir}/sysconfig/SuSEfirewall2.d/services/ceph-mon
 install -m 0644 -D etc/sysconfig/SuSEfirewall2.d/services/ceph-osd-mds %{buildroot}%{_sysconfdir}/sysconfig/SuSEfirewall2.d/services/ceph-osd-mds
+mkdir -p %{buildroot}/sbin
+ln -sf %{_sbindir}/mount.ceph %{buildroot}/sbin/mount.ceph
 %endif
 
 # udev rules
 install -m 0644 -D udev/50-rbd.rules %{buildroot}%{_udevrulesdir}/50-rbd.rules
+install -m 0644 -D udev/60-ceph-by-parttypeuuid.rules %{buildroot}%{_udevrulesdir}/60-ceph-by-parttypeuuid.rules
 install -m 0644 -D udev/95-ceph-osd.rules %{buildroot}%{_udevrulesdir}/95-ceph-osd.rules
-mv %{buildroot}/sbin/mount.ceph %{buildroot}/usr/sbin/mount.ceph
-mv %{buildroot}/sbin/mount.fuse.ceph %{buildroot}/usr/sbin/mount.fuse.ceph
 
 #set up placeholder directories
 mkdir -p %{buildroot}%{_sysconfdir}/ceph
@@ -750,7 +763,6 @@ rm -rf %{buildroot}
 %{_libexecdir}/systemd/system-preset/50-ceph.preset
 %{_sbindir}/ceph-create-keys
 %{_sbindir}/rcceph
-%{_sbindir}/mount.ceph
 %dir %{_libexecdir}/ceph
 %{_libexecdir}/ceph/ceph_common.sh
 %dir %{_libdir}/rados-classes
@@ -785,7 +797,6 @@ rm -rf %{buildroot}
 %{_mandir}/man8/osdmaptool.8*
 %{_mandir}/man8/monmaptool.8*
 %{_mandir}/man8/cephfs.8*
-%{_mandir}/man8/mount.ceph.8*
 #set up placeholder directories
 %attr(750,ceph,ceph) %dir %{_localstatedir}/lib/ceph/tmp
 %attr(750,ceph,ceph) %dir %{_localstatedir}/lib/ceph/bootstrap-osd
@@ -841,6 +852,10 @@ DISABLE_RESTART_ON_UPDATE="yes"
 %{_bindir}/rbd-replay
 %{_bindir}/rbd-replay-many
 %{_bindir}/rbdmap
+%{_sbindir}/mount.ceph
+%if 0%{?suse_version}
+/sbin/mount.ceph
+%endif
 %if %{with lttng}
 %{_bindir}/rbd-replay-prep
 %endif
@@ -854,6 +869,7 @@ DISABLE_RESTART_ON_UPDATE="yes"
 %{_mandir}/man8/ceph-syn.8*
 %{_mandir}/man8/ceph-post-file.8*
 %{_mandir}/man8/ceph.8*
+%{_mandir}/man8/mount.ceph.8*
 %{_mandir}/man8/rados.8*
 %{_mandir}/man8/rbd.8*
 %{_mandir}/man8/rbdmap.8*
@@ -1140,6 +1156,7 @@ fi
 %{_sbindir}/ceph-disk
 %{_sbindir}/ceph-disk-udev
 %{_libexecdir}/ceph/ceph-osd-prestart.sh
+%{_udevrulesdir}/60-ceph-by-parttypeuuid.rules
 %{_udevrulesdir}/95-ceph-osd.rules
 %{_mandir}/man8/ceph-clsinfo.8*
 %{_mandir}/man8/ceph-disk.8*
@@ -1201,10 +1218,6 @@ fi
 %dir %{_prefix}/lib/ocf
 %dir %{_prefix}/lib/ocf/resource.d
 %dir %{_prefix}/lib/ocf/resource.d/ceph
-%exclude %{_prefix}/lib/ocf/resource.d/ceph/ceph
-%exclude %{_prefix}/lib/ocf/resource.d/ceph/mds
-%exclude %{_prefix}/lib/ocf/resource.d/ceph/mon
-%exclude %{_prefix}/lib/ocf/resource.d/ceph/osd
 %{_prefix}/lib/ocf/resource.d/ceph/rbd
 
 %endif
diff --git a/src/test/centos-7/install-deps.sh b/src/test/centos-7/install-deps.sh
index 03ca760..129178f 100755
--- a/src/test/centos-7/install-deps.sh
+++ b/src/test/centos-7/install-deps.sh
@@ -24,7 +24,7 @@ if test -f /etc/redhat-release ; then
 fi
 
 if type apt-get > /dev/null 2>&1 ; then
-    $SUDO apt-get install -y lsb-release
+    $SUDO apt-get install -y lsb-release devscripts equivs
 fi
 
 if type zypper > /dev/null 2>&1 ; then
@@ -39,20 +39,23 @@ Ubuntu|Debian|Devuan)
             exit 1
         fi
         touch $DIR/status
-        packages=$(dpkg-checkbuilddeps --admindir=$DIR debian/control 2>&1 | \
-            perl -p -e 's/.*Unmet build dependencies: *//;' \
-            -e 's/build-essential:native/build-essential/;' \
-            -e 's/\s*\|\s*/\|/g;' \
-            -e 's/\(.*?\)//g;' \
-            -e 's/ +/\n/g;' | sort)
+
+	backports=""
+	control="debian/control"
         case $(lsb_release -sc) in
             squeeze|wheezy)
-                packages=$(echo $packages | perl -pe 's/[-\w]*babeltrace[-\w]*//g')
+		control="/tmp/control.$$"
+		grep -v babeltrace debian/control > $control
                 backports="-t $(lsb_release -sc)-backports"
                 ;;
         esac
-        packages=$(echo $packages) # change newlines into spaces
-        $SUDO env DEBIAN_FRONTEND=noninteractive apt-get install $backports -y $packages || exit 1
+
+	# make a metapackage that expresses the build dependencies,
+	# install it, rm the .deb; then uninstall the package as its
+	# work is done
+	$SUDO env DEBIAN_FRONTEND=noninteractive mk-build-deps --install --remove --tool="apt-get -y --no-install-recommends $backports" $control || exit 1
+	$SUDO env DEBIAN_FRONTEND=noninteractive apt-get -y remove ceph-build-deps
+	if [ -n "$backports" ] ; then rm $control; fi
         ;;
 CentOS|Fedora|RedHatEnterpriseServer)
         case $(lsb_release -si) in
@@ -106,7 +109,14 @@ function activate_virtualenv() {
     local env_dir=$top_srcdir/install-deps-$interpreter
 
     if ! test -d $env_dir ; then
-        virtualenv --python $interpreter $env_dir
+        # Make a temporary virtualenv to get a fresh version of virtualenv
+        # because CentOS 7 has a buggy old version (v1.10.1)
+        # https://github.com/pypa/virtualenv/issues/463
+        virtualenv ${env_dir}_tmp
+        ${env_dir}_tmp/bin/pip install --upgrade virtualenv
+        ${env_dir}_tmp/bin/virtualenv --python $interpreter $env_dir
+        rm -rf ${env_dir}_tmp
+
         . $env_dir/bin/activate
         if ! populate_wheelhouse install ; then
             rm -rf $env_dir
diff --git a/src/test/cli/radosgw-admin/help.t b/src/test/cli/radosgw-admin/help.t
index 978d45e..606a5f3 100644
--- a/src/test/cli/radosgw-admin/help.t
+++ b/src/test/cli/radosgw-admin/help.t
@@ -66,6 +66,7 @@
     zone modify                modify an existing zone
     zone set                   set zone cluster params (requires infile)
     zone list                  list all zones set on this cluster
+    zone rename                rename a zone
     pool add                   add an existing pool for data placement
     pool rm                    remove an existing pool from data placement set
     pools list                 list placement active set
@@ -78,8 +79,6 @@
     log rm                     remove log object
     usage show                 show usage (by user, date range)
     usage trim                 trim usage (by user, date range)
-    temp remove                remove temporary objects that were created up to
-                               specified date (and optional time)
     gc list                    dump expired garbage collection objects (specify
                                --include-all to list all entries, including unexpired)
     gc process                 manually process garbage
@@ -187,7 +186,7 @@
   Quota options:
      --bucket                  specified bucket for quota command
      --max-objects             specify max objects (negative value to disable)
-     --max-size                specify max size (in bytes, negative value to disable)
+     --max-size                specify max size (in B/K/M/G/T, negative value to disable)
      --quota-scope             scope of quota (bucket, user)
   
   Orphans search options:
diff --git a/src/test/debian-jessie/install-deps.sh b/src/test/debian-jessie/install-deps.sh
index 03ca760..129178f 100755
--- a/src/test/debian-jessie/install-deps.sh
+++ b/src/test/debian-jessie/install-deps.sh
@@ -24,7 +24,7 @@ if test -f /etc/redhat-release ; then
 fi
 
 if type apt-get > /dev/null 2>&1 ; then
-    $SUDO apt-get install -y lsb-release
+    $SUDO apt-get install -y lsb-release devscripts equivs
 fi
 
 if type zypper > /dev/null 2>&1 ; then
@@ -39,20 +39,23 @@ Ubuntu|Debian|Devuan)
             exit 1
         fi
         touch $DIR/status
-        packages=$(dpkg-checkbuilddeps --admindir=$DIR debian/control 2>&1 | \
-            perl -p -e 's/.*Unmet build dependencies: *//;' \
-            -e 's/build-essential:native/build-essential/;' \
-            -e 's/\s*\|\s*/\|/g;' \
-            -e 's/\(.*?\)//g;' \
-            -e 's/ +/\n/g;' | sort)
+
+	backports=""
+	control="debian/control"
         case $(lsb_release -sc) in
             squeeze|wheezy)
-                packages=$(echo $packages | perl -pe 's/[-\w]*babeltrace[-\w]*//g')
+		control="/tmp/control.$$"
+		grep -v babeltrace debian/control > $control
                 backports="-t $(lsb_release -sc)-backports"
                 ;;
         esac
-        packages=$(echo $packages) # change newlines into spaces
-        $SUDO env DEBIAN_FRONTEND=noninteractive apt-get install $backports -y $packages || exit 1
+
+	# make a metapackage that expresses the build dependencies,
+	# install it, rm the .deb; then uninstall the package as its
+	# work is done
+	$SUDO env DEBIAN_FRONTEND=noninteractive mk-build-deps --install --remove --tool="apt-get -y --no-install-recommends $backports" $control || exit 1
+	$SUDO env DEBIAN_FRONTEND=noninteractive apt-get -y remove ceph-build-deps
+	if [ -n "$backports" ] ; then rm $control; fi
         ;;
 CentOS|Fedora|RedHatEnterpriseServer)
         case $(lsb_release -si) in
@@ -106,7 +109,14 @@ function activate_virtualenv() {
     local env_dir=$top_srcdir/install-deps-$interpreter
 
     if ! test -d $env_dir ; then
-        virtualenv --python $interpreter $env_dir
+        # Make a temporary virtualenv to get a fresh version of virtualenv
+        # because CentOS 7 has a buggy old version (v1.10.1)
+        # https://github.com/pypa/virtualenv/issues/463
+        virtualenv ${env_dir}_tmp
+        ${env_dir}_tmp/bin/pip install --upgrade virtualenv
+        ${env_dir}_tmp/bin/virtualenv --python $interpreter $env_dir
+        rm -rf ${env_dir}_tmp
+
         . $env_dir/bin/activate
         if ! populate_wheelhouse install ; then
             rm -rf $env_dir
diff --git a/src/test/fedora-21/ceph.spec.in b/src/test/fedora-21/ceph.spec.in
index 3cf6307..b2e4b12 100644
--- a/src/test/fedora-21/ceph.spec.in
+++ b/src/test/fedora-21/ceph.spec.in
@@ -18,7 +18,12 @@
 %bcond_without cephfs_java
 %bcond_with tests
 %bcond_with xio
+%ifnarch s390 s390x
 %bcond_without tcmalloc
+%else
+# no gperftools/tcmalloc on s390(x)
+%bcond_with tcmalloc
+%endif
 %bcond_without libs_compat
 %bcond_with lowmem_builder
 %if 0%{?fedora} || 0%{?rhel}
@@ -59,6 +64,13 @@ Group:         System/Filesystems
 %endif
 URL:		http://ceph.com/
 Source0:	http://ceph.com/download/%{name}-%{version}.tar.bz2
+%if 0%{?suse_version}
+%if 0%{?is_opensuse}
+ExclusiveArch:  x86_64 aarch64 ppc64 ppc64le
+%else
+ExclusiveArch:  x86_64 aarch64
+%endif
+%endif
 #################################################################################
 # dependencies that apply across all distro families
 #################################################################################
@@ -81,9 +93,13 @@ BuildRequires:	cryptsetup
 BuildRequires:	fuse-devel
 BuildRequires:	gcc-c++
 BuildRequires:	gdbm
+%if 0%{with tcmalloc}
+BuildRequires:	gperftools-devel
+%endif
 BuildRequires:	hdparm
 BuildRequires:	leveldb-devel > 1.2
 BuildRequires:	libaio-devel
+BuildRequires:	libatomic_ops-devel
 BuildRequires:	libblkid-devel >= 2.17
 BuildRequires:	libcurl-devel
 BuildRequires:	libudev-devel
@@ -118,13 +134,9 @@ BuildRequires:	systemd
 PreReq:		%fillup_prereq
 BuildRequires:	net-tools
 BuildRequires:	libbz2-devel
-%if 0%{with tcmalloc}
-BuildRequires:	gperftools-devel
-%endif
 BuildRequires:  btrfsprogs
 BuildRequires:	mozilla-nss-devel
 BuildRequires:	keyutils-devel
-BuildRequires:	libatomic-ops-devel
 BuildRequires:  libopenssl-devel
 BuildRequires:  lsb-release
 BuildRequires:  openldap2-devel
@@ -136,8 +148,6 @@ BuildRequires:  boost-random
 BuildRequires:	btrfs-progs
 BuildRequires:	nss-devel
 BuildRequires:	keyutils-libs-devel
-BuildRequires:	libatomic_ops-devel
-BuildRequires:	gperftools-devel
 BuildRequires:  openldap-devel
 BuildRequires:  openssl-devel
 BuildRequires:  redhat-lsb-core
@@ -197,7 +207,6 @@ Requires:      python-setuptools
 Requires:      grep
 Requires:      xfsprogs
 Requires:      logrotate
-Requires:      parted
 Requires:      util-linux
 Requires:      hdparm
 Requires:      cryptsetup
@@ -342,6 +351,7 @@ Requires:	gdisk
 %if 0%{?suse_version}
 Requires:	gptfdisk
 %endif
+Requires:       parted
 %description osd
 ceph-osd is the object storage daemon for the Ceph distributed file
 system.  It is responsible for storing objects on a local file system
@@ -660,7 +670,9 @@ export RPM_OPT_FLAGS=`echo $RPM_OPT_FLAGS | sed -e 's/i386/i486/'`
 %endif
 		$CEPH_EXTRA_CONFIGURE_ARGS \
 		%{?_with_ocf} \
-		%{?_with_tcmalloc} \
+%if %{without tcmalloc}
+		--without-tcmalloc \
+%endif
 		CFLAGS="$RPM_OPT_FLAGS" CXXFLAGS="$RPM_OPT_FLAGS"
 
 %if %{with lowmem_builder}
@@ -700,17 +712,18 @@ install -m 0644 -D src/logrotate.conf %{buildroot}%{_sysconfdir}/logrotate.d/cep
 chmod 0644 %{buildroot}%{_docdir}/ceph/sample.ceph.conf
 chmod 0644 %{buildroot}%{_docdir}/ceph/sample.fetch_config
 
-# firewall templates
+# firewall templates and /sbin/mount.ceph symlink
 %if 0%{?suse_version}
 install -m 0644 -D etc/sysconfig/SuSEfirewall2.d/services/ceph-mon %{buildroot}%{_sysconfdir}/sysconfig/SuSEfirewall2.d/services/ceph-mon
 install -m 0644 -D etc/sysconfig/SuSEfirewall2.d/services/ceph-osd-mds %{buildroot}%{_sysconfdir}/sysconfig/SuSEfirewall2.d/services/ceph-osd-mds
+mkdir -p %{buildroot}/sbin
+ln -sf %{_sbindir}/mount.ceph %{buildroot}/sbin/mount.ceph
 %endif
 
 # udev rules
 install -m 0644 -D udev/50-rbd.rules %{buildroot}%{_udevrulesdir}/50-rbd.rules
+install -m 0644 -D udev/60-ceph-by-parttypeuuid.rules %{buildroot}%{_udevrulesdir}/60-ceph-by-parttypeuuid.rules
 install -m 0644 -D udev/95-ceph-osd.rules %{buildroot}%{_udevrulesdir}/95-ceph-osd.rules
-mv %{buildroot}/sbin/mount.ceph %{buildroot}/usr/sbin/mount.ceph
-mv %{buildroot}/sbin/mount.fuse.ceph %{buildroot}/usr/sbin/mount.fuse.ceph
 
 #set up placeholder directories
 mkdir -p %{buildroot}%{_sysconfdir}/ceph
@@ -750,7 +763,6 @@ rm -rf %{buildroot}
 %{_libexecdir}/systemd/system-preset/50-ceph.preset
 %{_sbindir}/ceph-create-keys
 %{_sbindir}/rcceph
-%{_sbindir}/mount.ceph
 %dir %{_libexecdir}/ceph
 %{_libexecdir}/ceph/ceph_common.sh
 %dir %{_libdir}/rados-classes
@@ -785,7 +797,6 @@ rm -rf %{buildroot}
 %{_mandir}/man8/osdmaptool.8*
 %{_mandir}/man8/monmaptool.8*
 %{_mandir}/man8/cephfs.8*
-%{_mandir}/man8/mount.ceph.8*
 #set up placeholder directories
 %attr(750,ceph,ceph) %dir %{_localstatedir}/lib/ceph/tmp
 %attr(750,ceph,ceph) %dir %{_localstatedir}/lib/ceph/bootstrap-osd
@@ -841,6 +852,10 @@ DISABLE_RESTART_ON_UPDATE="yes"
 %{_bindir}/rbd-replay
 %{_bindir}/rbd-replay-many
 %{_bindir}/rbdmap
+%{_sbindir}/mount.ceph
+%if 0%{?suse_version}
+/sbin/mount.ceph
+%endif
 %if %{with lttng}
 %{_bindir}/rbd-replay-prep
 %endif
@@ -854,6 +869,7 @@ DISABLE_RESTART_ON_UPDATE="yes"
 %{_mandir}/man8/ceph-syn.8*
 %{_mandir}/man8/ceph-post-file.8*
 %{_mandir}/man8/ceph.8*
+%{_mandir}/man8/mount.ceph.8*
 %{_mandir}/man8/rados.8*
 %{_mandir}/man8/rbd.8*
 %{_mandir}/man8/rbdmap.8*
@@ -1140,6 +1156,7 @@ fi
 %{_sbindir}/ceph-disk
 %{_sbindir}/ceph-disk-udev
 %{_libexecdir}/ceph/ceph-osd-prestart.sh
+%{_udevrulesdir}/60-ceph-by-parttypeuuid.rules
 %{_udevrulesdir}/95-ceph-osd.rules
 %{_mandir}/man8/ceph-clsinfo.8*
 %{_mandir}/man8/ceph-disk.8*
@@ -1201,10 +1218,6 @@ fi
 %dir %{_prefix}/lib/ocf
 %dir %{_prefix}/lib/ocf/resource.d
 %dir %{_prefix}/lib/ocf/resource.d/ceph
-%exclude %{_prefix}/lib/ocf/resource.d/ceph/ceph
-%exclude %{_prefix}/lib/ocf/resource.d/ceph/mds
-%exclude %{_prefix}/lib/ocf/resource.d/ceph/mon
-%exclude %{_prefix}/lib/ocf/resource.d/ceph/osd
 %{_prefix}/lib/ocf/resource.d/ceph/rbd
 
 %endif
diff --git a/src/test/fedora-21/install-deps.sh b/src/test/fedora-21/install-deps.sh
index 03ca760..129178f 100755
--- a/src/test/fedora-21/install-deps.sh
+++ b/src/test/fedora-21/install-deps.sh
@@ -24,7 +24,7 @@ if test -f /etc/redhat-release ; then
 fi
 
 if type apt-get > /dev/null 2>&1 ; then
-    $SUDO apt-get install -y lsb-release
+    $SUDO apt-get install -y lsb-release devscripts equivs
 fi
 
 if type zypper > /dev/null 2>&1 ; then
@@ -39,20 +39,23 @@ Ubuntu|Debian|Devuan)
             exit 1
         fi
         touch $DIR/status
-        packages=$(dpkg-checkbuilddeps --admindir=$DIR debian/control 2>&1 | \
-            perl -p -e 's/.*Unmet build dependencies: *//;' \
-            -e 's/build-essential:native/build-essential/;' \
-            -e 's/\s*\|\s*/\|/g;' \
-            -e 's/\(.*?\)//g;' \
-            -e 's/ +/\n/g;' | sort)
+
+	backports=""
+	control="debian/control"
         case $(lsb_release -sc) in
             squeeze|wheezy)
-                packages=$(echo $packages | perl -pe 's/[-\w]*babeltrace[-\w]*//g')
+		control="/tmp/control.$$"
+		grep -v babeltrace debian/control > $control
                 backports="-t $(lsb_release -sc)-backports"
                 ;;
         esac
-        packages=$(echo $packages) # change newlines into spaces
-        $SUDO env DEBIAN_FRONTEND=noninteractive apt-get install $backports -y $packages || exit 1
+
+	# make a metapackage that expresses the build dependencies,
+	# install it, rm the .deb; then uninstall the package as its
+	# work is done
+	$SUDO env DEBIAN_FRONTEND=noninteractive mk-build-deps --install --remove --tool="apt-get -y --no-install-recommends $backports" $control || exit 1
+	$SUDO env DEBIAN_FRONTEND=noninteractive apt-get -y remove ceph-build-deps
+	if [ -n "$backports" ] ; then rm $control; fi
         ;;
 CentOS|Fedora|RedHatEnterpriseServer)
         case $(lsb_release -si) in
@@ -106,7 +109,14 @@ function activate_virtualenv() {
     local env_dir=$top_srcdir/install-deps-$interpreter
 
     if ! test -d $env_dir ; then
-        virtualenv --python $interpreter $env_dir
+        # Make a temporary virtualenv to get a fresh version of virtualenv
+        # because CentOS 7 has a buggy old version (v1.10.1)
+        # https://github.com/pypa/virtualenv/issues/463
+        virtualenv ${env_dir}_tmp
+        ${env_dir}_tmp/bin/pip install --upgrade virtualenv
+        ${env_dir}_tmp/bin/virtualenv --python $interpreter $env_dir
+        rm -rf ${env_dir}_tmp
+
         . $env_dir/bin/activate
         if ! populate_wheelhouse install ; then
             rm -rf $env_dir
diff --git a/src/test/journal/RadosTestFixture.cc b/src/test/journal/RadosTestFixture.cc
index f57e5ae..dba3ec6 100644
--- a/src/test/journal/RadosTestFixture.cc
+++ b/src/test/journal/RadosTestFixture.cc
@@ -5,6 +5,7 @@
 #include "cls/journal/cls_journal_client.h"
 #include "include/stringify.h"
 #include "common/WorkQueue.h"
+#include "journal/Settings.h"
 
 RadosTestFixture::RadosTestFixture()
   : m_timer_lock("m_timer_lock"), m_timer(NULL), m_listener(this) {
@@ -67,10 +68,13 @@ int RadosTestFixture::create(const std::string &oid, uint8_t order,
 
 journal::JournalMetadataPtr RadosTestFixture::create_metadata(
     const std::string &oid, const std::string &client_id,
-    double commit_internal) {
+    double commit_interval, uint64_t max_fetch_bytes) {
+  journal::Settings settings;
+  settings.commit_interval = commit_interval;
+  settings.max_fetch_bytes = max_fetch_bytes;
+
   journal::JournalMetadataPtr metadata(new journal::JournalMetadata(
-    m_work_queue, m_timer, &m_timer_lock, m_ioctx, oid, client_id,
-    commit_internal));
+    m_work_queue, m_timer, &m_timer_lock, m_ioctx, oid, client_id, settings));
   m_metadatas.push_back(metadata);
   return metadata;
 }
diff --git a/src/test/journal/RadosTestFixture.h b/src/test/journal/RadosTestFixture.h
index d7cd1a8..4ea22e7 100644
--- a/src/test/journal/RadosTestFixture.h
+++ b/src/test/journal/RadosTestFixture.h
@@ -25,7 +25,8 @@ public:
              uint8_t splay_width = 2);
   journal::JournalMetadataPtr create_metadata(const std::string &oid,
                                               const std::string &client_id = "client",
-                                              double commit_internal = 0.1);
+                                              double commit_internal = 0.1,
+                                              uint64_t max_fetch_bytes = 0);
   int append(const std::string &oid, const bufferlist &bl);
 
   int client_register(const std::string &oid, const std::string &id = "client",
@@ -35,7 +36,7 @@ public:
 
   bufferlist create_payload(const std::string &payload);
 
-  struct Listener : public journal::JournalMetadata::Listener {
+  struct Listener : public journal::JournalMetadataListener {
     RadosTestFixture *test_fixture;
     Mutex mutex;
     Cond cond;
diff --git a/src/test/journal/mock/MockJournaler.h b/src/test/journal/mock/MockJournaler.h
index 05efb42..e1998ff 100644
--- a/src/test/journal/mock/MockJournaler.h
+++ b/src/test/journal/mock/MockJournaler.h
@@ -20,6 +20,7 @@ class SafeTimer;
 namespace journal {
 
 struct ReplayHandler;
+struct Settings;
 
 struct MockFuture {
   static MockFuture *s_instance;
@@ -128,18 +129,21 @@ struct MockJournaler {
   MOCK_METHOD1(committed, void(const MockFutureProxy &future));
   MOCK_METHOD1(flush_commit_position, void(Context*));
 
+  MOCK_METHOD1(add_listener, void(JournalMetadataListener *));
+  MOCK_METHOD1(remove_listener, void(JournalMetadataListener *));
+
 };
 
 struct MockJournalerProxy {
   template <typename IoCtxT>
   MockJournalerProxy(IoCtxT &header_ioctx, const std::string &,
-                     const std::string &, double) {
+                     const std::string &, const Settings&) {
     MockJournaler::get_instance().construct();
   }
 
   MockJournalerProxy(ContextWQ *work_queue, SafeTimer *timer, Mutex *timer_lock,
                      librados::IoCtx &header_ioctx, const std::string &journal_id,
-                     const std::string &client_id, double commit_interval) {
+                     const std::string &client_id, const Settings&) {
     MockJournaler::get_instance().construct();
   }
 
@@ -155,6 +159,10 @@ struct MockJournalerProxy {
   int register_client(const bufferlist &data) {
     return -EINVAL;
   }
+  void unregister_client(Context *ctx) {
+    ctx->complete(-EINVAL);
+  }
+
   void allocate_tag(uint64_t, const bufferlist &,
                     cls::journal::Tag*, Context *on_finish) {
     on_finish->complete(-EINVAL);
@@ -266,6 +274,14 @@ struct MockJournalerProxy {
   void flush_commit_position(Context *on_finish) {
     MockJournaler::get_instance().flush_commit_position(on_finish);
   }
+
+  void add_listener(JournalMetadataListener *listener) {
+    MockJournaler::get_instance().add_listener(listener);
+  }
+
+  void remove_listener(JournalMetadataListener *listener) {
+    MockJournaler::get_instance().remove_listener(listener);
+  }
 };
 
 std::ostream &operator<<(std::ostream &os, const MockJournalerProxy &);
diff --git a/src/test/journal/test_FutureImpl.cc b/src/test/journal/test_FutureImpl.cc
index eb5f806..af8ca76 100644
--- a/src/test/journal/test_FutureImpl.cc
+++ b/src/test/journal/test_FutureImpl.cc
@@ -154,14 +154,17 @@ TEST_F(TestFutureImpl, FlushChain) {
                                                  future1);
   journal::FutureImplPtr future3 = create_future(235, 1, 458,
                                                  future2);
+
+  FlushHandler flush_handler;
   ASSERT_FALSE(future1->attach(&m_flush_handler));
-  ASSERT_FALSE(future2->attach(&m_flush_handler));
+  ASSERT_FALSE(future2->attach(&flush_handler));
   ASSERT_FALSE(future3->attach(&m_flush_handler));
 
   C_SaferCond cond;
   future3->flush(&cond);
 
-  ASSERT_EQ(3U, m_flush_handler.flushes);
+  ASSERT_EQ(1U, m_flush_handler.flushes);
+  ASSERT_EQ(1U, flush_handler.flushes);
 
   future3->safe(0);
   ASSERT_FALSE(future3->is_complete());
diff --git a/src/test/journal/test_JournalPlayer.cc b/src/test/journal/test_JournalPlayer.cc
index 000f13b..03255d0 100644
--- a/src/test/journal/test_JournalPlayer.cc
+++ b/src/test/journal/test_JournalPlayer.cc
@@ -13,10 +13,14 @@
 #include <list>
 #include <boost/scope_exit.hpp>
 
+typedef std::list<journal::Entry> Entries;
+
+template <typename T>
 class TestJournalPlayer : public RadosTestFixture {
 public:
   typedef std::list<journal::JournalPlayer *> JournalPlayers;
-  typedef std::list<journal::Entry> Entries;
+
+  static const uint64_t max_fetch_bytes = T::max_fetch_bytes;
 
   struct ReplayHandler : public journal::ReplayHandler {
     Mutex lock;
@@ -54,19 +58,25 @@ public:
     RadosTestFixture::TearDown();
   }
 
+  journal::JournalMetadataPtr create_metadata(const std::string &oid) {
+    return RadosTestFixture::create_metadata(oid, "client", 0.1,
+                                             max_fetch_bytes);
+  }
+
   int client_commit(const std::string &oid,
                     journal::JournalPlayer::ObjectSetPosition position) {
     return RadosTestFixture::client_commit(oid, "client", position);
   }
 
   journal::Entry create_entry(uint64_t tag_tid, uint64_t entry_tid) {
+    std::string payload(128, '0');
     bufferlist payload_bl;
-    payload_bl.append("playload");
+    payload_bl.append(payload);
     return journal::Entry(tag_tid, entry_tid, payload_bl);
   }
 
   journal::JournalPlayer *create_player(const std::string &oid,
-                                          const journal::JournalMetadataPtr &metadata) {
+                                        const journal::JournalMetadataPtr &metadata) {
     journal::JournalPlayer *player(new journal::JournalPlayer(
       m_ioctx, oid + ".", metadata, &m_replay_hander));
     m_players.push_back(player);
@@ -100,12 +110,12 @@ public:
   }
 
   bool wait_for_complete(journal::JournalPlayer *player) {
-    journal::Entry entry;
-    uint64_t commit_tid;
-    player->try_pop_front(&entry, &commit_tid);
-
     Mutex::Locker locker(m_replay_hander.lock);
     while (!m_replay_hander.complete) {
+      journal::Entry entry;
+      uint64_t commit_tid;
+      player->try_pop_front(&entry, &commit_tid);
+
       if (m_replay_hander.cond.WaitInterval(
             reinterpret_cast<CephContext*>(m_ioctx.cct()),
             m_replay_hander.lock, utime_t(10, 0)) != 0) {
@@ -127,44 +137,54 @@ public:
   ReplayHandler m_replay_hander;
 };
 
-TEST_F(TestJournalPlayer, Prefetch) {
-  std::string oid = get_temp_oid();
+template <uint64_t _max_fetch_bytes>
+class TestJournalPlayerParams {
+public:
+  static const uint64_t max_fetch_bytes = _max_fetch_bytes;
+};
+
+typedef ::testing::Types<TestJournalPlayerParams<0>,
+                         TestJournalPlayerParams<16> > TestJournalPlayerTypes;
+TYPED_TEST_CASE(TestJournalPlayer, TestJournalPlayerTypes);
+
+TYPED_TEST(TestJournalPlayer, Prefetch) {
+  std::string oid = this->get_temp_oid();
 
   journal::JournalPlayer::ObjectPositions positions;
   positions = {
     cls::journal::ObjectPosition(0, 234, 122) };
   cls::journal::ObjectSetPosition commit_position(positions);
 
-  ASSERT_EQ(0, create(oid));
-  ASSERT_EQ(0, client_register(oid));
-  ASSERT_EQ(0, client_commit(oid, commit_position));
+  ASSERT_EQ(0, this->create(oid));
+  ASSERT_EQ(0, this->client_register(oid));
+  ASSERT_EQ(0, this->client_commit(oid, commit_position));
 
-  journal::JournalMetadataPtr metadata = create_metadata(oid);
-  ASSERT_EQ(0, init_metadata(metadata));
+  journal::JournalMetadataPtr metadata = this->create_metadata(oid);
+  ASSERT_EQ(0, this->init_metadata(metadata));
 
-  journal::JournalPlayer *player = create_player(oid, metadata);
+  journal::JournalPlayer *player = this->create_player(oid, metadata);
   BOOST_SCOPE_EXIT_ALL( (player) ) {
     C_SaferCond unwatch_ctx;
     player->shut_down(&unwatch_ctx);
     ASSERT_EQ(0, unwatch_ctx.wait());
   };
 
-  ASSERT_EQ(0, write_entry(oid, 0, 234, 122));
-  ASSERT_EQ(0, write_entry(oid, 1, 234, 123));
-  ASSERT_EQ(0, write_entry(oid, 0, 234, 124));
-  ASSERT_EQ(0, write_entry(oid, 1, 234, 125));
+  ASSERT_EQ(0, this->write_entry(oid, 0, 234, 122));
+  ASSERT_EQ(0, this->write_entry(oid, 1, 234, 123));
+  ASSERT_EQ(0, this->write_entry(oid, 0, 234, 124));
+  ASSERT_EQ(0, this->write_entry(oid, 1, 234, 125));
 
   player->prefetch();
 
   Entries entries;
-  ASSERT_TRUE(wait_for_entries(player, 3, &entries));
-  ASSERT_TRUE(wait_for_complete(player));
+  ASSERT_TRUE(this->wait_for_entries(player, 3, &entries));
+  ASSERT_TRUE(this->wait_for_complete(player));
 
   Entries expected_entries;
   expected_entries = {
-    create_entry(234, 123),
-    create_entry(234, 124),
-    create_entry(234, 125)};
+    this->create_entry(234, 123),
+    this->create_entry(234, 124),
+    this->create_entry(234, 125)};
   ASSERT_EQ(expected_entries, entries);
 
   uint64_t last_tid;
@@ -172,8 +192,8 @@ TEST_F(TestJournalPlayer, Prefetch) {
   ASSERT_EQ(125U, last_tid);
 }
 
-TEST_F(TestJournalPlayer, PrefetchSkip) {
-  std::string oid = get_temp_oid();
+TYPED_TEST(TestJournalPlayer, PrefetchSkip) {
+  std::string oid = this->get_temp_oid();
 
   journal::JournalPlayer::ObjectPositions positions;
   positions = {
@@ -181,73 +201,73 @@ TEST_F(TestJournalPlayer, PrefetchSkip) {
     cls::journal::ObjectPosition(1, 234, 124) };
   cls::journal::ObjectSetPosition commit_position(positions);
 
-  ASSERT_EQ(0, create(oid));
-  ASSERT_EQ(0, client_register(oid));
-  ASSERT_EQ(0, client_commit(oid, commit_position));
+  ASSERT_EQ(0, this->create(oid));
+  ASSERT_EQ(0, this->client_register(oid));
+  ASSERT_EQ(0, this->client_commit(oid, commit_position));
 
-  journal::JournalMetadataPtr metadata = create_metadata(oid);
-  ASSERT_EQ(0, init_metadata(metadata));
+  journal::JournalMetadataPtr metadata = this->create_metadata(oid);
+  ASSERT_EQ(0, this->init_metadata(metadata));
 
-  journal::JournalPlayer *player = create_player(oid, metadata);
+  journal::JournalPlayer *player = this->create_player(oid, metadata);
   BOOST_SCOPE_EXIT_ALL( (player) ) {
     C_SaferCond unwatch_ctx;
     player->shut_down(&unwatch_ctx);
     ASSERT_EQ(0, unwatch_ctx.wait());
   };
 
-  ASSERT_EQ(0, write_entry(oid, 0, 234, 122));
-  ASSERT_EQ(0, write_entry(oid, 1, 234, 123));
-  ASSERT_EQ(0, write_entry(oid, 0, 234, 124));
-  ASSERT_EQ(0, write_entry(oid, 1, 234, 125));
+  ASSERT_EQ(0, this->write_entry(oid, 0, 234, 122));
+  ASSERT_EQ(0, this->write_entry(oid, 1, 234, 123));
+  ASSERT_EQ(0, this->write_entry(oid, 0, 234, 124));
+  ASSERT_EQ(0, this->write_entry(oid, 1, 234, 125));
 
   player->prefetch();
 
   Entries entries;
-  ASSERT_TRUE(wait_for_entries(player, 0, &entries));
-  ASSERT_TRUE(wait_for_complete(player));
+  ASSERT_TRUE(this->wait_for_entries(player, 0, &entries));
+  ASSERT_TRUE(this->wait_for_complete(player));
 
   uint64_t last_tid;
   ASSERT_TRUE(metadata->get_last_allocated_entry_tid(234, &last_tid));
   ASSERT_EQ(125U, last_tid);
 }
 
-TEST_F(TestJournalPlayer, PrefetchWithoutCommit) {
-  std::string oid = get_temp_oid();
+TYPED_TEST(TestJournalPlayer, PrefetchWithoutCommit) {
+  std::string oid = this->get_temp_oid();
 
   cls::journal::ObjectSetPosition commit_position;
 
-  ASSERT_EQ(0, create(oid));
-  ASSERT_EQ(0, client_register(oid));
-  ASSERT_EQ(0, client_commit(oid, commit_position));
+  ASSERT_EQ(0, this->create(oid));
+  ASSERT_EQ(0, this->client_register(oid));
+  ASSERT_EQ(0, this->client_commit(oid, commit_position));
 
-  journal::JournalMetadataPtr metadata = create_metadata(oid);
-  ASSERT_EQ(0, init_metadata(metadata));
+  journal::JournalMetadataPtr metadata = this->create_metadata(oid);
+  ASSERT_EQ(0, this->init_metadata(metadata));
 
-  journal::JournalPlayer *player = create_player(oid, metadata);
+  journal::JournalPlayer *player = this->create_player(oid, metadata);
   BOOST_SCOPE_EXIT_ALL( (player) ) {
     C_SaferCond unwatch_ctx;
     player->shut_down(&unwatch_ctx);
     ASSERT_EQ(0, unwatch_ctx.wait());
   };
 
-  ASSERT_EQ(0, write_entry(oid, 0, 234, 122));
-  ASSERT_EQ(0, write_entry(oid, 1, 234, 123));
+  ASSERT_EQ(0, this->write_entry(oid, 0, 234, 122));
+  ASSERT_EQ(0, this->write_entry(oid, 1, 234, 123));
 
   player->prefetch();
 
   Entries entries;
-  ASSERT_TRUE(wait_for_entries(player, 2, &entries));
-  ASSERT_TRUE(wait_for_complete(player));
+  ASSERT_TRUE(this->wait_for_entries(player, 2, &entries));
+  ASSERT_TRUE(this->wait_for_complete(player));
 
   Entries expected_entries;
   expected_entries = {
-    create_entry(234, 122),
-    create_entry(234, 123)};
+    this->create_entry(234, 122),
+    this->create_entry(234, 123)};
   ASSERT_EQ(expected_entries, entries);
 }
 
-TEST_F(TestJournalPlayer, PrefetchMultipleTags) {
-  std::string oid = get_temp_oid();
+TYPED_TEST(TestJournalPlayer, PrefetchMultipleTags) {
+  std::string oid = this->get_temp_oid();
 
   journal::JournalPlayer::ObjectPositions positions;
   positions = {
@@ -256,32 +276,32 @@ TEST_F(TestJournalPlayer, PrefetchMultipleTags) {
     cls::journal::ObjectPosition(0, 234, 120)};
   cls::journal::ObjectSetPosition commit_position(positions);
 
-  ASSERT_EQ(0, create(oid, 14, 3));
-  ASSERT_EQ(0, client_register(oid));
-  ASSERT_EQ(0, client_commit(oid, commit_position));
+  ASSERT_EQ(0, this->create(oid, 14, 3));
+  ASSERT_EQ(0, this->client_register(oid));
+  ASSERT_EQ(0, this->client_commit(oid, commit_position));
 
-  journal::JournalMetadataPtr metadata = create_metadata(oid);
-  ASSERT_EQ(0, init_metadata(metadata));
+  journal::JournalMetadataPtr metadata = this->create_metadata(oid);
+  ASSERT_EQ(0, this->init_metadata(metadata));
 
-  journal::JournalPlayer *player = create_player(oid, metadata);
+  journal::JournalPlayer *player = this->create_player(oid, metadata);
   BOOST_SCOPE_EXIT_ALL( (player) ) {
     C_SaferCond unwatch_ctx;
     player->shut_down(&unwatch_ctx);
     ASSERT_EQ(0, unwatch_ctx.wait());
   };
 
-  ASSERT_EQ(0, write_entry(oid, 0, 234, 120));
-  ASSERT_EQ(0, write_entry(oid, 1, 234, 121));
-  ASSERT_EQ(0, write_entry(oid, 2, 234, 122));
-  ASSERT_EQ(0, write_entry(oid, 0, 234, 123));
-  ASSERT_EQ(0, write_entry(oid, 1, 234, 124));
-  ASSERT_EQ(0, write_entry(oid, 0, 236, 0)); // new tag allocated
+  ASSERT_EQ(0, this->write_entry(oid, 0, 234, 120));
+  ASSERT_EQ(0, this->write_entry(oid, 1, 234, 121));
+  ASSERT_EQ(0, this->write_entry(oid, 2, 234, 122));
+  ASSERT_EQ(0, this->write_entry(oid, 0, 234, 123));
+  ASSERT_EQ(0, this->write_entry(oid, 1, 234, 124));
+  ASSERT_EQ(0, this->write_entry(oid, 0, 236, 0)); // new tag allocated
 
   player->prefetch();
 
   Entries entries;
-  ASSERT_TRUE(wait_for_entries(player, 3, &entries));
-  ASSERT_TRUE(wait_for_complete(player));
+  ASSERT_TRUE(this->wait_for_entries(player, 3, &entries));
+  ASSERT_TRUE(this->wait_for_complete(player));
 
   uint64_t last_tid;
   ASSERT_TRUE(metadata->get_last_allocated_entry_tid(234, &last_tid));
@@ -290,53 +310,53 @@ TEST_F(TestJournalPlayer, PrefetchMultipleTags) {
   ASSERT_EQ(0U, last_tid);
 }
 
-TEST_F(TestJournalPlayer, PrefetchCorruptSequence) {
-  std::string oid = get_temp_oid();
+TYPED_TEST(TestJournalPlayer, PrefetchCorruptSequence) {
+  std::string oid = this->get_temp_oid();
 
   cls::journal::ObjectSetPosition commit_position;
 
-  ASSERT_EQ(0, create(oid));
-  ASSERT_EQ(0, client_register(oid));
-  ASSERT_EQ(0, client_commit(oid, commit_position));
+  ASSERT_EQ(0, this->create(oid));
+  ASSERT_EQ(0, this->client_register(oid));
+  ASSERT_EQ(0, this->client_commit(oid, commit_position));
 
-  journal::JournalMetadataPtr metadata = create_metadata(oid);
-  ASSERT_EQ(0, init_metadata(metadata));
+  journal::JournalMetadataPtr metadata = this->create_metadata(oid);
+  ASSERT_EQ(0, this->init_metadata(metadata));
 
-  journal::JournalPlayer *player = create_player(oid, metadata);
+  journal::JournalPlayer *player = this->create_player(oid, metadata);
   BOOST_SCOPE_EXIT_ALL( (player) ) {
     C_SaferCond unwatch_ctx;
     player->shut_down(&unwatch_ctx);
     ASSERT_EQ(0, unwatch_ctx.wait());
   };
 
-  ASSERT_EQ(0, write_entry(oid, 0, 234, 120));
-  ASSERT_EQ(0, write_entry(oid, 1, 234, 121));
-  ASSERT_EQ(0, write_entry(oid, 0, 234, 124));
+  ASSERT_EQ(0, this->write_entry(oid, 0, 234, 120));
+  ASSERT_EQ(0, this->write_entry(oid, 1, 234, 121));
+  ASSERT_EQ(0, this->write_entry(oid, 0, 234, 124));
 
   player->prefetch();
   Entries entries;
-  ASSERT_TRUE(wait_for_entries(player, 2, &entries));
+  ASSERT_TRUE(this->wait_for_entries(player, 2, &entries));
 
   journal::Entry entry;
   uint64_t commit_tid;
   ASSERT_FALSE(player->try_pop_front(&entry, &commit_tid));
-  ASSERT_TRUE(wait_for_complete(player));
-  ASSERT_EQ(-ENOMSG, m_replay_hander.complete_result);
+  ASSERT_TRUE(this->wait_for_complete(player));
+  ASSERT_EQ(-ENOMSG, this->m_replay_hander.complete_result);
 }
 
-TEST_F(TestJournalPlayer, PrefetchMissingSequence) {
-  std::string oid = get_temp_oid();
+TYPED_TEST(TestJournalPlayer, PrefetchMissingSequence) {
+  std::string oid = this->get_temp_oid();
 
   cls::journal::ObjectSetPosition commit_position;
 
-  ASSERT_EQ(0, create(oid, 14, 4));
-  ASSERT_EQ(0, client_register(oid));
-  ASSERT_EQ(0, client_commit(oid, commit_position));
+  ASSERT_EQ(0, this->create(oid, 14, 4));
+  ASSERT_EQ(0, this->client_register(oid));
+  ASSERT_EQ(0, this->client_commit(oid, commit_position));
 
-  journal::JournalMetadataPtr metadata = create_metadata(oid);
-  ASSERT_EQ(0, init_metadata(metadata));
+  journal::JournalMetadataPtr metadata = this->create_metadata(oid);
+  ASSERT_EQ(0, this->init_metadata(metadata));
 
-  journal::JournalPlayer *player = create_player(oid, metadata);
+  journal::JournalPlayer *player = this->create_player(oid, metadata);
   BOOST_SCOPE_EXIT_ALL( (player) ) {
     C_SaferCond unwatch_ctx;
     player->shut_down(&unwatch_ctx);
@@ -344,49 +364,49 @@ TEST_F(TestJournalPlayer, PrefetchMissingSequence) {
   };
 
   ASSERT_EQ(0, metadata->set_active_set(1));
-  ASSERT_EQ(0, write_entry(oid, 0, 2, 852));
-  ASSERT_EQ(0, write_entry(oid, 0, 2, 856));
-  ASSERT_EQ(0, write_entry(oid, 0, 2, 860));
-  ASSERT_EQ(0, write_entry(oid, 1, 2, 853));
-  ASSERT_EQ(0, write_entry(oid, 1, 2, 857));
-  ASSERT_EQ(0, write_entry(oid, 5, 2, 861));
-  ASSERT_EQ(0, write_entry(oid, 2, 2, 854));
-  ASSERT_EQ(0, write_entry(oid, 0, 3, 0));
-  ASSERT_EQ(0, write_entry(oid, 5, 3, 1));
-  ASSERT_EQ(0, write_entry(oid, 2, 3, 2));
-  ASSERT_EQ(0, write_entry(oid, 3, 3, 3));
+  ASSERT_EQ(0, this->write_entry(oid, 0, 2, 852));
+  ASSERT_EQ(0, this->write_entry(oid, 0, 2, 856));
+  ASSERT_EQ(0, this->write_entry(oid, 0, 2, 860));
+  ASSERT_EQ(0, this->write_entry(oid, 1, 2, 853));
+  ASSERT_EQ(0, this->write_entry(oid, 1, 2, 857));
+  ASSERT_EQ(0, this->write_entry(oid, 5, 2, 861));
+  ASSERT_EQ(0, this->write_entry(oid, 2, 2, 854));
+  ASSERT_EQ(0, this->write_entry(oid, 0, 3, 0));
+  ASSERT_EQ(0, this->write_entry(oid, 5, 3, 1));
+  ASSERT_EQ(0, this->write_entry(oid, 2, 3, 2));
+  ASSERT_EQ(0, this->write_entry(oid, 3, 3, 3));
 
   player->prefetch();
   Entries entries;
-  ASSERT_TRUE(wait_for_entries(player, 7, &entries));
+  ASSERT_TRUE(this->wait_for_entries(player, 7, &entries));
 
   Entries expected_entries = {
-    create_entry(2, 852),
-    create_entry(2, 853),
-    create_entry(2, 854),
-    create_entry(3, 0),
-    create_entry(3, 1),
-    create_entry(3, 2),
-    create_entry(3, 3)};
+    this->create_entry(2, 852),
+    this->create_entry(2, 853),
+    this->create_entry(2, 854),
+    this->create_entry(3, 0),
+    this->create_entry(3, 1),
+    this->create_entry(3, 2),
+    this->create_entry(3, 3)};
   ASSERT_EQ(expected_entries, entries);
 
-  ASSERT_TRUE(wait_for_complete(player));
-  ASSERT_EQ(0, m_replay_hander.complete_result);
+  ASSERT_TRUE(this->wait_for_complete(player));
+  ASSERT_EQ(0, this->m_replay_hander.complete_result);
 }
 
-TEST_F(TestJournalPlayer, PrefetchLargeMissingSequence) {
-  std::string oid = get_temp_oid();
+TYPED_TEST(TestJournalPlayer, PrefetchLargeMissingSequence) {
+  std::string oid = this->get_temp_oid();
 
   cls::journal::ObjectSetPosition commit_position;
 
-  ASSERT_EQ(0, create(oid));
-  ASSERT_EQ(0, client_register(oid));
-  ASSERT_EQ(0, client_commit(oid, commit_position));
+  ASSERT_EQ(0, this->create(oid));
+  ASSERT_EQ(0, this->client_register(oid));
+  ASSERT_EQ(0, this->client_commit(oid, commit_position));
 
-  journal::JournalMetadataPtr metadata = create_metadata(oid);
-  ASSERT_EQ(0, init_metadata(metadata));
+  journal::JournalMetadataPtr metadata = this->create_metadata(oid);
+  ASSERT_EQ(0, this->init_metadata(metadata));
 
-  journal::JournalPlayer *player = create_player(oid, metadata);
+  journal::JournalPlayer *player = this->create_player(oid, metadata);
   BOOST_SCOPE_EXIT_ALL( (player) ) {
     C_SaferCond unwatch_ctx;
     player->shut_down(&unwatch_ctx);
@@ -394,211 +414,211 @@ TEST_F(TestJournalPlayer, PrefetchLargeMissingSequence) {
   };
 
   ASSERT_EQ(0, metadata->set_active_set(2));
-  ASSERT_EQ(0, write_entry(oid, 0, 0, 0));
-  ASSERT_EQ(0, write_entry(oid, 1, 0, 1));
-  ASSERT_EQ(0, write_entry(oid, 3, 0, 3));
-  ASSERT_EQ(0, write_entry(oid, 4, 1, 0));
+  ASSERT_EQ(0, this->write_entry(oid, 0, 0, 0));
+  ASSERT_EQ(0, this->write_entry(oid, 1, 0, 1));
+  ASSERT_EQ(0, this->write_entry(oid, 3, 0, 3));
+  ASSERT_EQ(0, this->write_entry(oid, 4, 1, 0));
 
   player->prefetch();
   Entries entries;
-  ASSERT_TRUE(wait_for_entries(player, 3, &entries));
+  ASSERT_TRUE(this->wait_for_entries(player, 3, &entries));
 
   Entries expected_entries = {
-    create_entry(0, 0),
-    create_entry(0, 1),
-    create_entry(1, 0)};
+    this->create_entry(0, 0),
+    this->create_entry(0, 1),
+    this->create_entry(1, 0)};
   ASSERT_EQ(expected_entries, entries);
 }
 
-TEST_F(TestJournalPlayer, PrefetchBlockedNewTag) {
-  std::string oid = get_temp_oid();
+TYPED_TEST(TestJournalPlayer, PrefetchBlockedNewTag) {
+  std::string oid = this->get_temp_oid();
 
   cls::journal::ObjectSetPosition commit_position;
 
-  ASSERT_EQ(0, create(oid));
-  ASSERT_EQ(0, client_register(oid));
-  ASSERT_EQ(0, client_commit(oid, commit_position));
+  ASSERT_EQ(0, this->create(oid));
+  ASSERT_EQ(0, this->client_register(oid));
+  ASSERT_EQ(0, this->client_commit(oid, commit_position));
 
-  journal::JournalMetadataPtr metadata = create_metadata(oid);
-  ASSERT_EQ(0, init_metadata(metadata));
+  journal::JournalMetadataPtr metadata = this->create_metadata(oid);
+  ASSERT_EQ(0, this->init_metadata(metadata));
 
-  journal::JournalPlayer *player = create_player(oid, metadata);
+  journal::JournalPlayer *player = this->create_player(oid, metadata);
   BOOST_SCOPE_EXIT_ALL( (player) ) {
     C_SaferCond unwatch_ctx;
     player->shut_down(&unwatch_ctx);
     ASSERT_EQ(0, unwatch_ctx.wait());
   };
 
-  ASSERT_EQ(0, write_entry(oid, 0, 0, 0));
-  ASSERT_EQ(0, write_entry(oid, 1, 0, 1));
-  ASSERT_EQ(0, write_entry(oid, 0, 0, 2));
-  ASSERT_EQ(0, write_entry(oid, 0, 0, 4));
-  ASSERT_EQ(0, write_entry(oid, 0, 1, 0));
+  ASSERT_EQ(0, this->write_entry(oid, 0, 0, 0));
+  ASSERT_EQ(0, this->write_entry(oid, 1, 0, 1));
+  ASSERT_EQ(0, this->write_entry(oid, 0, 0, 2));
+  ASSERT_EQ(0, this->write_entry(oid, 0, 0, 4));
+  ASSERT_EQ(0, this->write_entry(oid, 0, 1, 0));
 
   player->prefetch();
   Entries entries;
-  ASSERT_TRUE(wait_for_entries(player, 4, &entries));
+  ASSERT_TRUE(this->wait_for_entries(player, 4, &entries));
 
   Entries expected_entries = {
-    create_entry(0, 0),
-    create_entry(0, 1),
-    create_entry(0, 2),
-    create_entry(1, 0)};
+    this->create_entry(0, 0),
+    this->create_entry(0, 1),
+    this->create_entry(0, 2),
+    this->create_entry(1, 0)};
   ASSERT_EQ(expected_entries, entries);
 }
 
-TEST_F(TestJournalPlayer, PrefetchStaleEntries) {
-  std::string oid = get_temp_oid();
+TYPED_TEST(TestJournalPlayer, PrefetchStaleEntries) {
+  std::string oid = this->get_temp_oid();
 
   journal::JournalPlayer::ObjectPositions positions = {
     cls::journal::ObjectPosition(0, 1, 0) };
   cls::journal::ObjectSetPosition commit_position(positions);
 
-  ASSERT_EQ(0, create(oid));
-  ASSERT_EQ(0, client_register(oid));
-  ASSERT_EQ(0, client_commit(oid, commit_position));
+  ASSERT_EQ(0, this->create(oid));
+  ASSERT_EQ(0, this->client_register(oid));
+  ASSERT_EQ(0, this->client_commit(oid, commit_position));
 
-  journal::JournalMetadataPtr metadata = create_metadata(oid);
-  ASSERT_EQ(0, init_metadata(metadata));
+  journal::JournalMetadataPtr metadata = this->create_metadata(oid);
+  ASSERT_EQ(0, this->init_metadata(metadata));
 
-  journal::JournalPlayer *player = create_player(oid, metadata);
+  journal::JournalPlayer *player = this->create_player(oid, metadata);
   BOOST_SCOPE_EXIT_ALL( (player) ) {
     C_SaferCond unwatch_ctx;
     player->shut_down(&unwatch_ctx);
     ASSERT_EQ(0, unwatch_ctx.wait());
   };
 
-  ASSERT_EQ(0, write_entry(oid, 1, 0, 1));
-  ASSERT_EQ(0, write_entry(oid, 1, 0, 3));
-  ASSERT_EQ(0, write_entry(oid, 0, 1, 0));
-  ASSERT_EQ(0, write_entry(oid, 1, 1, 1));
+  ASSERT_EQ(0, this->write_entry(oid, 1, 0, 1));
+  ASSERT_EQ(0, this->write_entry(oid, 1, 0, 3));
+  ASSERT_EQ(0, this->write_entry(oid, 0, 1, 0));
+  ASSERT_EQ(0, this->write_entry(oid, 1, 1, 1));
 
   player->prefetch();
   Entries entries;
-  ASSERT_TRUE(wait_for_entries(player, 1, &entries));
+  ASSERT_TRUE(this->wait_for_entries(player, 1, &entries));
 
   Entries expected_entries = {
-    create_entry(1, 1)};
+    this->create_entry(1, 1)};
   ASSERT_EQ(expected_entries, entries);
 
-  ASSERT_TRUE(wait_for_complete(player));
-  ASSERT_EQ(0, m_replay_hander.complete_result);
+  ASSERT_TRUE(this->wait_for_complete(player));
+  ASSERT_EQ(0, this->m_replay_hander.complete_result);
 }
 
-TEST_F(TestJournalPlayer, PrefetchUnexpectedTag) {
-  std::string oid = get_temp_oid();
+TYPED_TEST(TestJournalPlayer, PrefetchUnexpectedTag) {
+  std::string oid = this->get_temp_oid();
 
   cls::journal::ObjectSetPosition commit_position;
 
-  ASSERT_EQ(0, create(oid));
-  ASSERT_EQ(0, client_register(oid));
-  ASSERT_EQ(0, client_commit(oid, commit_position));
+  ASSERT_EQ(0, this->create(oid));
+  ASSERT_EQ(0, this->client_register(oid));
+  ASSERT_EQ(0, this->client_commit(oid, commit_position));
 
-  journal::JournalMetadataPtr metadata = create_metadata(oid);
-  ASSERT_EQ(0, init_metadata(metadata));
+  journal::JournalMetadataPtr metadata = this->create_metadata(oid);
+  ASSERT_EQ(0, this->init_metadata(metadata));
 
-  journal::JournalPlayer *player = create_player(oid, metadata);
+  journal::JournalPlayer *player = this->create_player(oid, metadata);
   BOOST_SCOPE_EXIT_ALL( (player) ) {
     C_SaferCond unwatch_ctx;
     player->shut_down(&unwatch_ctx);
     ASSERT_EQ(0, unwatch_ctx.wait());
   };
 
-  ASSERT_EQ(0, write_entry(oid, 0, 234, 120));
-  ASSERT_EQ(0, write_entry(oid, 1, 235, 121));
-  ASSERT_EQ(0, write_entry(oid, 0, 234, 124));
+  ASSERT_EQ(0, this->write_entry(oid, 0, 234, 120));
+  ASSERT_EQ(0, this->write_entry(oid, 1, 235, 121));
+  ASSERT_EQ(0, this->write_entry(oid, 0, 234, 124));
 
   player->prefetch();
   Entries entries;
-  ASSERT_TRUE(wait_for_entries(player, 1, &entries));
+  ASSERT_TRUE(this->wait_for_entries(player, 1, &entries));
 
   journal::Entry entry;
   uint64_t commit_tid;
   ASSERT_FALSE(player->try_pop_front(&entry, &commit_tid));
-  ASSERT_TRUE(wait_for_complete(player));
-  ASSERT_EQ(0, m_replay_hander.complete_result);
+  ASSERT_TRUE(this->wait_for_complete(player));
+  ASSERT_EQ(0, this->m_replay_hander.complete_result);
 }
 
-TEST_F(TestJournalPlayer, PrefetchAndWatch) {
-  std::string oid = get_temp_oid();
+TYPED_TEST(TestJournalPlayer, PrefetchAndWatch) {
+  std::string oid = this->get_temp_oid();
 
   journal::JournalPlayer::ObjectPositions positions;
   positions = {
     cls::journal::ObjectPosition(0, 234, 122)};
   cls::journal::ObjectSetPosition commit_position(positions);
 
-  ASSERT_EQ(0, create(oid));
-  ASSERT_EQ(0, client_register(oid));
-  ASSERT_EQ(0, client_commit(oid, commit_position));
+  ASSERT_EQ(0, this->create(oid));
+  ASSERT_EQ(0, this->client_register(oid));
+  ASSERT_EQ(0, this->client_commit(oid, commit_position));
 
-  journal::JournalMetadataPtr metadata = create_metadata(oid);
-  ASSERT_EQ(0, init_metadata(metadata));
+  journal::JournalMetadataPtr metadata = this->create_metadata(oid);
+  ASSERT_EQ(0, this->init_metadata(metadata));
 
-  journal::JournalPlayer *player = create_player(oid, metadata);
+  journal::JournalPlayer *player = this->create_player(oid, metadata);
   BOOST_SCOPE_EXIT_ALL( (player) ) {
     C_SaferCond unwatch_ctx;
     player->shut_down(&unwatch_ctx);
     ASSERT_EQ(0, unwatch_ctx.wait());
   };
 
-  ASSERT_EQ(0, write_entry(oid, 0, 234, 122));
+  ASSERT_EQ(0, this->write_entry(oid, 0, 234, 122));
 
   player->prefetch_and_watch(0.25);
 
   Entries entries;
-  ASSERT_EQ(0, write_entry(oid, 1, 234, 123));
-  ASSERT_TRUE(wait_for_entries(player, 1, &entries));
+  ASSERT_EQ(0, this->write_entry(oid, 1, 234, 123));
+  ASSERT_TRUE(this->wait_for_entries(player, 1, &entries));
 
   Entries expected_entries;
-  expected_entries = {create_entry(234, 123)};
+  expected_entries = {this->create_entry(234, 123)};
   ASSERT_EQ(expected_entries, entries);
 
-  ASSERT_EQ(0, write_entry(oid, 0, 234, 124));
-  ASSERT_TRUE(wait_for_entries(player, 1, &entries));
+  ASSERT_EQ(0, this->write_entry(oid, 0, 234, 124));
+  ASSERT_TRUE(this->wait_for_entries(player, 1, &entries));
 
-  expected_entries = {create_entry(234, 124)};
+  expected_entries = {this->create_entry(234, 124)};
   ASSERT_EQ(expected_entries, entries);
 }
 
-TEST_F(TestJournalPlayer, PrefetchSkippedObject) {
-  std::string oid = get_temp_oid();
+TYPED_TEST(TestJournalPlayer, PrefetchSkippedObject) {
+  std::string oid = this->get_temp_oid();
 
   cls::journal::ObjectSetPosition commit_position;
 
-  ASSERT_EQ(0, create(oid, 14, 3));
-  ASSERT_EQ(0, client_register(oid));
-  ASSERT_EQ(0, client_commit(oid, commit_position));
+  ASSERT_EQ(0, this->create(oid, 14, 3));
+  ASSERT_EQ(0, this->client_register(oid));
+  ASSERT_EQ(0, this->client_commit(oid, commit_position));
 
-  journal::JournalMetadataPtr metadata = create_metadata(oid);
-  ASSERT_EQ(0, init_metadata(metadata));
+  journal::JournalMetadataPtr metadata = this->create_metadata(oid);
+  ASSERT_EQ(0, this->init_metadata(metadata));
   ASSERT_EQ(0, metadata->set_active_set(2));
 
-  journal::JournalPlayer *player = create_player(oid, metadata);
+  journal::JournalPlayer *player = this->create_player(oid, metadata);
   BOOST_SCOPE_EXIT_ALL( (player) ) {
     C_SaferCond unwatch_ctx;
     player->shut_down(&unwatch_ctx);
     ASSERT_EQ(0, unwatch_ctx.wait());
   };
 
-  ASSERT_EQ(0, write_entry(oid, 0, 234, 122));
-  ASSERT_EQ(0, write_entry(oid, 1, 234, 123));
-  ASSERT_EQ(0, write_entry(oid, 5, 234, 124));
-  ASSERT_EQ(0, write_entry(oid, 6, 234, 125));
-  ASSERT_EQ(0, write_entry(oid, 7, 234, 126));
+  ASSERT_EQ(0, this->write_entry(oid, 0, 234, 122));
+  ASSERT_EQ(0, this->write_entry(oid, 1, 234, 123));
+  ASSERT_EQ(0, this->write_entry(oid, 5, 234, 124));
+  ASSERT_EQ(0, this->write_entry(oid, 6, 234, 125));
+  ASSERT_EQ(0, this->write_entry(oid, 7, 234, 126));
 
   player->prefetch();
 
   Entries entries;
-  ASSERT_TRUE(wait_for_entries(player, 5, &entries));
-  ASSERT_TRUE(wait_for_complete(player));
+  ASSERT_TRUE(this->wait_for_entries(player, 5, &entries));
+  ASSERT_TRUE(this->wait_for_complete(player));
 
   Entries expected_entries;
   expected_entries = {
-    create_entry(234, 122),
-    create_entry(234, 123),
-    create_entry(234, 124),
-    create_entry(234, 125),
-    create_entry(234, 126)};
+    this->create_entry(234, 122),
+    this->create_entry(234, 123),
+    this->create_entry(234, 124),
+    this->create_entry(234, 125),
+    this->create_entry(234, 126)};
   ASSERT_EQ(expected_entries, entries);
 
   uint64_t last_tid;
@@ -606,8 +626,8 @@ TEST_F(TestJournalPlayer, PrefetchSkippedObject) {
   ASSERT_EQ(126U, last_tid);
 }
 
-TEST_F(TestJournalPlayer, ImbalancedJournal) {
-  std::string oid = get_temp_oid();
+TYPED_TEST(TestJournalPlayer, ImbalancedJournal) {
+  std::string oid = this->get_temp_oid();
 
   journal::JournalPlayer::ObjectPositions positions = {
     cls::journal::ObjectPosition(9, 300, 1),
@@ -616,43 +636,43 @@ TEST_F(TestJournalPlayer, ImbalancedJournal) {
     cls::journal::ObjectPosition(11, 200, 4331) };
   cls::journal::ObjectSetPosition commit_position(positions);
 
-  ASSERT_EQ(0, create(oid, 14, 4));
-  ASSERT_EQ(0, client_register(oid));
-  ASSERT_EQ(0, client_commit(oid, commit_position));
+  ASSERT_EQ(0, this->create(oid, 14, 4));
+  ASSERT_EQ(0, this->client_register(oid));
+  ASSERT_EQ(0, this->client_commit(oid, commit_position));
 
-  journal::JournalMetadataPtr metadata = create_metadata(oid);
-  ASSERT_EQ(0, init_metadata(metadata));
+  journal::JournalMetadataPtr metadata = this->create_metadata(oid);
+  ASSERT_EQ(0, this->init_metadata(metadata));
   ASSERT_EQ(0, metadata->set_active_set(2));
   metadata->set_minimum_set(2);
 
-  journal::JournalPlayer *player = create_player(oid, metadata);
+  journal::JournalPlayer *player = this->create_player(oid, metadata);
   BOOST_SCOPE_EXIT_ALL( (player) ) {
     C_SaferCond unwatch_ctx;
     player->shut_down(&unwatch_ctx);
     ASSERT_EQ(0, unwatch_ctx.wait());
   };
 
-  ASSERT_EQ(0, write_entry(oid, 8, 300, 0));
-  ASSERT_EQ(0, write_entry(oid, 8, 301, 0));
-  ASSERT_EQ(0, write_entry(oid, 9, 300, 1));
-  ASSERT_EQ(0, write_entry(oid, 9, 301, 1));
-  ASSERT_EQ(0, write_entry(oid, 10, 200, 4334));
-  ASSERT_EQ(0, write_entry(oid, 10, 301, 2));
-  ASSERT_EQ(0, write_entry(oid, 11, 200, 4331));
-  ASSERT_EQ(0, write_entry(oid, 11, 301, 3));
+  ASSERT_EQ(0, this->write_entry(oid, 8, 300, 0));
+  ASSERT_EQ(0, this->write_entry(oid, 8, 301, 0));
+  ASSERT_EQ(0, this->write_entry(oid, 9, 300, 1));
+  ASSERT_EQ(0, this->write_entry(oid, 9, 301, 1));
+  ASSERT_EQ(0, this->write_entry(oid, 10, 200, 4334));
+  ASSERT_EQ(0, this->write_entry(oid, 10, 301, 2));
+  ASSERT_EQ(0, this->write_entry(oid, 11, 200, 4331));
+  ASSERT_EQ(0, this->write_entry(oid, 11, 301, 3));
 
   player->prefetch();
 
   Entries entries;
-  ASSERT_TRUE(wait_for_entries(player, 4, &entries));
-  ASSERT_TRUE(wait_for_complete(player));
+  ASSERT_TRUE(this->wait_for_entries(player, 4, &entries));
+  ASSERT_TRUE(this->wait_for_complete(player));
 
   Entries expected_entries;
   expected_entries = {
-    create_entry(301, 0),
-    create_entry(301, 1),
-    create_entry(301, 2),
-    create_entry(301, 3)};
+    this->create_entry(301, 0),
+    this->create_entry(301, 1),
+    this->create_entry(301, 2),
+    this->create_entry(301, 3)};
   ASSERT_EQ(expected_entries, entries);
 
   uint64_t last_tid;
@@ -660,124 +680,124 @@ TEST_F(TestJournalPlayer, ImbalancedJournal) {
   ASSERT_EQ(3U, last_tid);
 }
 
-TEST_F(TestJournalPlayer, LiveReplayLaggyAppend) {
-  std::string oid = get_temp_oid();
+TYPED_TEST(TestJournalPlayer, LiveReplayLaggyAppend) {
+  std::string oid = this->get_temp_oid();
 
   cls::journal::ObjectSetPosition commit_position;
 
-  ASSERT_EQ(0, create(oid));
-  ASSERT_EQ(0, client_register(oid));
-  ASSERT_EQ(0, client_commit(oid, commit_position));
+  ASSERT_EQ(0, this->create(oid));
+  ASSERT_EQ(0, this->client_register(oid));
+  ASSERT_EQ(0, this->client_commit(oid, commit_position));
 
-  journal::JournalMetadataPtr metadata = create_metadata(oid);
-  ASSERT_EQ(0, init_metadata(metadata));
+  journal::JournalMetadataPtr metadata = this->create_metadata(oid);
+  ASSERT_EQ(0, this->init_metadata(metadata));
 
-  journal::JournalPlayer *player = create_player(oid, metadata);
+  journal::JournalPlayer *player = this->create_player(oid, metadata);
   BOOST_SCOPE_EXIT_ALL( (player) ) {
     C_SaferCond unwatch_ctx;
     player->shut_down(&unwatch_ctx);
     ASSERT_EQ(0, unwatch_ctx.wait());
   };
 
-  ASSERT_EQ(0, write_entry(oid, 0, 0, 0));
-  ASSERT_EQ(0, write_entry(oid, 1, 0, 1));
-  ASSERT_EQ(0, write_entry(oid, 0, 0, 2));
-  ASSERT_EQ(0, write_entry(oid, 0, 0, 4));
-  ASSERT_EQ(0, write_entry(oid, 3, 0, 5)); // laggy entry 0/3 in object 1
+  ASSERT_EQ(0, this->write_entry(oid, 0, 0, 0));
+  ASSERT_EQ(0, this->write_entry(oid, 1, 0, 1));
+  ASSERT_EQ(0, this->write_entry(oid, 0, 0, 2));
+  ASSERT_EQ(0, this->write_entry(oid, 0, 0, 4));
+  ASSERT_EQ(0, this->write_entry(oid, 3, 0, 5)); // laggy entry 0/3 in object 1
   player->prefetch_and_watch(0.25);
 
   Entries entries;
-  ASSERT_TRUE(wait_for_entries(player, 3, &entries));
+  ASSERT_TRUE(this->wait_for_entries(player, 3, &entries));
 
   Entries expected_entries = {
-    create_entry(0, 0),
-    create_entry(0, 1),
-    create_entry(0, 2)};
+    this->create_entry(0, 0),
+    this->create_entry(0, 1),
+    this->create_entry(0, 2)};
   ASSERT_EQ(expected_entries, entries);
 
   journal::Entry entry;
   uint64_t commit_tid;
   ASSERT_FALSE(player->try_pop_front(&entry, &commit_tid));
 
-  ASSERT_EQ(0, write_entry(oid, 1, 0, 3));
+  ASSERT_EQ(0, this->write_entry(oid, 1, 0, 3));
   ASSERT_EQ(0, metadata->set_active_set(1));
-  ASSERT_TRUE(wait_for_entries(player, 3, &entries));
+  ASSERT_TRUE(this->wait_for_entries(player, 3, &entries));
 
   expected_entries = {
-    create_entry(0, 3),
-    create_entry(0, 4),
-    create_entry(0, 5)};
+    this->create_entry(0, 3),
+    this->create_entry(0, 4),
+    this->create_entry(0, 5)};
   ASSERT_EQ(expected_entries, entries);
 }
 
-TEST_F(TestJournalPlayer, LiveReplayMissingSequence) {
-  std::string oid = get_temp_oid();
+TYPED_TEST(TestJournalPlayer, LiveReplayMissingSequence) {
+  std::string oid = this->get_temp_oid();
 
   cls::journal::ObjectSetPosition commit_position;
 
-  ASSERT_EQ(0, create(oid, 14, 4));
-  ASSERT_EQ(0, client_register(oid));
-  ASSERT_EQ(0, client_commit(oid, commit_position));
+  ASSERT_EQ(0, this->create(oid, 14, 4));
+  ASSERT_EQ(0, this->client_register(oid));
+  ASSERT_EQ(0, this->client_commit(oid, commit_position));
 
-  journal::JournalMetadataPtr metadata = create_metadata(oid);
-  ASSERT_EQ(0, init_metadata(metadata));
+  journal::JournalMetadataPtr metadata = this->create_metadata(oid);
+  ASSERT_EQ(0, this->init_metadata(metadata));
 
-  journal::JournalPlayer *player = create_player(oid, metadata);
+  journal::JournalPlayer *player = this->create_player(oid, metadata);
   BOOST_SCOPE_EXIT_ALL( (player) ) {
     C_SaferCond unwatch_ctx;
     player->shut_down(&unwatch_ctx);
     ASSERT_EQ(0, unwatch_ctx.wait());
   };
 
-  ASSERT_EQ(0, write_entry(oid, 0, 2, 852));
-  ASSERT_EQ(0, write_entry(oid, 0, 2, 856));
-  ASSERT_EQ(0, write_entry(oid, 0, 2, 860));
-  ASSERT_EQ(0, write_entry(oid, 1, 2, 853));
-  ASSERT_EQ(0, write_entry(oid, 1, 2, 857));
-  ASSERT_EQ(0, write_entry(oid, 2, 2, 854));
-  ASSERT_EQ(0, write_entry(oid, 0, 2, 856));
+  ASSERT_EQ(0, this->write_entry(oid, 0, 2, 852));
+  ASSERT_EQ(0, this->write_entry(oid, 0, 2, 856));
+  ASSERT_EQ(0, this->write_entry(oid, 0, 2, 860));
+  ASSERT_EQ(0, this->write_entry(oid, 1, 2, 853));
+  ASSERT_EQ(0, this->write_entry(oid, 1, 2, 857));
+  ASSERT_EQ(0, this->write_entry(oid, 2, 2, 854));
+  ASSERT_EQ(0, this->write_entry(oid, 0, 2, 856));
   player->prefetch_and_watch(0.25);
 
   Entries entries;
-  ASSERT_TRUE(wait_for_entries(player, 3, &entries));
+  ASSERT_TRUE(this->wait_for_entries(player, 3, &entries));
 
   Entries expected_entries = {
-    create_entry(2, 852),
-    create_entry(2, 853),
-    create_entry(2, 854)};
+    this->create_entry(2, 852),
+    this->create_entry(2, 853),
+    this->create_entry(2, 854)};
   ASSERT_EQ(expected_entries, entries);
 
   journal::Entry entry;
   uint64_t commit_tid;
   ASSERT_FALSE(player->try_pop_front(&entry, &commit_tid));
 
-  ASSERT_EQ(0, write_entry(oid, 3, 3, 3));
-  ASSERT_EQ(0, write_entry(oid, 2, 3, 2));
-  ASSERT_EQ(0, write_entry(oid, 1, 3, 1));
-  ASSERT_EQ(0, write_entry(oid, 0, 3, 0));
-  ASSERT_TRUE(wait_for_entries(player, 4, &entries));
+  ASSERT_EQ(0, this->write_entry(oid, 3, 3, 3));
+  ASSERT_EQ(0, this->write_entry(oid, 2, 3, 2));
+  ASSERT_EQ(0, this->write_entry(oid, 1, 3, 1));
+  ASSERT_EQ(0, this->write_entry(oid, 0, 3, 0));
+  ASSERT_TRUE(this->wait_for_entries(player, 4, &entries));
 
   expected_entries = {
-    create_entry(3, 0),
-    create_entry(3, 1),
-    create_entry(3, 2),
-    create_entry(3, 3)};
+    this->create_entry(3, 0),
+    this->create_entry(3, 1),
+    this->create_entry(3, 2),
+    this->create_entry(3, 3)};
   ASSERT_EQ(expected_entries, entries);
 }
 
-TEST_F(TestJournalPlayer, LiveReplayLargeMissingSequence) {
-  std::string oid = get_temp_oid();
+TYPED_TEST(TestJournalPlayer, LiveReplayLargeMissingSequence) {
+  std::string oid = this->get_temp_oid();
 
   cls::journal::ObjectSetPosition commit_position;
 
-  ASSERT_EQ(0, create(oid));
-  ASSERT_EQ(0, client_register(oid));
-  ASSERT_EQ(0, client_commit(oid, commit_position));
+  ASSERT_EQ(0, this->create(oid));
+  ASSERT_EQ(0, this->client_register(oid));
+  ASSERT_EQ(0, this->client_commit(oid, commit_position));
 
-  journal::JournalMetadataPtr metadata = create_metadata(oid);
-  ASSERT_EQ(0, init_metadata(metadata));
+  journal::JournalMetadataPtr metadata = this->create_metadata(oid);
+  ASSERT_EQ(0, this->init_metadata(metadata));
 
-  journal::JournalPlayer *player = create_player(oid, metadata);
+  journal::JournalPlayer *player = this->create_player(oid, metadata);
   BOOST_SCOPE_EXIT_ALL( (player) ) {
     C_SaferCond unwatch_ctx;
     player->shut_down(&unwatch_ctx);
@@ -785,35 +805,35 @@ TEST_F(TestJournalPlayer, LiveReplayLargeMissingSequence) {
   };
 
   ASSERT_EQ(0, metadata->set_active_set(2));
-  ASSERT_EQ(0, write_entry(oid, 0, 0, 0));
-  ASSERT_EQ(0, write_entry(oid, 1, 0, 1));
-  ASSERT_EQ(0, write_entry(oid, 3, 0, 3));
-  ASSERT_EQ(0, write_entry(oid, 4, 1, 0));
+  ASSERT_EQ(0, this->write_entry(oid, 0, 0, 0));
+  ASSERT_EQ(0, this->write_entry(oid, 1, 0, 1));
+  ASSERT_EQ(0, this->write_entry(oid, 3, 0, 3));
+  ASSERT_EQ(0, this->write_entry(oid, 4, 1, 0));
   player->prefetch_and_watch(0.25);
 
   Entries entries;
-  ASSERT_TRUE(wait_for_entries(player, 3, &entries));
+  ASSERT_TRUE(this->wait_for_entries(player, 3, &entries));
 
   Entries expected_entries = {
-    create_entry(0, 0),
-    create_entry(0, 1),
-    create_entry(1, 0)};
+    this->create_entry(0, 0),
+    this->create_entry(0, 1),
+    this->create_entry(1, 0)};
   ASSERT_EQ(expected_entries, entries);
 }
 
-TEST_F(TestJournalPlayer, LiveReplayBlockedNewTag) {
-  std::string oid = get_temp_oid();
+TYPED_TEST(TestJournalPlayer, LiveReplayBlockedNewTag) {
+  std::string oid = this->get_temp_oid();
 
   cls::journal::ObjectSetPosition commit_position;
 
-  ASSERT_EQ(0, create(oid));
-  ASSERT_EQ(0, client_register(oid));
-  ASSERT_EQ(0, client_commit(oid, commit_position));
+  ASSERT_EQ(0, this->create(oid));
+  ASSERT_EQ(0, this->client_register(oid));
+  ASSERT_EQ(0, this->client_commit(oid, commit_position));
 
-  journal::JournalMetadataPtr metadata = create_metadata(oid);
-  ASSERT_EQ(0, init_metadata(metadata));
+  journal::JournalMetadataPtr metadata = this->create_metadata(oid);
+  ASSERT_EQ(0, this->init_metadata(metadata));
 
-  journal::JournalPlayer *player = create_player(oid, metadata);
+  journal::JournalPlayer *player = this->create_player(oid, metadata);
   BOOST_SCOPE_EXIT_ALL( (player) ) {
     C_SaferCond unwatch_ctx;
     player->shut_down(&unwatch_ctx);
@@ -826,19 +846,19 @@ TEST_F(TestJournalPlayer, LiveReplayBlockedNewTag) {
   ASSERT_EQ(0, ctx1.wait());
 
   ASSERT_EQ(0, metadata->set_active_set(0));
-  ASSERT_EQ(0, write_entry(oid, 0, tag1.tid, 0));
-  ASSERT_EQ(0, write_entry(oid, 1, tag1.tid, 1));
-  ASSERT_EQ(0, write_entry(oid, 0, tag1.tid, 2));
-  ASSERT_EQ(0, write_entry(oid, 0, tag1.tid, 4));
+  ASSERT_EQ(0, this->write_entry(oid, 0, tag1.tid, 0));
+  ASSERT_EQ(0, this->write_entry(oid, 1, tag1.tid, 1));
+  ASSERT_EQ(0, this->write_entry(oid, 0, tag1.tid, 2));
+  ASSERT_EQ(0, this->write_entry(oid, 0, tag1.tid, 4));
   player->prefetch_and_watch(0.25);
 
   Entries entries;
-  ASSERT_TRUE(wait_for_entries(player, 3, &entries));
+  ASSERT_TRUE(this->wait_for_entries(player, 3, &entries));
 
   Entries expected_entries = {
-    create_entry(tag1.tid, 0),
-    create_entry(tag1.tid, 1),
-    create_entry(tag1.tid, 2)};
+    this->create_entry(tag1.tid, 0),
+    this->create_entry(tag1.tid, 1),
+    this->create_entry(tag1.tid, 2)};
   ASSERT_EQ(expected_entries, entries);
 
   journal::Entry entry;
@@ -850,65 +870,65 @@ TEST_F(TestJournalPlayer, LiveReplayBlockedNewTag) {
   metadata->allocate_tag(tag1.tag_class, {}, &tag2, &ctx2);
   ASSERT_EQ(0, ctx2.wait());
 
-  ASSERT_EQ(0, write_entry(oid, 0, tag2.tid, 0));
-  ASSERT_TRUE(wait_for_entries(player, 1, &entries));
+  ASSERT_EQ(0, this->write_entry(oid, 0, tag2.tid, 0));
+  ASSERT_TRUE(this->wait_for_entries(player, 1, &entries));
 
   expected_entries = {
-    create_entry(tag2.tid, 0)};
+    this->create_entry(tag2.tid, 0)};
   ASSERT_EQ(expected_entries, entries);
 }
 
-TEST_F(TestJournalPlayer, LiveReplayStaleEntries) {
-  std::string oid = get_temp_oid();
+TYPED_TEST(TestJournalPlayer, LiveReplayStaleEntries) {
+  std::string oid = this->get_temp_oid();
 
   journal::JournalPlayer::ObjectPositions positions = {
     cls::journal::ObjectPosition(0, 1, 0) };
   cls::journal::ObjectSetPosition commit_position(positions);
 
-  ASSERT_EQ(0, create(oid));
-  ASSERT_EQ(0, client_register(oid));
-  ASSERT_EQ(0, client_commit(oid, commit_position));
+  ASSERT_EQ(0, this->create(oid));
+  ASSERT_EQ(0, this->client_register(oid));
+  ASSERT_EQ(0, this->client_commit(oid, commit_position));
 
-  journal::JournalMetadataPtr metadata = create_metadata(oid);
-  ASSERT_EQ(0, init_metadata(metadata));
+  journal::JournalMetadataPtr metadata = this->create_metadata(oid);
+  ASSERT_EQ(0, this->init_metadata(metadata));
 
-  journal::JournalPlayer *player = create_player(oid, metadata);
+  journal::JournalPlayer *player = this->create_player(oid, metadata);
   BOOST_SCOPE_EXIT_ALL( (player) ) {
     C_SaferCond unwatch_ctx;
     player->shut_down(&unwatch_ctx);
     ASSERT_EQ(0, unwatch_ctx.wait());
   };
 
-  ASSERT_EQ(0, write_entry(oid, 1, 0, 1));
-  ASSERT_EQ(0, write_entry(oid, 1, 0, 3));
-  ASSERT_EQ(0, write_entry(oid, 0, 1, 0));
-  ASSERT_EQ(0, write_entry(oid, 1, 1, 1));
+  ASSERT_EQ(0, this->write_entry(oid, 1, 0, 1));
+  ASSERT_EQ(0, this->write_entry(oid, 1, 0, 3));
+  ASSERT_EQ(0, this->write_entry(oid, 0, 1, 0));
+  ASSERT_EQ(0, this->write_entry(oid, 1, 1, 1));
   player->prefetch_and_watch(0.25);
 
   Entries entries;
-  ASSERT_TRUE(wait_for_entries(player, 1, &entries));
+  ASSERT_TRUE(this->wait_for_entries(player, 1, &entries));
 
   Entries expected_entries = {
-    create_entry(1, 1)};
+    this->create_entry(1, 1)};
   ASSERT_EQ(expected_entries, entries);
 }
 
-TEST_F(TestJournalPlayer, LiveReplayRefetchRemoveEmpty) {
-  std::string oid = get_temp_oid();
+TYPED_TEST(TestJournalPlayer, LiveReplayRefetchRemoveEmpty) {
+  std::string oid = this->get_temp_oid();
 
   journal::JournalPlayer::ObjectPositions positions = {
     cls::journal::ObjectPosition(1, 0, 1),
     cls::journal::ObjectPosition(0, 0, 0)};
   cls::journal::ObjectSetPosition commit_position(positions);
 
-  ASSERT_EQ(0, create(oid));
-  ASSERT_EQ(0, client_register(oid));
-  ASSERT_EQ(0, client_commit(oid, commit_position));
+  ASSERT_EQ(0, this->create(oid));
+  ASSERT_EQ(0, this->client_register(oid));
+  ASSERT_EQ(0, this->client_commit(oid, commit_position));
 
-  journal::JournalMetadataPtr metadata = create_metadata(oid);
-  ASSERT_EQ(0, init_metadata(metadata));
+  journal::JournalMetadataPtr metadata = this->create_metadata(oid);
+  ASSERT_EQ(0, this->init_metadata(metadata));
 
-  journal::JournalPlayer *player = create_player(oid, metadata);
+  journal::JournalPlayer *player = this->create_player(oid, metadata);
   BOOST_SCOPE_EXIT_ALL( (player) ) {
     C_SaferCond unwatch_ctx;
     player->shut_down(&unwatch_ctx);
@@ -916,41 +936,41 @@ TEST_F(TestJournalPlayer, LiveReplayRefetchRemoveEmpty) {
   };
 
   ASSERT_EQ(0, metadata->set_active_set(1));
-  ASSERT_EQ(0, write_entry(oid, 0, 0, 0));
-  ASSERT_EQ(0, write_entry(oid, 1, 0, 1));
-  ASSERT_EQ(0, write_entry(oid, 3, 0, 3));
-  ASSERT_EQ(0, write_entry(oid, 2, 1, 0));
+  ASSERT_EQ(0, this->write_entry(oid, 0, 0, 0));
+  ASSERT_EQ(0, this->write_entry(oid, 1, 0, 1));
+  ASSERT_EQ(0, this->write_entry(oid, 3, 0, 3));
+  ASSERT_EQ(0, this->write_entry(oid, 2, 1, 0));
   player->prefetch_and_watch(0.25);
 
   Entries entries;
-  ASSERT_TRUE(wait_for_entries(player, 1, &entries));
+  ASSERT_TRUE(this->wait_for_entries(player, 1, &entries));
 
   Entries expected_entries = {
-    create_entry(1, 0)};
+    this->create_entry(1, 0)};
   ASSERT_EQ(expected_entries, entries);
 
   // should remove player for offset 3 after refetching
   ASSERT_EQ(0, metadata->set_active_set(3));
-  ASSERT_EQ(0, write_entry(oid, 7, 1, 1));
+  ASSERT_EQ(0, this->write_entry(oid, 7, 1, 1));
 
-  ASSERT_TRUE(wait_for_entries(player, 1, &entries));
+  ASSERT_TRUE(this->wait_for_entries(player, 1, &entries));
 
   expected_entries = {
-    create_entry(1, 1)};
+    this->create_entry(1, 1)};
   ASSERT_EQ(expected_entries, entries);
 }
 
-TEST_F(TestJournalPlayer, PrefechShutDown) {
-  std::string oid = get_temp_oid();
+TYPED_TEST(TestJournalPlayer, PrefechShutDown) {
+  std::string oid = this->get_temp_oid();
 
-  ASSERT_EQ(0, create(oid));
-  ASSERT_EQ(0, client_register(oid));
-  ASSERT_EQ(0, client_commit(oid, {}));
+  ASSERT_EQ(0, this->create(oid));
+  ASSERT_EQ(0, this->client_register(oid));
+  ASSERT_EQ(0, this->client_commit(oid, {}));
 
-  journal::JournalMetadataPtr metadata = create_metadata(oid);
-  ASSERT_EQ(0, init_metadata(metadata));
+  journal::JournalMetadataPtr metadata = this->create_metadata(oid);
+  ASSERT_EQ(0, this->init_metadata(metadata));
 
-  journal::JournalPlayer *player = create_player(oid, metadata);
+  journal::JournalPlayer *player = this->create_player(oid, metadata);
   BOOST_SCOPE_EXIT_ALL( (player) ) {
     C_SaferCond unwatch_ctx;
     player->shut_down(&unwatch_ctx);
@@ -959,17 +979,17 @@ TEST_F(TestJournalPlayer, PrefechShutDown) {
   player->prefetch();
 }
 
-TEST_F(TestJournalPlayer, LiveReplayShutDown) {
-  std::string oid = get_temp_oid();
+TYPED_TEST(TestJournalPlayer, LiveReplayShutDown) {
+  std::string oid = this->get_temp_oid();
 
-  ASSERT_EQ(0, create(oid));
-  ASSERT_EQ(0, client_register(oid));
-  ASSERT_EQ(0, client_commit(oid, {}));
+  ASSERT_EQ(0, this->create(oid));
+  ASSERT_EQ(0, this->client_register(oid));
+  ASSERT_EQ(0, this->client_commit(oid, {}));
 
-  journal::JournalMetadataPtr metadata = create_metadata(oid);
-  ASSERT_EQ(0, init_metadata(metadata));
+  journal::JournalMetadataPtr metadata = this->create_metadata(oid);
+  ASSERT_EQ(0, this->init_metadata(metadata));
 
-  journal::JournalPlayer *player = create_player(oid, metadata);
+  journal::JournalPlayer *player = this->create_player(oid, metadata);
   BOOST_SCOPE_EXIT_ALL( (player) ) {
     C_SaferCond unwatch_ctx;
     player->shut_down(&unwatch_ctx);
diff --git a/src/test/journal/test_Journaler.cc b/src/test/journal/test_Journaler.cc
index 4a4ecba..d400695 100644
--- a/src/test/journal/test_Journaler.cc
+++ b/src/test/journal/test_Journaler.cc
@@ -2,6 +2,7 @@
 // vim: ts=8 sw=2 smarttab
 
 #include "journal/Journaler.h"
+#include "journal/Settings.h"
 #include "include/stringify.h"
 #include "gtest/gtest.h"
 #include "test/librados/test.h"
@@ -21,7 +22,7 @@ public:
     RadosTestFixture::SetUp();
     m_journal_id = get_temp_journal_id();
     m_journaler = new journal::Journaler(m_work_queue, m_timer, &m_timer_lock,
-                                         m_ioctx, m_journal_id, CLIENT_ID, 5);
+                                         m_ioctx, m_journal_id, CLIENT_ID, {});
   }
 
   virtual void TearDown() {
@@ -47,7 +48,7 @@ public:
 
   int register_client(const std::string &client_id, const std::string &desc) {
     journal::Journaler journaler(m_work_queue, m_timer, &m_timer_lock,
-                                 m_ioctx, m_journal_id, client_id, 5);
+                                 m_ioctx, m_journal_id, client_id, {});
     bufferlist data;
     data.append(desc);
     C_SaferCond cond;
@@ -57,7 +58,7 @@ public:
 
   int update_client(const std::string &client_id, const std::string &desc) {
     journal::Journaler journaler(m_work_queue, m_timer, &m_timer_lock,
-                                 m_ioctx, m_journal_id, client_id, 5);
+                                 m_ioctx, m_journal_id, client_id, {});
     bufferlist data;
     data.append(desc);
     C_SaferCond cond;
@@ -67,7 +68,7 @@ public:
 
   int unregister_client(const std::string &client_id) {
     journal::Journaler journaler(m_work_queue, m_timer, &m_timer_lock,
-                                 m_ioctx, m_journal_id, client_id, 5);
+                                 m_ioctx, m_journal_id, client_id, {});
     C_SaferCond cond;
     journaler.unregister_client(&cond);
     return cond.wait();
diff --git a/src/test/journal/test_ObjectPlayer.cc b/src/test/journal/test_ObjectPlayer.cc
index 67c35a1..90a3334 100644
--- a/src/test/journal/test_ObjectPlayer.cc
+++ b/src/test/journal/test_ObjectPlayer.cc
@@ -10,36 +10,80 @@
 #include "test/librados/test.h"
 #include "test/journal/RadosTestFixture.h"
 
+template <typename T>
 class TestObjectPlayer : public RadosTestFixture {
 public:
+  static const uint32_t max_fetch_bytes = T::max_fetch_bytes;
+
   journal::ObjectPlayerPtr create_object(const std::string &oid,
                                          uint8_t order) {
     journal::ObjectPlayerPtr object(new journal::ObjectPlayer(
-      m_ioctx, oid + ".", 0, *m_timer, m_timer_lock, order));
+      m_ioctx, oid + ".", 0, *m_timer, m_timer_lock, order,
+      max_fetch_bytes));
     return object;
   }
 
+  int fetch(journal::ObjectPlayerPtr object_player) {
+    while (true) {
+      C_SaferCond ctx;
+      object_player->set_refetch_state(
+        journal::ObjectPlayer::REFETCH_STATE_NONE);
+      object_player->fetch(&ctx);
+      int r = ctx.wait();
+      if (r < 0 || !object_player->refetch_required()) {
+        return r;
+      }
+    }
+    return 0;
+  }
+
+  int watch_and_wait_for_entries(journal::ObjectPlayerPtr object_player,
+                                 journal::ObjectPlayer::Entries *entries,
+                                 size_t count) {
+    for (size_t i = 0; i < 50; ++i) {
+      object_player->get_entries(entries);
+      if (entries->size() == count) {
+        break;
+      }
+
+      C_SaferCond ctx;
+      object_player->watch(&ctx, 0.1);
+
+      int r = ctx.wait();
+      if (r < 0) {
+        return r;
+      }
+    }
+    return 0;
+  }
+
   std::string get_object_name(const std::string &oid) {
     return oid + ".0";
   }
 };
 
-TEST_F(TestObjectPlayer, Fetch) {
-  std::string oid = get_temp_oid();
+template <uint32_t _max_fetch_bytes>
+struct TestObjectPlayerParams {
+  static const uint32_t max_fetch_bytes = _max_fetch_bytes;
+};
+
+typedef ::testing::Types<TestObjectPlayerParams<0>,
+                         TestObjectPlayerParams<10> > TestObjectPlayerTypes;
+TYPED_TEST_CASE(TestObjectPlayer, TestObjectPlayerTypes);
+
+TYPED_TEST(TestObjectPlayer, Fetch) {
+  std::string oid = this->get_temp_oid();
 
-  journal::Entry entry1(234, 123, create_payload(std::string(24, '1')));
-  journal::Entry entry2(234, 124, create_payload(std::string(24, '1')));
+  journal::Entry entry1(234, 123, this->create_payload(std::string(24, '1')));
+  journal::Entry entry2(234, 124, this->create_payload(std::string(24, '1')));
 
   bufferlist bl;
   ::encode(entry1, bl);
   ::encode(entry2, bl);
-  ASSERT_EQ(0, append(get_object_name(oid), bl));
+  ASSERT_EQ(0, this->append(this->get_object_name(oid), bl));
 
-  journal::ObjectPlayerPtr object = create_object(oid, 14);
-
-  C_SaferCond cond;
-  object->fetch(&cond);
-  ASSERT_LE(0, cond.wait());
+  journal::ObjectPlayerPtr object = this->create_object(oid, 14);
+  ASSERT_LE(0, this->fetch(object));
 
   journal::ObjectPlayer::Entries entries;
   object->get_entries(&entries);
@@ -49,48 +93,42 @@ TEST_F(TestObjectPlayer, Fetch) {
   ASSERT_EQ(expected_entries, entries);
 }
 
-TEST_F(TestObjectPlayer, FetchLarge) {
-  std::string oid = get_temp_oid();
+TYPED_TEST(TestObjectPlayer, FetchLarge) {
+  std::string oid = this->get_temp_oid();
 
   journal::Entry entry1(234, 123,
-                        create_payload(std::string(8192 - 33, '1')));
-  journal::Entry entry2(234, 124, create_payload(""));
+                        this->create_payload(std::string(8192 - 32, '1')));
+  journal::Entry entry2(234, 124, this->create_payload(""));
 
   bufferlist bl;
   ::encode(entry1, bl);
   ::encode(entry2, bl);
-  ASSERT_EQ(0, append(get_object_name(oid), bl));
-
-  journal::ObjectPlayerPtr object = create_object(oid, 12);
+  ASSERT_EQ(0, this->append(this->get_object_name(oid), bl));
 
-  C_SaferCond cond;
-  object->fetch(&cond);
-  ASSERT_LE(0, cond.wait());
+  journal::ObjectPlayerPtr object = this->create_object(oid, 12);
+  ASSERT_LE(0, this->fetch(object));
 
   journal::ObjectPlayer::Entries entries;
   object->get_entries(&entries);
-  ASSERT_EQ(1U, entries.size());
+  ASSERT_EQ(2U, entries.size());
 
-  journal::ObjectPlayer::Entries expected_entries = {entry1};
+  journal::ObjectPlayer::Entries expected_entries = {entry1, entry2};
   ASSERT_EQ(expected_entries, entries);
 }
 
-TEST_F(TestObjectPlayer, FetchDeDup) {
-  std::string oid = get_temp_oid();
+TYPED_TEST(TestObjectPlayer, FetchDeDup) {
+  std::string oid = this->get_temp_oid();
 
-  journal::Entry entry1(234, 123, create_payload(std::string(24, '1')));
-  journal::Entry entry2(234, 123, create_payload(std::string(24, '2')));
+  journal::Entry entry1(234, 123, this->create_payload(std::string(24, '1')));
+  journal::Entry entry2(234, 123, this->create_payload(std::string(24, '2')));
 
   bufferlist bl;
   ::encode(entry1, bl);
   ::encode(entry2, bl);
-  ASSERT_EQ(0, append(get_object_name(oid), bl));
-
-  journal::ObjectPlayerPtr object = create_object(oid, 14);
+  ASSERT_EQ(0, this->append(this->get_object_name(oid), bl));
 
-  C_SaferCond cond;
-  object->fetch(&cond);
-  ASSERT_LE(0, cond.wait());
+  journal::ObjectPlayerPtr object = this->create_object(oid, 14);
+  ASSERT_LE(0, this->fetch(object));
 
   journal::ObjectPlayer::Entries entries;
   object->get_entries(&entries);
@@ -100,48 +138,32 @@ TEST_F(TestObjectPlayer, FetchDeDup) {
   ASSERT_EQ(expected_entries, entries);
 }
 
-TEST_F(TestObjectPlayer, FetchEmpty) {
-  std::string oid = get_temp_oid();
+TYPED_TEST(TestObjectPlayer, FetchEmpty) {
+  std::string oid = this->get_temp_oid();
 
   bufferlist bl;
-  ASSERT_EQ(0, append(get_object_name(oid), bl));
+  ASSERT_EQ(0, this->append(this->get_object_name(oid), bl));
 
-  journal::ObjectPlayerPtr object = create_object(oid, 14);
+  journal::ObjectPlayerPtr object = this->create_object(oid, 14);
 
-  C_SaferCond cond;
-  object->fetch(&cond);
-  ASSERT_EQ(-ENOENT, cond.wait());
+  ASSERT_EQ(0, this->fetch(object));
   ASSERT_TRUE(object->empty());
 }
 
-TEST_F(TestObjectPlayer, FetchError) {
-  std::string oid = get_temp_oid();
+TYPED_TEST(TestObjectPlayer, FetchCorrupt) {
+  std::string oid = this->get_temp_oid();
 
-  journal::ObjectPlayerPtr object = create_object(oid, 14);
-
-  C_SaferCond cond;
-  object->fetch(&cond);
-  ASSERT_EQ(-ENOENT, cond.wait());
-  ASSERT_TRUE(object->empty());
-}
-
-TEST_F(TestObjectPlayer, FetchCorrupt) {
-  std::string oid = get_temp_oid();
-
-  journal::Entry entry1(234, 123, create_payload(std::string(24, '1')));
-  journal::Entry entry2(234, 124, create_payload(std::string(24, '2')));
+  journal::Entry entry1(234, 123, this->create_payload(std::string(24, '1')));
+  journal::Entry entry2(234, 124, this->create_payload(std::string(24, '2')));
 
   bufferlist bl;
   ::encode(entry1, bl);
-  ::encode(create_payload("corruption"), bl);
+  ::encode(this->create_payload("corruption"), bl);
   ::encode(entry2, bl);
-  ASSERT_EQ(0, append(get_object_name(oid), bl));
-
-  journal::ObjectPlayerPtr object = create_object(oid, 14);
+  ASSERT_EQ(0, this->append(this->get_object_name(oid), bl));
 
-  C_SaferCond cond;
-  object->fetch(&cond);
-  ASSERT_EQ(-EBADMSG, cond.wait());
+  journal::ObjectPlayerPtr object = this->create_object(oid, 14);
+  ASSERT_EQ(-EBADMSG, this->fetch(object));
 
   journal::ObjectPlayer::Entries entries;
   object->get_entries(&entries);
@@ -151,21 +173,18 @@ TEST_F(TestObjectPlayer, FetchCorrupt) {
   ASSERT_EQ(expected_entries, entries);
 }
 
-TEST_F(TestObjectPlayer, FetchAppend) {
-  std::string oid = get_temp_oid();
+TYPED_TEST(TestObjectPlayer, FetchAppend) {
+  std::string oid = this->get_temp_oid();
 
-  journal::Entry entry1(234, 123, create_payload(std::string(24, '1')));
-  journal::Entry entry2(234, 124, create_payload(std::string(24, '2')));
+  journal::Entry entry1(234, 123, this->create_payload(std::string(24, '1')));
+  journal::Entry entry2(234, 124, this->create_payload(std::string(24, '2')));
 
   bufferlist bl;
   ::encode(entry1, bl);
-  ASSERT_EQ(0, append(get_object_name(oid), bl));
+  ASSERT_EQ(0, this->append(this->get_object_name(oid), bl));
 
-  journal::ObjectPlayerPtr object = create_object(oid, 14);
-
-  C_SaferCond cond1;
-  object->fetch(&cond1);
-  ASSERT_LE(0, cond1.wait());
+  journal::ObjectPlayerPtr object = this->create_object(oid, 14);
+  ASSERT_LE(0, this->fetch(object));
 
   journal::ObjectPlayer::Entries entries;
   object->get_entries(&entries);
@@ -176,11 +195,8 @@ TEST_F(TestObjectPlayer, FetchAppend) {
 
   bl.clear();
   ::encode(entry2, bl);
-  ASSERT_EQ(0, append(get_object_name(oid), bl));
-
-  C_SaferCond cond2;
-  object->fetch(&cond2);
-  ASSERT_LE(0, cond2.wait());
+  ASSERT_EQ(0, this->append(this->get_object_name(oid), bl));
+  ASSERT_LE(0, this->fetch(object));
 
   object->get_entries(&entries);
   ASSERT_EQ(2U, entries.size());
@@ -189,22 +205,19 @@ TEST_F(TestObjectPlayer, FetchAppend) {
   ASSERT_EQ(expected_entries, entries);
 }
 
-TEST_F(TestObjectPlayer, PopEntry) {
-  std::string oid = get_temp_oid();
+TYPED_TEST(TestObjectPlayer, PopEntry) {
+  std::string oid = this->get_temp_oid();
 
-  journal::Entry entry1(234, 123, create_payload(std::string(24, '1')));
-  journal::Entry entry2(234, 124, create_payload(std::string(24, '1')));
+  journal::Entry entry1(234, 123, this->create_payload(std::string(24, '1')));
+  journal::Entry entry2(234, 124, this->create_payload(std::string(24, '1')));
 
   bufferlist bl;
   ::encode(entry1, bl);
   ::encode(entry2, bl);
-  ASSERT_EQ(0, append(get_object_name(oid), bl));
+  ASSERT_EQ(0, this->append(this->get_object_name(oid), bl));
 
-  journal::ObjectPlayerPtr object = create_object(oid, 14);
-
-  C_SaferCond cond;
-  object->fetch(&cond);
-  ASSERT_LE(0, cond.wait());
+  journal::ObjectPlayerPtr object = this->create_object(oid, 14);
+  ASSERT_LE(0, this->fetch(object));
 
   journal::ObjectPlayer::Entries entries;
   object->get_entries(&entries);
@@ -220,23 +233,23 @@ TEST_F(TestObjectPlayer, PopEntry) {
   ASSERT_TRUE(object->empty());
 }
 
-TEST_F(TestObjectPlayer, Watch) {
-  std::string oid = get_temp_oid();
-  journal::ObjectPlayerPtr object = create_object(oid, 14);
+TYPED_TEST(TestObjectPlayer, Watch) {
+  std::string oid = this->get_temp_oid();
+  journal::ObjectPlayerPtr object = this->create_object(oid, 14);
 
   C_SaferCond cond1;
   object->watch(&cond1, 0.1);
 
-  journal::Entry entry1(234, 123, create_payload(std::string(24, '1')));
-  journal::Entry entry2(234, 124, create_payload(std::string(24, '1')));
+  journal::Entry entry1(234, 123, this->create_payload(std::string(24, '1')));
+  journal::Entry entry2(234, 124, this->create_payload(std::string(24, '1')));
 
   bufferlist bl;
   ::encode(entry1, bl);
-  ASSERT_EQ(0, append(get_object_name(oid), bl));
+  ASSERT_EQ(0, this->append(this->get_object_name(oid), bl));
   ASSERT_LE(0, cond1.wait());
 
   journal::ObjectPlayer::Entries entries;
-  object->get_entries(&entries);
+  ASSERT_EQ(0, this->watch_and_wait_for_entries(object, &entries, 1U));
   ASSERT_EQ(1U, entries.size());
 
   journal::ObjectPlayer::Entries expected_entries;
@@ -248,19 +261,19 @@ TEST_F(TestObjectPlayer, Watch) {
 
   bl.clear();
   ::encode(entry2, bl);
-  ASSERT_EQ(0, append(get_object_name(oid), bl));
+  ASSERT_EQ(0, this->append(this->get_object_name(oid), bl));
   ASSERT_LE(0, cond2.wait());
 
-  object->get_entries(&entries);
+  ASSERT_EQ(0, this->watch_and_wait_for_entries(object, &entries, 2U));
   ASSERT_EQ(2U, entries.size());
 
   expected_entries = {entry1, entry2};
   ASSERT_EQ(expected_entries, entries);
 }
 
-TEST_F(TestObjectPlayer, Unwatch) {
-  std::string oid = get_temp_oid();
-  journal::ObjectPlayerPtr object = create_object(oid, 14);
+TYPED_TEST(TestObjectPlayer, Unwatch) {
+  std::string oid = this->get_temp_oid();
+  journal::ObjectPlayerPtr object = this->create_object(oid, 14);
 
   C_SaferCond watch_ctx;
   object->watch(&watch_ctx, 600);
diff --git a/src/test/librados/cmd.cc b/src/test/librados/cmd.cc
index 9261fb5..878a8af 100644
--- a/src/test/librados/cmd.cc
+++ b/src/test/librados/cmd.cc
@@ -48,6 +48,41 @@ TEST(LibRadosCmd, MonDescribe) {
   rados_buffer_free(buf);
   rados_buffer_free(st);
 
+  cmd[0] = (char *)"";
+  ASSERT_EQ(-EINVAL, rados_mon_command(cluster, (const char **)cmd, 1, "{}", 2, &buf, &buflen, &st, &stlen));
+  rados_buffer_free(buf);
+  rados_buffer_free(st);
+
+  cmd[0] = (char *)"{}";
+  ASSERT_EQ(-EINVAL, rados_mon_command(cluster, (const char **)cmd, 1, "", 0, &buf, &buflen, &st, &stlen));
+  rados_buffer_free(buf);
+  rados_buffer_free(st);
+
+  cmd[0] = (char *)"{\"abc\":\"something\"}";
+  ASSERT_EQ(-EINVAL, rados_mon_command(cluster, (const char **)cmd, 1, "", 0, &buf, &buflen, &st, &stlen));
+  rados_buffer_free(buf);
+  rados_buffer_free(st);
+
+  cmd[0] = (char *)"{\"prefix\":\"\"}";
+  ASSERT_EQ(-EINVAL, rados_mon_command(cluster, (const char **)cmd, 1, "", 0, &buf, &buflen, &st, &stlen));
+  rados_buffer_free(buf);
+  rados_buffer_free(st);
+
+  cmd[0] = (char *)"{\"prefix\":\"    \"}";
+  ASSERT_EQ(-EINVAL, rados_mon_command(cluster, (const char **)cmd, 1, "", 0, &buf, &buflen, &st, &stlen));
+  rados_buffer_free(buf);
+  rados_buffer_free(st);
+
+  cmd[0] = (char *)"{\"prefix\":\";;;,,,;;,,\"}";
+  ASSERT_EQ(-EINVAL, rados_mon_command(cluster, (const char **)cmd, 1, "", 0, &buf, &buflen, &st, &stlen));
+  rados_buffer_free(buf);
+  rados_buffer_free(st);
+
+  cmd[0] = (char *)"{\"prefix\":\"extra command\"}";
+  ASSERT_EQ(-EINVAL, rados_mon_command(cluster, (const char **)cmd, 1, "", 0, &buf, &buflen, &st, &stlen));
+  rados_buffer_free(buf);
+  rados_buffer_free(st);
+
   cmd[0] = (char *)"{\"prefix\":\"mon_status\"}";
   ASSERT_EQ(0, rados_mon_command(cluster, (const char **)cmd, 1, "", 0, &buf, &buflen, &st, &stlen));
   ASSERT_LT(0u, buflen);
diff --git a/src/test/librados/misc.cc b/src/test/librados/misc.cc
index 7d89b21..3212e03 100644
--- a/src/test/librados/misc.cc
+++ b/src/test/librados/misc.cc
@@ -454,6 +454,24 @@ TEST_F(LibRadosMiscPP, ExecPP) {
   ASSERT_NE(all_features, (unsigned)0);
 }
 
+void set_completion_complete(rados_completion_t cb, void *arg)
+{
+  bool *my_aio_complete = (bool*)arg;
+  *my_aio_complete = true;
+}
+
+TEST_F(LibRadosMiscPP, BadFlagsPP) {
+  unsigned badflags = CEPH_OSD_FLAG_PARALLELEXEC;
+  {
+    bufferlist bl;
+    bl.append("data");
+    ASSERT_EQ(0, ioctx.write("badfoo", bl, bl.length(), 0));
+  }
+  {
+    ASSERT_EQ(-EINVAL, ioctx.remove("badfoo", badflags));
+  }
+}
+
 TEST_F(LibRadosMiscPP, Operate1PP) {
   ObjectWriteOperation o;
   {
@@ -547,12 +565,6 @@ TEST_F(LibRadosMiscPP, BigObjectPP) {
 #endif
 }
 
-void set_completion_complete(rados_completion_t cb, void *arg)
-{
-  bool *my_aio_complete = (bool*)arg;
-  *my_aio_complete = true;
-}
-
 TEST_F(LibRadosMiscPP, AioOperatePP) {
   bool my_aio_complete = false;
   AioCompletion *my_completion = cluster.aio_create_completion(
diff --git a/src/test/librados/test.h b/src/test/librados/test.h
index 1d13d81..28f5a85 100644
--- a/src/test/librados/test.h
+++ b/src/test/librados/test.h
@@ -48,7 +48,7 @@ class TestAlarm
 {
 public:
   TestAlarm() {
-    alarm(360);
+    alarm(1200);
   }
   ~TestAlarm() {
     alarm(0);
diff --git a/src/test/librbd/exclusive_lock/test_mock_ReleaseRequest.cc b/src/test/librbd/exclusive_lock/test_mock_ReleaseRequest.cc
index 2ed8b8e..99ae094 100644
--- a/src/test/librbd/exclusive_lock/test_mock_ReleaseRequest.cc
+++ b/src/test/librbd/exclusive_lock/test_mock_ReleaseRequest.cc
@@ -19,6 +19,15 @@ template class librbd::exclusive_lock::ReleaseRequest<librbd::MockImageCtx>;
 namespace librbd {
 namespace exclusive_lock {
 
+namespace {
+
+struct MockContext : public Context {
+  MOCK_METHOD1(complete, void(int));
+  MOCK_METHOD1(finish, void(int));
+};
+
+} // anonymous namespace
+
 using ::testing::_;
 using ::testing::InSequence;
 using ::testing::Return;
@@ -30,6 +39,10 @@ class TestMockExclusiveLockReleaseRequest : public TestMockFixture {
 public:
   typedef ReleaseRequest<MockImageCtx> MockReleaseRequest;
 
+  void expect_complete_context(MockContext &mock_context, int r) {
+    EXPECT_CALL(mock_context, complete(r));
+  }
+
   void expect_test_features(MockImageCtx &mock_image_ctx, uint64_t features,
                             bool enabled) {
     EXPECT_CALL(mock_image_ctx, test_features(features))
@@ -105,15 +118,16 @@ TEST_F(TestMockExclusiveLockReleaseRequest, Success) {
   mock_image_ctx.object_map = mock_object_map;
   expect_close_object_map(mock_image_ctx, *mock_object_map);
 
+  MockContext mock_releasing_ctx;
+  expect_complete_context(mock_releasing_ctx, 0);
   expect_unlock(mock_image_ctx, 0);
 
-  C_SaferCond release_ctx;
   C_SaferCond ctx;
   MockReleaseRequest *req = MockReleaseRequest::create(mock_image_ctx,
                                                        TEST_COOKIE,
-                                                       &release_ctx, &ctx);
+                                                       &mock_releasing_ctx,
+                                                       &ctx);
   req->send();
-  ASSERT_EQ(0, release_ctx.wait());
   ASSERT_EQ(0, ctx.wait());
 }
 
diff --git a/src/test/librbd/fsx.cc b/src/test/librbd/fsx.cc
index 2a06b34..95a2f89 100644
--- a/src/test/librbd/fsx.cc
+++ b/src/test/librbd/fsx.cc
@@ -53,6 +53,7 @@
 #include "journal/Journaler.h"
 #include "journal/ReplayEntry.h"
 #include "journal/ReplayHandler.h"
+#include "journal/Settings.h"
 
 #include <boost/scope_exit.hpp>
 
@@ -322,7 +323,7 @@ int register_journal(rados_ioctx_t ioctx, const char *image_name) {
                 return r;
         }
 
-        journal::Journaler journaler(io_ctx, image_id, JOURNAL_CLIENT_ID, 0);
+        journal::Journaler journaler(io_ctx, image_id, JOURNAL_CLIENT_ID, {});
         r = journaler.register_client(bufferlist());
         if (r < 0) {
                 simple_err("failed to register journal client", r);
@@ -341,7 +342,7 @@ int unregister_journal(rados_ioctx_t ioctx, const char *image_name) {
                 return r;
         }
 
-        journal::Journaler journaler(io_ctx, image_id, JOURNAL_CLIENT_ID, 0);
+        journal::Journaler journaler(io_ctx, image_id, JOURNAL_CLIENT_ID, {});
         r = journaler.unregister_client();
         if (r < 0) {
                 simple_err("failed to unregister journal client", r);
@@ -393,7 +394,7 @@ int replay_journal(rados_ioctx_t ioctx, const char *image_name,
                 return r;
         }
 
-        journal::Journaler journaler(io_ctx, image_id, JOURNAL_CLIENT_ID, 0);
+        journal::Journaler journaler(io_ctx, image_id, JOURNAL_CLIENT_ID, {});
         C_SaferCond init_ctx;
         journaler.init(&init_ctx);
         BOOST_SCOPE_EXIT_ALL( (&journaler) ) {
@@ -406,7 +407,7 @@ int replay_journal(rados_ioctx_t ioctx, const char *image_name,
                 return r;
         }
 
-        journal::Journaler replay_journaler(io_ctx, replay_image_id, "", 0);
+        journal::Journaler replay_journaler(io_ctx, replay_image_id, "", {});
 
         C_SaferCond replay_init_ctx;
         replay_journaler.init(&replay_init_ctx);
@@ -501,7 +502,7 @@ struct rbd_ctx {
 	int krbd_fd;		/* image /dev/rbd<id> fd */ /* reused for nbd test */
 };
 
-#define RBD_CTX_INIT	(struct rbd_ctx) { NULL, NULL, NULL, -1 }
+#define RBD_CTX_INIT	(struct rbd_ctx) { NULL, NULL, NULL, -1}
 
 struct rbd_operations {
 	int (*open)(const char *name, struct rbd_ctx *ctx);
@@ -523,6 +524,7 @@ char *iname;			/* name of our test image */
 rados_t cluster;		/* handle for our test cluster */
 rados_ioctx_t ioctx;		/* handle for our test pool */
 struct krbd_ctx *krbd;		/* handle for libkrbd */
+bool skip_partial_discard;	/* rbd_skip_partial_discard config value */
 
 /*
  * librbd/krbd rbd_operations handlers.  Given the rest of fsx.c, no
@@ -1042,7 +1044,8 @@ nbd_open(const char *name, struct rbd_ctx *ctx)
 	char dev[4096];
 	char *devnode;
 
-	SubProcess process("rbd-nbd", SubProcess::KEEP, SubProcess::PIPE);
+	SubProcess process("rbd-nbd", SubProcess::KEEP, SubProcess::PIPE,
+			   SubProcess::KEEP);
 	process.add_cmd_arg("map");
 	std::string img;
 	img.append(pool);
@@ -1352,10 +1355,25 @@ report_failure(int status)
 #define short_at(cp) ((unsigned short)((*((unsigned char *)(cp)) << 8) | \
 				        *(((unsigned char *)(cp)) + 1)))
 
+int
+fsxcmp(char *good_buf, char *temp_buf, unsigned size)
+{
+	if (!skip_partial_discard) {
+		return memcmp(good_buf, temp_buf, size);
+	}
+
+	for (unsigned i = 0; i < size; i++) {
+		if (good_buf[i] != temp_buf[i] && good_buf[i] != 0) {
+			return good_buf[i] - temp_buf[i];
+		}
+	}
+	return 0;
+}
+
 void
 check_buffers(char *good_buf, char *temp_buf, unsigned offset, unsigned size)
 {
-	if (memcmp(good_buf + offset, temp_buf, size) != 0) {
+	if (fsxcmp(good_buf + offset, temp_buf, size) != 0) {
 		unsigned i = 0;
 		unsigned n = 0;
 
@@ -1446,6 +1464,7 @@ create_image()
 {
 	int r;
 	int order = 0;
+	char buf[32];
 
 	r = rados_create(&cluster, NULL);
 	if (r < 0) {
@@ -1503,6 +1522,15 @@ create_image()
                         goto failed_open;
                 }
         }
+
+	r = rados_conf_get(cluster, "rbd_skip_partial_discard", buf,
+			   sizeof(buf));
+	if (r < 0) {
+		simple_err("Could not get rbd_skip_partial_discard value", r);
+		goto failed_open;
+	}
+	skip_partial_discard = (strcmp(buf, "true") == 0);
+
 	return 0;
 
  failed_open:
diff --git a/src/test/librbd/journal/test_Entries.cc b/src/test/librbd/journal/test_Entries.cc
index bd984fd..cea3f93 100644
--- a/src/test/librbd/journal/test_Entries.cc
+++ b/src/test/librbd/journal/test_Entries.cc
@@ -11,6 +11,7 @@
 #include "journal/Journaler.h"
 #include "journal/ReplayEntry.h"
 #include "journal/ReplayHandler.h"
+#include "journal/Settings.h"
 #include <list>
 #include <boost/variant.hpp>
 
@@ -66,7 +67,7 @@ public:
 
   journal::Journaler *create_journaler(librbd::ImageCtx *ictx) {
     journal::Journaler *journaler = new journal::Journaler(
-      ictx->md_ctx, ictx->id, "dummy client", 1);
+      ictx->md_ctx, ictx->id, "dummy client", {});
 
     int r = journaler->register_client(bufferlist());
     if (r < 0) {
@@ -159,6 +160,9 @@ TEST_F(TestJournalEntries, AioWrite) {
 TEST_F(TestJournalEntries, AioDiscard) {
   REQUIRE_FEATURE(RBD_FEATURE_JOURNALING);
 
+  CephContext* cct = reinterpret_cast<CephContext*>(_rados.cct());
+  REQUIRE(!cct->_conf->rbd_skip_partial_discard);
+
   librbd::ImageCtx *ictx;
   ASSERT_EQ(0, open_image(m_image_name, &ictx));
 
diff --git a/src/test/librbd/journal/test_Replay.cc b/src/test/librbd/journal/test_Replay.cc
index cbde9ae..e5ff749 100644
--- a/src/test/librbd/journal/test_Replay.cc
+++ b/src/test/librbd/journal/test_Replay.cc
@@ -147,7 +147,11 @@ TEST_F(TestJournalReplay, AioDiscardEvent) {
                                  &read_payload[0], NULL, 0);
   ASSERT_EQ(0, aio_comp->wait_for_complete());
   aio_comp->release();
-  ASSERT_EQ(std::string(read_payload.size(), '\0'), read_payload);
+  if (ictx->cct->_conf->rbd_skip_partial_discard) {
+    ASSERT_EQ(payload, read_payload);
+  } else {
+    ASSERT_EQ(std::string(read_payload.size(), '\0'), read_payload);
+  }
 
   // check the commit position is properly updated
   int64_t current_tag;
@@ -251,37 +255,12 @@ TEST_F(TestJournalReplay, AioFlushEvent) {
 
   // inject a flush operation into the journal
   inject_into_journal(ictx, librbd::journal::AioFlushEvent());
-
-  // start an AIO write op
-  librbd::Journal<> *journal = ictx->journal;
-  ictx->journal = NULL;
-
-  std::string payload(m_image_size, '1');
-  librbd::AioCompletion *aio_comp = new librbd::AioCompletion();
-  {
-    RWLock::RLocker owner_lock(ictx->owner_lock);
-    librbd::AioImageRequest<>::aio_write(ictx, aio_comp, 0, payload.size(),
-                                         payload.c_str(), 0);
-  }
-  ictx->journal = journal;
   close_image(ictx);
 
   // re-open the journal so that it replays the new entry
   ASSERT_EQ(0, open_image(m_image_name, &ictx));
   ASSERT_EQ(0, when_acquired_lock(ictx));
 
-  ASSERT_TRUE(aio_comp->is_complete());
-  ASSERT_EQ(0, aio_comp->wait_for_complete());
-  aio_comp->release();
-
-  std::string read_payload(m_image_size, '\0');
-  aio_comp = new librbd::AioCompletion();
-  ictx->aio_work_queue->aio_read(aio_comp, 0, read_payload.size(),
-                                 &read_payload[0], NULL, 0);
-  ASSERT_EQ(0, aio_comp->wait_for_complete());
-  aio_comp->release();
-  ASSERT_EQ(payload, read_payload);
-
   // check the commit position is properly updated
   int64_t current_tag;
   int64_t current_entry;
@@ -301,7 +280,7 @@ TEST_F(TestJournalReplay, AioFlushEvent) {
   ASSERT_EQ(1, current_entry);
 
   // verify lock ordering constraints
-  aio_comp = new librbd::AioCompletion();
+  librbd::AioCompletion *aio_comp = new librbd::AioCompletion();
   ictx->aio_work_queue->aio_flush(aio_comp);
   ASSERT_EQ(0, aio_comp->wait_for_complete());
   aio_comp->release();
@@ -452,7 +431,8 @@ TEST_F(TestJournalReplay, SnapRename) {
   get_journal_commit_position(ictx, &initial_tag, &initial_entry);
 
   // inject snapshot ops into journal
-  inject_into_journal(ictx, librbd::journal::SnapRenameEvent(1, snap_id, "snap2"));
+  inject_into_journal(ictx, librbd::journal::SnapRenameEvent(1, snap_id, "snap",
+                                                             "snap2"));
   inject_into_journal(ictx, librbd::journal::OpFinishEvent(1, 0));
   close_image(ictx);
 
@@ -707,6 +687,12 @@ TEST_F(TestJournalReplay, ObjectPosition) {
   ASSERT_EQ(0, aio_comp->wait_for_complete());
   aio_comp->release();
 
+  {
+    // user flush requests are ignored when journaling + cache are enabled
+    RWLock::RLocker owner_lock(ictx->owner_lock);
+    ictx->flush();
+  }
+
   // check the commit position updated
   get_journal_commit_position(ictx, &current_tag, &current_entry);
   ASSERT_EQ(initial_tag + 1, current_tag);
diff --git a/src/test/librbd/journal/test_mock_Replay.cc b/src/test/librbd/journal/test_mock_Replay.cc
index 43f2909..8f1ee39 100644
--- a/src/test/librbd/journal/test_mock_Replay.cc
+++ b/src/test/librbd/journal/test_mock_Replay.cc
@@ -54,8 +54,16 @@ struct AioImageRequest<MockReplayImageCtx> {
 
 AioImageRequest<MockReplayImageCtx> *AioImageRequest<MockReplayImageCtx>::s_instance = nullptr;
 
+namespace util {
+
+inline ImageCtx *get_image_ctx(librbd::MockReplayImageCtx *image_ctx) {
+  return image_ctx->image_ctx;
 }
 
+} // namespace util
+
+} // namespace librbd
+
 // template definitions
 #include "librbd/journal/Replay.cc"
 template class librbd::journal::Replay<librbd::MockReplayImageCtx>;
@@ -65,6 +73,7 @@ using ::testing::DoAll;
 using ::testing::InSequence;
 using ::testing::Return;
 using ::testing::SaveArg;
+using ::testing::StrEq;
 using ::testing::WithArgs;
 
 MATCHER_P(CStrEq, str, "") {
@@ -131,7 +140,7 @@ public:
 
   void expect_rename(MockReplayImageCtx &mock_image_ctx, Context **on_finish,
                      const char *image_name) {
-    EXPECT_CALL(*mock_image_ctx.operations, execute_rename(CStrEq(image_name), _))
+    EXPECT_CALL(*mock_image_ctx.operations, execute_rename(StrEq(image_name), _))
                   .WillOnce(DoAll(SaveArg<1>(on_finish),
                                   NotifyInvoke(&m_invoke_lock, &m_invoke_cond)));
   }
@@ -146,7 +155,7 @@ public:
   void expect_snap_create(MockReplayImageCtx &mock_image_ctx,
                           Context **on_finish, const char *snap_name,
                           uint64_t op_tid) {
-    EXPECT_CALL(*mock_image_ctx.operations, execute_snap_create(CStrEq(snap_name), _,
+    EXPECT_CALL(*mock_image_ctx.operations, execute_snap_create(StrEq(snap_name), _,
                                                                 op_tid, false))
                   .WillOnce(DoAll(SaveArg<1>(on_finish),
                                   NotifyInvoke(&m_invoke_lock, &m_invoke_cond)));
@@ -154,7 +163,7 @@ public:
 
   void expect_snap_remove(MockReplayImageCtx &mock_image_ctx,
                           Context **on_finish, const char *snap_name) {
-    EXPECT_CALL(*mock_image_ctx.operations, execute_snap_remove(CStrEq(snap_name), _))
+    EXPECT_CALL(*mock_image_ctx.operations, execute_snap_remove(StrEq(snap_name), _))
                   .WillOnce(DoAll(SaveArg<1>(on_finish),
                                   NotifyInvoke(&m_invoke_lock, &m_invoke_cond)));
   }
@@ -162,28 +171,28 @@ public:
   void expect_snap_rename(MockReplayImageCtx &mock_image_ctx,
                           Context **on_finish, uint64_t snap_id,
                           const char *snap_name) {
-    EXPECT_CALL(*mock_image_ctx.operations, execute_snap_rename(snap_id, CStrEq(snap_name), _))
+    EXPECT_CALL(*mock_image_ctx.operations, execute_snap_rename(snap_id, StrEq(snap_name), _))
                   .WillOnce(DoAll(SaveArg<2>(on_finish),
                                   NotifyInvoke(&m_invoke_lock, &m_invoke_cond)));
   }
 
   void expect_snap_protect(MockReplayImageCtx &mock_image_ctx,
                            Context **on_finish, const char *snap_name) {
-    EXPECT_CALL(*mock_image_ctx.operations, execute_snap_protect(CStrEq(snap_name), _))
+    EXPECT_CALL(*mock_image_ctx.operations, execute_snap_protect(StrEq(snap_name), _))
                   .WillOnce(DoAll(SaveArg<1>(on_finish),
                                   NotifyInvoke(&m_invoke_lock, &m_invoke_cond)));
   }
 
   void expect_snap_unprotect(MockReplayImageCtx &mock_image_ctx,
                              Context **on_finish, const char *snap_name) {
-    EXPECT_CALL(*mock_image_ctx.operations, execute_snap_unprotect(CStrEq(snap_name), _))
+    EXPECT_CALL(*mock_image_ctx.operations, execute_snap_unprotect(StrEq(snap_name), _))
                   .WillOnce(DoAll(SaveArg<1>(on_finish),
                                   NotifyInvoke(&m_invoke_lock, &m_invoke_cond)));
   }
 
   void expect_snap_rollback(MockReplayImageCtx &mock_image_ctx,
                             Context **on_finish, const char *snap_name) {
-    EXPECT_CALL(*mock_image_ctx.operations, execute_snap_rollback(CStrEq(snap_name), _, _))
+    EXPECT_CALL(*mock_image_ctx.operations, execute_snap_rollback(StrEq(snap_name), _, _))
                   .WillOnce(DoAll(SaveArg<2>(on_finish),
                                   NotifyInvoke(&m_invoke_lock, &m_invoke_cond)));
   }
@@ -211,7 +220,11 @@ public:
   void when_process(MockJournalReplay &mock_journal_replay,
                     bufferlist::iterator *it, Context *on_ready,
                     Context *on_safe) {
-    mock_journal_replay.process(it, on_ready, on_safe);
+    EventEntry event_entry;
+    int r = mock_journal_replay.decode(it, &event_entry);
+    ASSERT_EQ(0, r);
+
+    mock_journal_replay.process(event_entry, on_ready, on_safe);
   }
 
   void when_complete(MockReplayImageCtx &mock_image_ctx, AioCompletion *aio_comp,
@@ -828,7 +841,7 @@ TEST_F(TestMockJournalReplay, SnapRenameEvent) {
   C_SaferCond on_start_ready;
   C_SaferCond on_start_safe;
   when_process(mock_journal_replay,
-               EventEntry{SnapRenameEvent(123, 234, "snap")},
+               EventEntry{SnapRenameEvent(123, 234, "snap1", "snap")},
                &on_start_ready, &on_start_safe);
   ASSERT_EQ(0, on_start_ready.wait());
 
@@ -861,7 +874,7 @@ TEST_F(TestMockJournalReplay, SnapRenameEventExists) {
   C_SaferCond on_start_ready;
   C_SaferCond on_start_safe;
   when_process(mock_journal_replay,
-               EventEntry{SnapRenameEvent(123, 234, "snap")},
+               EventEntry{SnapRenameEvent(123, 234, "snap1", "snap")},
                &on_start_ready, &on_start_safe);
   ASSERT_EQ(0, on_start_ready.wait());
 
@@ -972,6 +985,34 @@ TEST_F(TestMockJournalReplay, SnapUnprotectEvent) {
   ASSERT_EQ(0, on_finish_safe.wait());
 }
 
+TEST_F(TestMockJournalReplay, SnapUnprotectOpFinishBusy) {
+  REQUIRE_FEATURE(RBD_FEATURE_JOURNALING);
+
+  librbd::ImageCtx *ictx;
+  ASSERT_EQ(0, open_image(m_image_name, &ictx));
+
+  MockReplayImageCtx mock_image_ctx(*ictx);
+  MockJournalReplay mock_journal_replay(mock_image_ctx);
+  expect_op_work_queue(mock_image_ctx);
+
+  InSequence seq;
+  C_SaferCond on_start_ready;
+  C_SaferCond on_start_safe;
+  when_process(mock_journal_replay, EventEntry{SnapUnprotectEvent(123, "snap")},
+               &on_start_ready, &on_start_safe);
+  ASSERT_EQ(0, on_start_ready.wait());
+
+  // aborts the snap unprotect op if image had children
+  C_SaferCond on_finish_ready;
+  C_SaferCond on_finish_safe;
+  when_process(mock_journal_replay, EventEntry{OpFinishEvent(123, -EBUSY)},
+               &on_finish_ready, &on_finish_safe);
+
+  ASSERT_EQ(0, on_start_safe.wait());
+  ASSERT_EQ(0, on_finish_safe.wait());
+  ASSERT_EQ(0, on_finish_ready.wait());
+}
+
 TEST_F(TestMockJournalReplay, SnapUnprotectEventInvalid) {
   REQUIRE_FEATURE(RBD_FEATURE_JOURNALING);
 
diff --git a/src/test/librbd/mock/MockImageCtx.h b/src/test/librbd/mock/MockImageCtx.h
index 034db1c..ae0f3d6 100644
--- a/src/test/librbd/mock/MockImageCtx.h
+++ b/src/test/librbd/mock/MockImageCtx.h
@@ -39,6 +39,7 @@ struct MockImageCtx {
   MockImageCtx(librbd::ImageCtx &image_ctx)
     : image_ctx(&image_ctx),
       cct(image_ctx.cct),
+      perfcounter(image_ctx.perfcounter),
       snap_name(image_ctx.snap_name),
       snap_id(image_ctx.snap_id),
       snap_exists(image_ctx.snap_exists),
@@ -47,18 +48,19 @@ struct MockImageCtx {
       snap_info(image_ctx.snap_info),
       snap_ids(image_ctx.snap_ids),
       object_cacher(image_ctx.object_cacher),
+      object_set(image_ctx.object_set),
       old_format(image_ctx.old_format),
       read_only(image_ctx.read_only),
       lockers(image_ctx.lockers),
       exclusive_locked(image_ctx.exclusive_locked),
       lock_tag(image_ctx.lock_tag),
-      owner_lock("owner_lock"),
-      md_lock("md_lock"),
-      cache_lock("cache_lock"),
-      snap_lock("snap_lock"),
-      parent_lock("parent_lock"),
-      object_map_lock("object_map_lock"),
-      async_ops_lock("async_ops_lock"),
+      owner_lock(image_ctx.owner_lock),
+      md_lock(image_ctx.md_lock),
+      cache_lock(image_ctx.cache_lock),
+      snap_lock(image_ctx.snap_lock),
+      parent_lock(image_ctx.parent_lock),
+      object_map_lock(image_ctx.object_map_lock),
+      async_ops_lock(image_ctx.async_ops_lock),
       order(image_ctx.order),
       size(image_ctx.size),
       features(image_ctx.features),
@@ -70,9 +72,11 @@ struct MockImageCtx {
       id(image_ctx.id),
       name(image_ctx.name),
       parent_md(image_ctx.parent_md),
+      format_string(image_ctx.format_string),
       layout(image_ctx.layout),
       aio_work_queue(new MockAioImageRequestWQ()),
       op_work_queue(new MockContextWQ()),
+      readahead_max_bytes(image_ctx.readahead_max_bytes),
       parent(NULL), operations(new MockOperations()),
       state(new MockImageState()),
       image_watcher(NULL), object_map(NULL),
@@ -86,7 +90,8 @@ struct MockImageCtx {
       journal_object_flush_interval(image_ctx.journal_object_flush_interval),
       journal_object_flush_bytes(image_ctx.journal_object_flush_bytes),
       journal_object_flush_age(image_ctx.journal_object_flush_age),
-      journal_pool(image_ctx.journal_pool)
+      journal_pool(image_ctx.journal_pool),
+      journal_max_payload_bytes(image_ctx.journal_max_payload_bytes)
   {
     md_ctx.dup(image_ctx.md_ctx);
     data_ctx.dup(image_ctx.data_ctx);
@@ -143,7 +148,9 @@ struct MockImageCtx {
                               uint8_t protection_status, uint64_t flags));
   MOCK_METHOD2(rm_snap, void(std::string in_snap_name, librados::snap_t id));
 
+  MOCK_METHOD0(user_flushed, void());
   MOCK_METHOD1(flush, void(Context *));
+  MOCK_METHOD1(flush_async_operations, void(Context *));
   MOCK_METHOD1(flush_copyup, void(Context *));
 
   MOCK_METHOD1(invalidate_cache, void(Context *));
@@ -164,8 +171,14 @@ struct MockImageCtx {
 
   MOCK_CONST_METHOD0(get_journal_policy, journal::Policy*());
 
+  MOCK_METHOD7(aio_read_from_cache, void(object_t, uint64_t, bufferlist *,
+                                         size_t, uint64_t, Context *, int));
+  MOCK_METHOD7(write_to_cache, void(object_t, const bufferlist&, size_t,
+                                    uint64_t, Context *, int, uint64_t));
+
   ImageCtx *image_ctx;
   CephContext *cct;
+  PerfCounters *perfcounter;
 
   std::string snap_name;
   uint64_t snap_id;
@@ -177,6 +190,7 @@ struct MockImageCtx {
   std::map<std::string, librados::snap_t> snap_ids;
 
   ObjectCacher *object_cacher;
+  ObjectCacher::ObjectSet *object_set;
 
   bool old_format;
   bool read_only;
@@ -189,13 +203,13 @@ struct MockImageCtx {
   librados::IoCtx md_ctx;
   librados::IoCtx data_ctx;
 
-  RWLock owner_lock;
-  RWLock md_lock;
-  Mutex cache_lock;
-  RWLock snap_lock;
-  RWLock parent_lock;
-  RWLock object_map_lock;
-  Mutex async_ops_lock;
+  RWLock &owner_lock;
+  RWLock &md_lock;
+  Mutex &cache_lock;
+  RWLock &snap_lock;
+  RWLock &parent_lock;
+  RWLock &object_map_lock;
+  Mutex &async_ops_lock;
 
   uint8_t order;
   uint64_t size;
@@ -208,6 +222,7 @@ struct MockImageCtx {
   std::string id;
   std::string name;
   parent_info parent_md;
+  char *format_string;
 
   file_layout_t layout;
 
@@ -220,6 +235,7 @@ struct MockImageCtx {
   MockContextWQ *op_work_queue;
 
   MockReadahead readahead;
+  uint64_t readahead_max_bytes;
 
   MockImageCtx *parent;
   MockOperations *operations;
@@ -240,6 +256,7 @@ struct MockImageCtx {
   uint64_t journal_object_flush_bytes;
   double journal_object_flush_age;
   std::string journal_pool;
+  uint32_t journal_max_payload_bytes;
 };
 
 } // namespace librbd
diff --git a/src/test/librbd/mock/MockJournal.h b/src/test/librbd/mock/MockJournal.h
index a80eead..48447c3 100644
--- a/src/test/librbd/mock/MockJournal.h
+++ b/src/test/librbd/mock/MockJournal.h
@@ -7,10 +7,16 @@
 #include "gmock/gmock.h"
 #include "librbd/Journal.h"
 #include "librbd/journal/Types.h"
+#include <list>
 
 namespace librbd {
 
+struct AioObjectRequestHandle;
+struct ImageCtx;
+
 struct MockJournal {
+  typedef std::list<AioObjectRequestHandle *> AioObjectRequests;
+
   static MockJournal *s_instance;
   static MockJournal *get_instance() {
     assert(s_instance != nullptr);
@@ -28,6 +34,7 @@ struct MockJournal {
 
   MOCK_CONST_METHOD0(is_journal_ready, bool());
   MOCK_CONST_METHOD0(is_journal_replaying, bool());
+  MOCK_CONST_METHOD0(is_journal_appending, bool());
 
   MOCK_METHOD1(wait_for_journal_ready, void(Context *));
 
@@ -47,6 +54,21 @@ struct MockJournal {
 
   MOCK_METHOD0(allocate_op_tid, uint64_t());
 
+  MOCK_METHOD5(append_write_event, uint64_t(uint64_t, size_t,
+                                            const bufferlist &,
+                                            const AioObjectRequests &, bool));
+  MOCK_METHOD5(append_io_event_mock, uint64_t(const journal::EventEntry&,
+                                              const AioObjectRequests &,
+                                              uint64_t, size_t, bool));
+  uint64_t append_io_event(journal::EventEntry &&event_entry,
+                           const AioObjectRequests &requests,
+                           uint64_t offset, size_t length,
+                           bool flush_entry) {
+    // googlemock doesn't support move semantics
+    return append_io_event_mock(event_entry, requests, offset, length,
+                                flush_entry);
+  }
+
   MOCK_METHOD3(append_op_event_mock, void(uint64_t, const journal::EventEntry&,
                                           Context *));
   void append_op_event(uint64_t op_tid, journal::EventEntry &&event_entry,
@@ -55,8 +77,18 @@ struct MockJournal {
     append_op_event_mock(op_tid, event_entry, on_safe);
   }
 
-  MOCK_METHOD2(commit_op_event, void(uint64_t, int));
+  MOCK_METHOD2(flush_event, void(uint64_t, Context *));
+  MOCK_METHOD2(wait_event, void(uint64_t, Context *));
+
+  MOCK_METHOD3(commit_op_event, void(uint64_t, int, Context *));
   MOCK_METHOD2(replay_op_ready, void(uint64_t, Context *));
+
+  MOCK_METHOD2(add_listener, void(journal::ListenerType,
+                                  journal::JournalListenerPtr));
+  MOCK_METHOD2(remove_listener, void(journal::ListenerType,
+                                     journal::JournalListenerPtr));
+
+  MOCK_METHOD1(check_resync_requested, int(bool *));
 };
 
 } // namespace librbd
diff --git a/src/test/librbd/mock/MockJournalPolicy.h b/src/test/librbd/mock/MockJournalPolicy.h
index e7debfa..8ad6ff6 100644
--- a/src/test/librbd/mock/MockJournalPolicy.h
+++ b/src/test/librbd/mock/MockJournalPolicy.h
@@ -11,8 +11,8 @@ namespace librbd {
 
 struct MockJournalPolicy : public journal::Policy {
 
+  MOCK_CONST_METHOD0(append_disabled, bool());
   MOCK_METHOD1(allocate_tag_on_lock, void(Context*));
-  MOCK_METHOD1(cancel_external_replay, void(Context*));
 
 };
 
diff --git a/src/test/librbd/mock/MockOperations.h b/src/test/librbd/mock/MockOperations.h
index 5bbd9b3..478e763 100644
--- a/src/test/librbd/mock/MockOperations.h
+++ b/src/test/librbd/mock/MockOperations.h
@@ -7,6 +7,7 @@
 #include "include/int_types.h"
 #include "include/rbd/librbd.hpp"
 #include "gmock/gmock.h"
+#include <string>
 
 class Context;
 
@@ -17,27 +18,30 @@ struct MockOperations {
                                      Context *on_finish));
   MOCK_METHOD2(execute_rebuild_object_map, void(ProgressContext &prog_ctx,
                                                 Context *on_finish));
-  MOCK_METHOD2(execute_rename, void(const char *dstname, Context *on_finish));
+  MOCK_METHOD2(execute_rename, void(const std::string &dstname,
+                                    Context *on_finish));
   MOCK_METHOD4(execute_resize, void(uint64_t size, ProgressContext &prog_ctx,
                                     Context *on_finish,
                                     uint64_t journal_op_tid));
-  MOCK_METHOD2(snap_create, void(const char *snap_name, Context *on_finish));
-  MOCK_METHOD4(execute_snap_create, void(const char *snap_name,
+  MOCK_METHOD2(snap_create, void(const std::string &snap_name,
+                                 Context *on_finish));
+  MOCK_METHOD4(execute_snap_create, void(const std::string &snap_name,
                                          Context *on_finish,
                                          uint64_t journal_op_tid,
                                          bool skip_object_map));
-  MOCK_METHOD2(snap_remove, void(const char *snap_name, Context *on_finish));
-  MOCK_METHOD2(execute_snap_remove, void(const char *snap_name,
+  MOCK_METHOD2(snap_remove, void(const std::string &snap_name,
+                                 Context *on_finish));
+  MOCK_METHOD2(execute_snap_remove, void(const std::string &snap_name,
                                          Context *on_finish));
   MOCK_METHOD3(execute_snap_rename, void(uint64_t src_snap_id,
-                                         const char *snap_name,
+                                         const std::string &snap_name,
                                          Context *on_finish));
-  MOCK_METHOD3(execute_snap_rollback, void(const char *snap_name,
+  MOCK_METHOD3(execute_snap_rollback, void(const std::string &snap_name,
                                            ProgressContext &prog_ctx,
                                            Context *on_finish));
-  MOCK_METHOD2(execute_snap_protect, void(const char *snap_name,
+  MOCK_METHOD2(execute_snap_protect, void(const std::string &snap_name,
                                           Context *on_finish));
-  MOCK_METHOD2(execute_snap_unprotect, void(const char *snap_name,
+  MOCK_METHOD2(execute_snap_unprotect, void(const std::string &snap_name,
                                             Context *on_finish));
 };
 
diff --git a/src/test/librbd/object_map/test_mock_SnapshotRemoveRequest.cc b/src/test/librbd/object_map/test_mock_SnapshotRemoveRequest.cc
index c00a3e6..215c214 100644
--- a/src/test/librbd/object_map/test_mock_SnapshotRemoveRequest.cc
+++ b/src/test/librbd/object_map/test_mock_SnapshotRemoveRequest.cc
@@ -101,6 +101,31 @@ TEST_F(TestMockObjectMapSnapshotRemoveRequest, Success) {
   expect_unlock_exclusive_lock(*ictx);
 }
 
+TEST_F(TestMockObjectMapSnapshotRemoveRequest, LoadMapMissing) {
+  REQUIRE_FEATURE(RBD_FEATURE_FAST_DIFF);
+
+  librbd::ImageCtx *ictx;
+  ASSERT_EQ(0, open_image(m_image_name, &ictx));
+  ASSERT_EQ(0, snap_create(*ictx, "snap1"));
+  ASSERT_EQ(0, ictx->state->refresh_if_required());
+
+  uint64_t snap_id = ictx->snap_info.rbegin()->first;
+  expect_load_map(ictx, snap_id, -ENOENT);
+
+  ceph::BitVector<2> object_map;
+  C_SaferCond cond_ctx;
+  AsyncRequest<> *request = new SnapshotRemoveRequest(
+    *ictx, &object_map, snap_id, &cond_ctx);
+  {
+    RWLock::RLocker owner_locker(ictx->owner_lock);
+    RWLock::WLocker snap_locker(ictx->snap_lock);
+    request->send();
+  }
+  ASSERT_EQ(0, cond_ctx.wait());
+
+  expect_unlock_exclusive_lock(*ictx);
+}
+
 TEST_F(TestMockObjectMapSnapshotRemoveRequest, LoadMapError) {
   REQUIRE_FEATURE(RBD_FEATURE_FAST_DIFF);
 
diff --git a/src/test/librbd/operation/test_mock_ResizeRequest.cc b/src/test/librbd/operation/test_mock_ResizeRequest.cc
index 34b0deb..e3a34be 100644
--- a/src/test/librbd/operation/test_mock_ResizeRequest.cc
+++ b/src/test/librbd/operation/test_mock_ResizeRequest.cc
@@ -156,7 +156,7 @@ TEST_F(TestMockOperationResizeRequest, NoOpSuccess) {
 
   InSequence seq;
   expect_block_writes(mock_image_ctx, 0);
-  expect_append_op_event(mock_image_ctx, 0);
+  expect_append_op_event(mock_image_ctx, true, 0);
   expect_unblock_writes(mock_image_ctx);
   expect_commit_op_event(mock_image_ctx, 0);
   ASSERT_EQ(0, when_resize(mock_image_ctx, ictx->size, 0, false));
@@ -175,13 +175,13 @@ TEST_F(TestMockOperationResizeRequest, GrowSuccess) {
 
   InSequence seq;
   expect_block_writes(mock_image_ctx, 0);
-  expect_append_op_event(mock_image_ctx, 0);
+  expect_append_op_event(mock_image_ctx, true, 0);
   expect_unblock_writes(mock_image_ctx);
   expect_grow_object_map(mock_image_ctx);
   expect_block_writes(mock_image_ctx, 0);
   expect_update_header(mock_image_ctx, 0);
-  expect_commit_op_event(mock_image_ctx, 0);
   expect_unblock_writes(mock_image_ctx);
+  expect_commit_op_event(mock_image_ctx, 0);
   ASSERT_EQ(0, when_resize(mock_image_ctx, ictx->size * 2, 0, false));
 }
 
@@ -198,7 +198,7 @@ TEST_F(TestMockOperationResizeRequest, ShrinkSuccess) {
 
   InSequence seq;
   expect_block_writes(mock_image_ctx, 0);
-  expect_append_op_event(mock_image_ctx, 0);
+  expect_append_op_event(mock_image_ctx, true, 0);
   expect_unblock_writes(mock_image_ctx);
 
   MockTrimRequest mock_trim_request;
@@ -206,12 +206,29 @@ TEST_F(TestMockOperationResizeRequest, ShrinkSuccess) {
   expect_invalidate_cache(mock_image_ctx, 0);
   expect_block_writes(mock_image_ctx, 0);
   expect_update_header(mock_image_ctx, 0);
-  expect_commit_op_event(mock_image_ctx, 0);
   expect_shrink_object_map(mock_image_ctx);
   expect_unblock_writes(mock_image_ctx);
+  expect_commit_op_event(mock_image_ctx, 0);
   ASSERT_EQ(0, when_resize(mock_image_ctx, ictx->size / 2, 0, false));
 }
 
+TEST_F(TestMockOperationResizeRequest, ShrinkError) {
+  librbd::ImageCtx *ictx;
+  ASSERT_EQ(0, open_image(m_image_name, &ictx));
+
+  MockImageCtx mock_image_ctx(*ictx);
+  MockExclusiveLock mock_exclusive_lock;
+  MockJournal mock_journal;
+  MockObjectMap mock_object_map;
+  initialize_features(ictx, mock_image_ctx, mock_exclusive_lock, mock_journal,
+                      mock_object_map);
+
+  InSequence seq;
+  expect_block_writes(mock_image_ctx, -EINVAL);
+  expect_unblock_writes(mock_image_ctx);
+  ASSERT_EQ(-EINVAL, when_resize(mock_image_ctx, ictx->size / 2, 0, false));
+}
+
 TEST_F(TestMockOperationResizeRequest, PreBlockWritesError) {
   librbd::ImageCtx *ictx;
   ASSERT_EQ(0, open_image(m_image_name, &ictx));
@@ -242,7 +259,7 @@ TEST_F(TestMockOperationResizeRequest, TrimError) {
 
   InSequence seq;
   expect_block_writes(mock_image_ctx, 0);
-  expect_append_op_event(mock_image_ctx, 0);
+  expect_append_op_event(mock_image_ctx, true, 0);
   expect_unblock_writes(mock_image_ctx);
 
   MockTrimRequest mock_trim_request;
@@ -264,7 +281,7 @@ TEST_F(TestMockOperationResizeRequest, InvalidateCacheError) {
 
   InSequence seq;
   expect_block_writes(mock_image_ctx, 0);
-  expect_append_op_event(mock_image_ctx, 0);
+  expect_append_op_event(mock_image_ctx, true, 0);
   expect_unblock_writes(mock_image_ctx);
 
   MockTrimRequest mock_trim_request;
@@ -287,7 +304,7 @@ TEST_F(TestMockOperationResizeRequest, PostBlockWritesError) {
 
   InSequence seq;
   expect_block_writes(mock_image_ctx, 0);
-  expect_append_op_event(mock_image_ctx, 0);
+  expect_append_op_event(mock_image_ctx, true, 0);
   expect_unblock_writes(mock_image_ctx);
   expect_grow_object_map(mock_image_ctx);
   expect_block_writes(mock_image_ctx, -EINVAL);
@@ -309,7 +326,7 @@ TEST_F(TestMockOperationResizeRequest, UpdateHeaderError) {
 
   InSequence seq;
   expect_block_writes(mock_image_ctx, 0);
-  expect_append_op_event(mock_image_ctx, 0);
+  expect_append_op_event(mock_image_ctx, true, 0);
   expect_unblock_writes(mock_image_ctx);
   expect_grow_object_map(mock_image_ctx);
   expect_block_writes(mock_image_ctx, 0);
@@ -334,7 +351,7 @@ TEST_F(TestMockOperationResizeRequest, JournalAppendError) {
 
   InSequence seq;
   expect_block_writes(mock_image_ctx, 0);
-  expect_append_op_event(mock_image_ctx, -EINVAL);
+  expect_append_op_event(mock_image_ctx, true, -EINVAL);
   expect_unblock_writes(mock_image_ctx);
   ASSERT_EQ(-EINVAL, when_resize(mock_image_ctx, ictx->size, 0, false));
 }
diff --git a/src/test/librbd/operation/test_mock_SnapshotRollbackRequest.cc b/src/test/librbd/operation/test_mock_SnapshotRollbackRequest.cc
index 6173336..6258229 100644
--- a/src/test/librbd/operation/test_mock_SnapshotRollbackRequest.cc
+++ b/src/test/librbd/operation/test_mock_SnapshotRollbackRequest.cc
@@ -198,7 +198,7 @@ TEST_F(TestMockOperationSnapshotRollbackRequest, Success) {
 
   InSequence seq;
   MockResizeRequest mock_resize_request;
-  expect_append_op_event(mock_image_ctx, 0);
+  expect_append_op_event(mock_image_ctx, false, 0);
   expect_block_writes(mock_image_ctx, 0);
   expect_resize(mock_image_ctx, mock_resize_request, 0);
   expect_rollback_object_map(mock_image_ctx, *mock_object_map);
@@ -223,7 +223,7 @@ TEST_F(TestMockOperationSnapshotRollbackRequest, BlockWritesError) {
   expect_op_work_queue(mock_image_ctx);
 
   InSequence seq;
-  expect_append_op_event(mock_image_ctx, 0);
+  expect_append_op_event(mock_image_ctx, false, 0);
   expect_block_writes(mock_image_ctx, -EINVAL);
   expect_commit_op_event(mock_image_ctx, -EINVAL);
   expect_unblock_writes(mock_image_ctx);
@@ -243,7 +243,7 @@ TEST_F(TestMockOperationSnapshotRollbackRequest, SkipResize) {
   expect_op_work_queue(mock_image_ctx);
 
   InSequence seq;
-  expect_append_op_event(mock_image_ctx, 0);
+  expect_append_op_event(mock_image_ctx, false, 0);
   expect_block_writes(mock_image_ctx, 0);
   expect_get_image_size(mock_image_ctx, 345);
   expect_rollback_object_map(mock_image_ctx, *mock_object_map);
@@ -269,7 +269,7 @@ TEST_F(TestMockOperationSnapshotRollbackRequest, ResizeError) {
 
   InSequence seq;
   MockResizeRequest mock_resize_request;
-  expect_append_op_event(mock_image_ctx, 0);
+  expect_append_op_event(mock_image_ctx, false, 0);
   expect_block_writes(mock_image_ctx, 0);
   expect_resize(mock_image_ctx, mock_resize_request, -EINVAL);
   expect_commit_op_event(mock_image_ctx, -EINVAL);
@@ -291,7 +291,7 @@ TEST_F(TestMockOperationSnapshotRollbackRequest, RollbackObjectsError) {
 
   InSequence seq;
   MockResizeRequest mock_resize_request;
-  expect_append_op_event(mock_image_ctx, 0);
+  expect_append_op_event(mock_image_ctx, false, 0);
   expect_block_writes(mock_image_ctx, 0);
   expect_resize(mock_image_ctx, mock_resize_request, 0);
   expect_rollback_object_map(mock_image_ctx, mock_object_map);
@@ -315,7 +315,7 @@ TEST_F(TestMockOperationSnapshotRollbackRequest, InvalidateCacheError) {
 
   InSequence seq;
   MockResizeRequest mock_resize_request;
-  expect_append_op_event(mock_image_ctx, 0);
+  expect_append_op_event(mock_image_ctx, false, 0);
   expect_block_writes(mock_image_ctx, 0);
   expect_resize(mock_image_ctx, mock_resize_request, 0);
   expect_rollback_object_map(mock_image_ctx, *mock_object_map);
diff --git a/src/test/librbd/test_internal.cc b/src/test/librbd/test_internal.cc
index b5c6f6b..407e036 100644
--- a/src/test/librbd/test_internal.cc
+++ b/src/test/librbd/test_internal.cc
@@ -601,7 +601,6 @@ TEST_F(TestInternal, ResizeCopyup)
 
   librbd::ImageCtx *ictx2;
   ASSERT_EQ(0, open_image(clone_name, &ictx2));
-
   ASSERT_EQ(0, snap_create(*ictx2, "snap1"));
 
   bufferptr read_ptr(bl.length());
@@ -612,6 +611,8 @@ TEST_F(TestInternal, ResizeCopyup)
   librbd::NoOpProgressContext no_op;
   ASSERT_EQ(0, ictx2->operations->resize(m_image_size - (1 << order) - 32,
                                          no_op));
+  ASSERT_EQ(0, ictx2->operations->resize(m_image_size - (2 << order) - 32,
+                                         no_op));
   ASSERT_EQ(0, librbd::snap_set(ictx2, "snap1"));
 
   {
@@ -631,6 +632,9 @@ TEST_F(TestInternal, DiscardCopyup)
 {
   REQUIRE_FEATURE(RBD_FEATURE_LAYERING);
 
+  CephContext* cct = reinterpret_cast<CephContext*>(_rados.cct());
+  REQUIRE(!cct->_conf->rbd_skip_partial_discard);
+
   m_image_name = get_temp_image_name();
   m_image_size = 1 << 14;
 
diff --git a/src/test/librbd/test_librbd.cc b/src/test/librbd/test_librbd.cc
index 901abc8..c03ce64 100644
--- a/src/test/librbd/test_librbd.cc
+++ b/src/test/librbd/test_librbd.cc
@@ -23,6 +23,7 @@
 
 #include "gtest/gtest.h"
 
+#include <chrono>
 #include <errno.h>
 #include <stdarg.h>
 #include <stdio.h>
@@ -39,6 +40,10 @@
 
 #include "test/librados/test.h"
 #include "test/librbd/test_support.h"
+#include "common/Cond.h"
+#include "common/Mutex.h"
+#include "common/config.h"
+#include "common/ceph_context.h"
 #include "common/errno.h"
 #include "include/interval_set.h"
 #include "include/stringify.h"
@@ -52,6 +57,8 @@
 
 using namespace std;
 
+using std::chrono::seconds;
+
 #define ASSERT_PASSED(x, args...) \
   do {                            \
     bool passed = false;          \
@@ -454,6 +461,111 @@ TEST_F(TestLibRBD, ResizeAndStatPP)
   ioctx.close();
 }
 
+TEST_F(TestLibRBD, UpdateWatchAndResize)
+{
+  rados_ioctx_t ioctx;
+  rados_ioctx_create(_cluster, m_pool_name.c_str(), &ioctx);
+
+  rbd_image_t image;
+  int order = 0;
+  std::string name = get_temp_image_name();
+  uint64_t size = 2 << 20;
+  struct Watcher {
+    static void cb(void *arg) {
+      Watcher *watcher = (Watcher *)arg;
+      watcher->handle_notify();
+    }
+    Watcher(rbd_image_t &image) : m_image(image), m_lock("lock") {}
+    void handle_notify() {
+      rbd_image_info_t info;
+      ASSERT_EQ(0, rbd_stat(m_image, &info, sizeof(info)));
+      Mutex::Locker locker(m_lock);
+      m_size = info.size;
+      m_cond.Signal();
+    }
+    void wait_for_size(size_t size) {
+      Mutex::Locker locker(m_lock);
+      while (m_size != size) {
+	CephContext* cct = reinterpret_cast<CephContext*>(_rados.cct());
+	ASSERT_EQ(0, m_cond.WaitInterval(cct, m_lock, seconds(5)));
+      }
+    }
+    rbd_image_t &m_image;
+    Mutex m_lock;
+    Cond m_cond;
+    size_t m_size = 0;
+  } watcher(image);
+  uint64_t handle;
+
+  ASSERT_EQ(0, create_image(ioctx, name.c_str(), size, &order));
+  ASSERT_EQ(0, rbd_open(ioctx, name.c_str(), &image, NULL));
+
+  ASSERT_EQ(0, rbd_update_watch(image, &handle, Watcher::cb, &watcher));
+
+  ASSERT_EQ(0, rbd_resize(image, size * 4));
+  watcher.wait_for_size(size * 4);
+
+  ASSERT_EQ(0, rbd_resize(image, size / 2));
+  watcher.wait_for_size(size / 2);
+
+  ASSERT_EQ(0, rbd_update_unwatch(image, handle));
+
+  ASSERT_EQ(0, rbd_close(image));
+  rados_ioctx_destroy(ioctx);
+}
+
+TEST_F(TestLibRBD, UpdateWatchAndResizePP)
+{
+  librados::IoCtx ioctx;
+  ASSERT_EQ(0, _rados.ioctx_create(m_pool_name.c_str(), ioctx));
+
+  {
+    librbd::RBD rbd;
+    librbd::Image image;
+    int order = 0;
+    std::string name = get_temp_image_name();
+    uint64_t size = 2 << 20;
+    struct Watcher : public librbd::UpdateWatchCtx {
+      Watcher(librbd::Image &image) : m_image(image), m_lock("lock") {
+      }
+      void handle_notify() {
+        librbd::image_info_t info;
+	ASSERT_EQ(0, m_image.stat(info, sizeof(info)));
+        Mutex::Locker locker(m_lock);
+        m_size = info.size;
+        m_cond.Signal();
+      }
+      void wait_for_size(size_t size) {
+	Mutex::Locker locker(m_lock);
+	while (m_size != size) {
+	  CephContext* cct = reinterpret_cast<CephContext*>(_rados.cct());
+	  ASSERT_EQ(0, m_cond.WaitInterval(cct, m_lock, seconds(5)));
+	}
+      }
+      librbd::Image &m_image;
+      Mutex m_lock;
+      Cond m_cond;
+      size_t m_size = 0;
+    } watcher(image);
+    uint64_t handle;
+
+    ASSERT_EQ(0, create_image_pp(rbd, ioctx, name.c_str(), size, &order));
+    ASSERT_EQ(0, rbd.open(ioctx, image, name.c_str(), NULL));
+
+    ASSERT_EQ(0, image.update_watch(&watcher, &handle));
+
+    ASSERT_EQ(0, image.resize(size * 4));
+    watcher.wait_for_size(size * 4);
+
+    ASSERT_EQ(0, image.resize(size / 2));
+    watcher.wait_for_size(size / 2);
+
+    ASSERT_EQ(0, image.update_unwatch(handle));
+  }
+
+  ioctx.close();
+}
+
 int test_ls(rados_ioctx_t io_ctx, size_t num_expected, ...)
 {
   int num_images, i;
@@ -1054,11 +1166,14 @@ TEST_F(TestLibRBD, TestIO)
   rados_ioctx_t ioctx;
   rados_ioctx_create(_cluster, m_pool_name.c_str(), &ioctx);
 
+  CephContext* cct = reinterpret_cast<CephContext*>(_rados.cct());
+  bool skip_discard = cct->_conf->rbd_skip_partial_discard;
+
   rbd_image_t image;
   int order = 0;
   std::string name = get_temp_image_name();
   uint64_t size = 2 << 20;
-  
+
   ASSERT_EQ(0, create_image(ioctx, name.c_str(), size, &order));
   ASSERT_EQ(0, rbd_open(ioctx, name.c_str(), &image, NULL));
 
@@ -1089,11 +1204,13 @@ TEST_F(TestLibRBD, TestIO)
   ASSERT_PASSED(aio_discard_test_data, image, TEST_IO_SIZE*3, TEST_IO_SIZE);
 
   ASSERT_PASSED(read_test_data, image, test_data,  0, TEST_IO_SIZE, 0);
-  ASSERT_PASSED(read_test_data, image,  zero_data, TEST_IO_SIZE, TEST_IO_SIZE, 0);
+  ASSERT_PASSED(read_test_data, image, skip_discard ? test_data : zero_data,
+		TEST_IO_SIZE, TEST_IO_SIZE, 0);
   ASSERT_PASSED(read_test_data, image, test_data,  TEST_IO_SIZE*2, TEST_IO_SIZE, 0);
-  ASSERT_PASSED(read_test_data, image,  zero_data, TEST_IO_SIZE*3, TEST_IO_SIZE, 0);
+  ASSERT_PASSED(read_test_data, image, skip_discard ? test_data : zero_data,
+		TEST_IO_SIZE*3, TEST_IO_SIZE, 0);
   ASSERT_PASSED(read_test_data, image, test_data,  TEST_IO_SIZE*4, TEST_IO_SIZE, 0);
-  
+
   rbd_image_info_t info;
   rbd_completion_t comp;
   ASSERT_EQ(0, rbd_stat(image, &info, sizeof(info)));
@@ -1128,6 +1245,9 @@ TEST_F(TestLibRBD, TestIOWithIOHint)
   rados_ioctx_t ioctx;
   rados_ioctx_create(_cluster, m_pool_name.c_str(), &ioctx);
 
+  CephContext* cct = reinterpret_cast<CephContext*>(_rados.cct());
+  bool skip_discard = cct->_conf->rbd_skip_partial_discard;
+
   rbd_image_t image;
   int order = 0;
   std::string name = get_temp_image_name();
@@ -1168,11 +1288,13 @@ TEST_F(TestLibRBD, TestIOWithIOHint)
 
   ASSERT_PASSED(read_test_data, image, test_data,  0, TEST_IO_SIZE,
 		LIBRADOS_OP_FLAG_FADVISE_SEQUENTIAL);
-  ASSERT_PASSED(read_test_data, image,  zero_data, TEST_IO_SIZE, TEST_IO_SIZE,
+  ASSERT_PASSED(read_test_data, image, skip_discard ? test_data : zero_data,
+		TEST_IO_SIZE, TEST_IO_SIZE,
 		LIBRADOS_OP_FLAG_FADVISE_SEQUENTIAL);
   ASSERT_PASSED(read_test_data, image, test_data,  TEST_IO_SIZE*2, TEST_IO_SIZE,
 		LIBRADOS_OP_FLAG_FADVISE_SEQUENTIAL);
-  ASSERT_PASSED(read_test_data, image,  zero_data, TEST_IO_SIZE*3, TEST_IO_SIZE,
+  ASSERT_PASSED(read_test_data, image, skip_discard ? test_data : zero_data,
+		TEST_IO_SIZE*3, TEST_IO_SIZE,
 		LIBRADOS_OP_FLAG_FADVISE_SEQUENTIAL);
   ASSERT_PASSED(read_test_data, image, test_data,  TEST_IO_SIZE*4, TEST_IO_SIZE, 0);
 
@@ -1332,25 +1454,28 @@ void read_test_data(librbd::Image& image, const char *expected, off_t off, size_
   *passed = true;
 }
 
-TEST_F(TestLibRBD, TestIOPP) 
+TEST_F(TestLibRBD, TestIOPP)
 {
   librados::IoCtx ioctx;
   ASSERT_EQ(0, _rados.ioctx_create(m_pool_name.c_str(), ioctx));
 
+  CephContext* cct = reinterpret_cast<CephContext*>(_rados.cct());
+  bool skip_discard = cct->_conf->rbd_skip_partial_discard;
+
   {
     librbd::RBD rbd;
     librbd::Image image;
     int order = 0;
     std::string name = get_temp_image_name();
     uint64_t size = 2 << 20;
-    
+
     ASSERT_EQ(0, create_image_pp(rbd, ioctx, name.c_str(), size, &order));
     ASSERT_EQ(0, rbd.open(ioctx, image, name.c_str(), NULL));
 
     char test_data[TEST_IO_SIZE + 1];
     char zero_data[TEST_IO_SIZE + 1];
     int i;
-    
+
     for (i = 0; i < TEST_IO_SIZE; ++i) {
       test_data[i] = (char) (rand() % (126 - 33) + 33);
     }
@@ -1359,24 +1484,26 @@ TEST_F(TestLibRBD, TestIOPP)
 
     for (i = 0; i < 5; ++i)
       ASSERT_PASSED(write_test_data, image, test_data, strlen(test_data) * i, 0);
-    
+
     for (i = 5; i < 10; ++i)
       ASSERT_PASSED(aio_write_test_data, image, test_data, strlen(test_data) * i, 0);
-    
+
     for (i = 0; i < 5; ++i)
       ASSERT_PASSED(read_test_data, image, test_data, strlen(test_data) * i, TEST_IO_SIZE, 0);
-    
+
     for (i = 5; i < 10; ++i)
       ASSERT_PASSED(aio_read_test_data, image, test_data, strlen(test_data) * i, TEST_IO_SIZE, 0);
 
     // discard 2nd, 4th sections.
     ASSERT_PASSED(discard_test_data, image, TEST_IO_SIZE, TEST_IO_SIZE);
     ASSERT_PASSED(aio_discard_test_data, image, TEST_IO_SIZE*3, TEST_IO_SIZE);
-    
+
     ASSERT_PASSED(read_test_data, image, test_data,  0, TEST_IO_SIZE, 0);
-    ASSERT_PASSED(read_test_data, image,  zero_data, TEST_IO_SIZE, TEST_IO_SIZE, 0);
+    ASSERT_PASSED(read_test_data, image, skip_discard ? test_data : zero_data,
+		  TEST_IO_SIZE, TEST_IO_SIZE, 0);
     ASSERT_PASSED(read_test_data, image, test_data,  TEST_IO_SIZE*2, TEST_IO_SIZE, 0);
-    ASSERT_PASSED(read_test_data, image,  zero_data, TEST_IO_SIZE*3, TEST_IO_SIZE, 0);
+    ASSERT_PASSED(read_test_data, image, skip_discard ? test_data : zero_data,
+		  TEST_IO_SIZE*3, TEST_IO_SIZE, 0);
     ASSERT_PASSED(read_test_data, image, test_data,  TEST_IO_SIZE*4, TEST_IO_SIZE, 0);
 
     ASSERT_PASSED(validate_object_map, image);
@@ -2281,7 +2408,7 @@ static int iterate_error_cb(uint64_t off, size_t len, int exists, void *arg)
   return -EINVAL;
 }
 
-void scribble(librbd::Image& image, int n, int max,
+void scribble(librbd::Image& image, int n, int max, bool skip_discard,
               interval_set<uint64_t> *exists,
               interval_set<uint64_t> *what)
 {
@@ -2292,7 +2419,7 @@ void scribble(librbd::Image& image, int n, int max,
   for (int i=0; i<n; i++) {
     uint64_t off = rand() % (size - max + 1);
     uint64_t len = 1 + rand() % max;
-    if (rand() % 4 == 0) {
+    if (!skip_discard && rand() % 4 == 0) {
       ASSERT_EQ((int)len, image.discard(off, len));
       interval_set<uint64_t> w;
       w.insert(off, len);
@@ -2367,6 +2494,9 @@ TYPED_TEST(DiffIterateTest, DiffIterate)
   librados::IoCtx ioctx;
   ASSERT_EQ(0, this->_rados.ioctx_create(this->m_pool_name.c_str(), ioctx));
 
+  CephContext* cct = reinterpret_cast<CephContext*>(this->_rados.cct());
+  bool skip_discard = cct->_conf->rbd_skip_partial_discard;
+
   {
     librbd::RBD rbd;
     librbd::Image image;
@@ -2384,10 +2514,10 @@ TYPED_TEST(DiffIterateTest, DiffIterate)
 
     interval_set<uint64_t> exists;
     interval_set<uint64_t> one, two;
-    scribble(image, 10, 102400, &exists, &one);
+    scribble(image, 10, 102400, skip_discard, &exists, &one);
     cout << " wrote " << one << std::endl;
     ASSERT_EQ(0, image.snap_create("one"));
-    scribble(image, 10, 102400, &exists, &two);
+    scribble(image, 10, 102400, skip_discard, &exists, &two);
 
     two = round_diff_interval(two, object_size);
     cout << " wrote " << two << std::endl;
@@ -2515,6 +2645,9 @@ TYPED_TEST(DiffIterateTest, DiffIterateStress)
   librados::IoCtx ioctx;
   ASSERT_EQ(0, this->_rados.ioctx_create(this->m_pool_name.c_str(), ioctx));
 
+  CephContext* cct = reinterpret_cast<CephContext*>(this->_rados.cct());
+  bool skip_discard = cct->_conf->rbd_skip_partial_discard;
+
   librbd::RBD rbd;
   librbd::Image image;
   int order = 0;
@@ -2536,7 +2669,7 @@ TYPED_TEST(DiffIterateTest, DiffIterateStress)
   int n = 20;
   for (int i=0; i<n; i++) {
     interval_set<uint64_t> w;
-    scribble(image, 10, 8192000, &curexists, &w);
+    scribble(image, 10, 8192000, skip_discard, &curexists, &w);
     cout << " i=" << i << " exists " << curexists << " wrote " << w << std::endl;
     string s = "snap" + stringify(i);
     ASSERT_EQ(0, image.snap_create(s.c_str()));
@@ -2634,6 +2767,9 @@ TYPED_TEST(DiffIterateTest, DiffIterateIgnoreParent)
   librados::IoCtx ioctx;
   ASSERT_EQ(0, this->_rados.ioctx_create(this->m_pool_name.c_str(), ioctx));
 
+  CephContext* cct = reinterpret_cast<CephContext*>(this->_rados.cct());
+  bool skip_discard = cct->_conf->rbd_skip_partial_discard;
+
   librbd::RBD rbd;
   librbd::Image image;
   std::string name = this->get_temp_image_name();
@@ -2664,7 +2800,7 @@ TYPED_TEST(DiffIterateTest, DiffIterateIgnoreParent)
 
   interval_set<uint64_t> exists;
   interval_set<uint64_t> two;
-  scribble(image, 10, 102400, &exists, &two);
+  scribble(image, 10, 102400, skip_discard, &exists, &two);
   two = round_diff_interval(two, object_size);
   cout << " wrote " << two << " to clone" << std::endl;
 
@@ -2683,6 +2819,9 @@ TYPED_TEST(DiffIterateTest, DiffIterateCallbackError)
   librados::IoCtx ioctx;
   ASSERT_EQ(0, this->_rados.ioctx_create(this->m_pool_name.c_str(), ioctx));
 
+  CephContext* cct = reinterpret_cast<CephContext*>(this->_rados.cct());
+  bool skip_discard = cct->_conf->rbd_skip_partial_discard;
+
   {
     librbd::RBD rbd;
     librbd::Image image;
@@ -2695,7 +2834,7 @@ TYPED_TEST(DiffIterateTest, DiffIterateCallbackError)
 
     interval_set<uint64_t> exists;
     interval_set<uint64_t> one;
-    scribble(image, 10, 102400, &exists, &one);
+    scribble(image, 10, 102400, skip_discard, &exists, &one);
     cout << " wrote " << one << std::endl;
 
     interval_set<uint64_t> diff;
@@ -2713,6 +2852,9 @@ TYPED_TEST(DiffIterateTest, DiffIterateParentDiscard)
   librados::IoCtx ioctx;
   ASSERT_EQ(0, this->_rados.ioctx_create(this->m_pool_name.c_str(), ioctx));
 
+  CephContext* cct = reinterpret_cast<CephContext*>(this->_rados.cct());
+  bool skip_discard = cct->_conf->rbd_skip_partial_discard;
+
   librbd::RBD rbd;
   librbd::Image image;
   std::string name = this->get_temp_image_name();
@@ -2729,7 +2871,7 @@ TYPED_TEST(DiffIterateTest, DiffIterateParentDiscard)
 
   interval_set<uint64_t> exists;
   interval_set<uint64_t> one;
-  scribble(image, 10, 102400, &exists, &one);
+  scribble(image, 10, 102400, skip_discard, &exists, &one);
   ASSERT_EQ(0, image.snap_create("one"));
 
   ASSERT_EQ(1 << order, image.discard(0, 1 << order));
@@ -2744,7 +2886,7 @@ TYPED_TEST(DiffIterateTest, DiffIterateParentDiscard)
   ASSERT_EQ(0, rbd.open(ioctx, image, clone_name.c_str(), NULL));
 
   interval_set<uint64_t> two;
-  scribble(image, 10, 102400, &exists, &two);
+  scribble(image, 10, 102400, skip_discard, &exists, &two);
   two = round_diff_interval(two, object_size);
 
   interval_set<uint64_t> diff;
@@ -3658,6 +3800,9 @@ TEST_F(TestLibRBD, BlockingAIO)
   librados::IoCtx ioctx;
   ASSERT_EQ(0, _rados.ioctx_create(m_pool_name.c_str(), ioctx));
 
+  CephContext* cct = reinterpret_cast<CephContext*>(_rados.cct());
+  bool skip_discard = cct->_conf->rbd_skip_partial_discard;
+
   librbd::RBD rbd;
   std::string name = get_temp_image_name();
   uint64_t size = 1 << 20;
@@ -3710,7 +3855,7 @@ TEST_F(TestLibRBD, BlockingAIO)
 
   bufferlist expected_bl;
   expected_bl.append(std::string(128, '1'));
-  expected_bl.append(std::string(128, '\0'));
+  expected_bl.append(std::string(128, skip_discard ? '1' : '\0'));
   ASSERT_TRUE(expected_bl.contents_equal(read_bl));
 }
 
diff --git a/src/test/librbd/test_mirroring.cc b/src/test/librbd/test_mirroring.cc
index 758fc45..cd59dd7 100644
--- a/src/test/librbd/test_mirroring.cc
+++ b/src/test/librbd/test_mirroring.cc
@@ -24,6 +24,7 @@
 #include "librbd/Operations.h"
 #include "librbd/journal/Types.h"
 #include "journal/Journaler.h"
+#include "journal/Settings.h"
 #include <boost/scope_exit.hpp>
 #include <boost/assign/list_of.hpp>
 #include <utility>
@@ -267,7 +268,7 @@ public:
       "remote-image-id", {{"sync-point-snap", boost::none}}, {});
     librbd::journal::ClientData client_data(peer_client_meta);
 
-    journal::Journaler journaler(io_ctx, image_id, "peer-client", 5);
+    journal::Journaler journaler(io_ctx, image_id, "peer-client", {});
     C_SaferCond init_ctx;
     journaler.init(&init_ctx);
     ASSERT_EQ(-ENOENT, init_ctx.wait());
diff --git a/src/test/librbd/test_mock_ExclusiveLock.cc b/src/test/librbd/test_mock_ExclusiveLock.cc
index 7757675..f87a319 100644
--- a/src/test/librbd/test_mock_ExclusiveLock.cc
+++ b/src/test/librbd/test_mock_ExclusiveLock.cc
@@ -589,5 +589,76 @@ TEST_F(TestMockExclusiveLock, ConcurrentRequests) {
   ASSERT_EQ(0, when_shut_down(mock_image_ctx, exclusive_lock));
 }
 
+TEST_F(TestMockExclusiveLock, BlockRequests) {
+  REQUIRE_FEATURE(RBD_FEATURE_EXCLUSIVE_LOCK);
+
+  librbd::ImageCtx *ictx;
+  ASSERT_EQ(0, open_image(m_image_name, &ictx));
+
+  MockExclusiveLockImageCtx mock_image_ctx(*ictx);
+  MockExclusiveLock exclusive_lock(mock_image_ctx);
+
+  expect_op_work_queue(mock_image_ctx);
+
+  InSequence seq;
+  expect_block_writes(mock_image_ctx);
+  ASSERT_EQ(0, when_init(mock_image_ctx, exclusive_lock));
+
+  MockAcquireRequest try_lock_acquire;
+  expect_acquire_lock(mock_image_ctx, try_lock_acquire, 0);
+  ASSERT_EQ(0, when_try_lock(mock_image_ctx, exclusive_lock));
+  ASSERT_TRUE(is_lock_owner(mock_image_ctx, exclusive_lock));
+
+  int ret_val;
+  ASSERT_TRUE(exclusive_lock.accept_requests(&ret_val));
+  ASSERT_EQ(0, ret_val);
+
+  exclusive_lock.block_requests(-EROFS);
+  ASSERT_FALSE(exclusive_lock.accept_requests(&ret_val));
+  ASSERT_EQ(-EROFS, ret_val);
+
+  exclusive_lock.unblock_requests();
+  ASSERT_TRUE(exclusive_lock.accept_requests(&ret_val));
+  ASSERT_EQ(0, ret_val);
+
+  MockReleaseRequest shutdown_release;
+  expect_release_lock(mock_image_ctx, shutdown_release, 0, true);
+  ASSERT_EQ(0, when_shut_down(mock_image_ctx, exclusive_lock));
+  ASSERT_FALSE(is_lock_owner(mock_image_ctx, exclusive_lock));
+}
+
+TEST_F(TestMockExclusiveLock, RequestLockWatchNotRegistered) {
+  REQUIRE_FEATURE(RBD_FEATURE_EXCLUSIVE_LOCK);
+
+  librbd::ImageCtx *ictx;
+  ASSERT_EQ(0, open_image(m_image_name, &ictx));
+
+  MockExclusiveLockImageCtx mock_image_ctx(*ictx);
+  MockExclusiveLock exclusive_lock(mock_image_ctx);
+  expect_op_work_queue(mock_image_ctx);
+
+  InSequence seq;
+  expect_block_writes(mock_image_ctx);
+  ASSERT_EQ(0, when_init(mock_image_ctx, exclusive_lock));
+
+  EXPECT_CALL(*mock_image_ctx.image_watcher, get_watch_handle())
+    .WillOnce(DoAll(Invoke([&mock_image_ctx, &exclusive_lock]() {
+                      mock_image_ctx.image_ctx->op_work_queue->queue(
+                        new FunctionContext([&exclusive_lock](int r) {
+                          exclusive_lock.handle_watch_registered();
+                        }));
+                    }),
+                    Return(0)));
+  MockAcquireRequest request_lock_acquire;
+  expect_acquire_lock(mock_image_ctx, request_lock_acquire, 0);
+  ASSERT_EQ(0, when_request_lock(mock_image_ctx, exclusive_lock));
+  ASSERT_TRUE(is_lock_owner(mock_image_ctx, exclusive_lock));
+
+  MockReleaseRequest shutdown_release;
+  expect_release_lock(mock_image_ctx, shutdown_release, 0, true);
+  ASSERT_EQ(0, when_shut_down(mock_image_ctx, exclusive_lock));
+  ASSERT_FALSE(is_lock_owner(mock_image_ctx, exclusive_lock));
+}
+
 } // namespace librbd
 
diff --git a/src/test/librbd/test_mock_Journal.cc b/src/test/librbd/test_mock_Journal.cc
index b3ed6fd..3785f2c 100644
--- a/src/test/librbd/test_mock_Journal.cc
+++ b/src/test/librbd/test_mock_Journal.cc
@@ -5,6 +5,7 @@
 #include "test/journal/mock/MockJournaler.h"
 #include "test/librbd/test_support.h"
 #include "test/librbd/mock/MockImageCtx.h"
+#include "test/librbd/mock/MockJournalPolicy.h"
 #include "common/Cond.h"
 #include "common/Mutex.h"
 #include "cls/journal/cls_journal_types.h"
@@ -56,7 +57,8 @@ struct MockReplay {
   }
 
   MOCK_METHOD2(shut_down, void(bool cancel_ops, Context *));
-  MOCK_METHOD3(process, void(bufferlist::iterator*, Context *, Context *));
+  MOCK_METHOD2(decode, int(bufferlist::iterator*, EventEntry *));
+  MOCK_METHOD3(process, void(const EventEntry&, Context *, Context *));
   MOCK_METHOD2(replay_op_ready, void(uint64_t, Context *));
 };
 
@@ -71,9 +73,13 @@ public:
     MockReplay::get_instance().shut_down(cancel_ops, on_finish);
   }
 
-  void process(bufferlist::iterator *it, Context *on_ready,
+  int decode(bufferlist::iterator *it, EventEntry *event_entry) {
+    return MockReplay::get_instance().decode(it, event_entry);
+  }
+
+  void process(const EventEntry& event_entry, Context *on_ready,
                Context *on_commit) {
-    MockReplay::get_instance().process(it, on_ready, on_commit);
+    MockReplay::get_instance().process(event_entry, on_ready, on_commit);
   }
 
   void replay_op_ready(uint64_t op_tid, Context *on_resume) {
@@ -153,6 +159,7 @@ public:
   }
 
   void expect_shut_down_journaler(::journal::MockJournaler &mock_journaler) {
+    EXPECT_CALL(mock_journaler, remove_listener(_));
     EXPECT_CALL(mock_journaler, shut_down(_))
                   .WillOnce(CompleteContext(0, NULL));
   }
@@ -191,6 +198,7 @@ public:
     EXPECT_CALL(mock_journaler, get_tags(0, _, _))
                   .WillOnce(DoAll(SetArgPointee<1>(tags),
                                   WithArg<2>(CompleteContext(r, mock_image_ctx.image_ctx->op_work_queue))));
+    EXPECT_CALL(mock_journaler, add_listener(_));
   }
 
   void expect_start_replay(MockJournalImageCtx &mock_image_ctx,
@@ -231,6 +239,8 @@ public:
   }
 
   void expect_replay_process(MockJournalReplay &mock_journal_replay) {
+    EXPECT_CALL(mock_journal_replay, decode(_, _))
+                  .WillOnce(Return(0));
     EXPECT_CALL(mock_journal_replay, process(_, _, _))
                   .WillOnce(DoAll(WithArg<1>(CompleteContext(0, NULL)),
                                   WithArg<2>(Invoke(this, &TestMockJournal::save_commit_context))));
@@ -298,7 +308,7 @@ public:
 
   uint64_t when_append_io_event(MockJournalImageCtx &mock_image_ctx,
                                 MockJournal &mock_journal,
-                                AioObjectRequest *object_request = nullptr) {
+                                AioObjectRequest<> *object_request = nullptr) {
     RWLock::RLocker owner_locker(mock_image_ctx.owner_lock);
     MockJournal::AioObjectRequests object_requests;
     if (object_request != nullptr) {
@@ -592,6 +602,58 @@ TEST_F(TestMockJournal, FlushReplayError) {
   ASSERT_EQ(0, when_close(mock_journal));
 }
 
+TEST_F(TestMockJournal, CorruptEntry) {
+  REQUIRE_FEATURE(RBD_FEATURE_JOURNALING);
+
+  librbd::ImageCtx *ictx;
+  ASSERT_EQ(0, open_image(m_image_name, &ictx));
+
+  MockJournalImageCtx mock_image_ctx(*ictx);
+  MockJournal mock_journal(mock_image_ctx);
+  expect_op_work_queue(mock_image_ctx);
+
+  InSequence seq;
+
+  ::journal::MockJournaler mock_journaler;
+  expect_construct_journaler(mock_journaler);
+  expect_init_journaler(mock_journaler, 0);
+  expect_get_max_append_size(mock_journaler, 1 << 16);
+  expect_get_journaler_cached_client(mock_journaler, 0);
+  expect_get_journaler_tags(mock_image_ctx, mock_journaler, 0);
+  expect_start_replay(
+    mock_image_ctx, mock_journaler, {
+      std::bind(&invoke_replay_ready, _1),
+      std::bind(&invoke_replay_complete, _1, 0)
+    });
+
+  ::journal::MockReplayEntry mock_replay_entry;
+  MockJournalReplay mock_journal_replay;
+  expect_try_pop_front(mock_journaler, true, mock_replay_entry);
+  EXPECT_CALL(mock_journal_replay, decode(_, _)).WillOnce(Return(-EBADMSG));
+  expect_stop_replay(mock_journaler);
+  expect_shut_down_replay(mock_image_ctx, mock_journal_replay, 0, true);
+  expect_shut_down_journaler(mock_journaler);
+
+  // replay failure should result in replay-restart
+  expect_construct_journaler(mock_journaler);
+  expect_init_journaler(mock_journaler, 0);
+  expect_get_max_append_size(mock_journaler, 1 << 16);
+  expect_get_journaler_cached_client(mock_journaler, 0);
+  expect_get_journaler_tags(mock_image_ctx, mock_journaler, 0);
+  expect_start_replay(
+    mock_image_ctx, mock_journaler, {
+      std::bind(&invoke_replay_complete, _1, 0)
+    });
+  expect_stop_replay(mock_journaler);
+  expect_shut_down_replay(mock_image_ctx, mock_journal_replay, 0);
+  expect_start_append(mock_journaler);
+  ASSERT_EQ(0, when_open(mock_journal));
+
+  expect_stop_append(mock_journaler, -EINVAL);
+  expect_shut_down_journaler(mock_journaler);
+  ASSERT_EQ(-EINVAL, when_close(mock_journal));
+}
+
 TEST_F(TestMockJournal, StopError) {
   REQUIRE_FEATURE(RBD_FEATURE_JOURNALING);
 
@@ -655,6 +717,8 @@ TEST_F(TestMockJournal, ReplayOnDiskPreFlushError) {
   MockJournalReplay mock_journal_replay;
   expect_try_pop_front(mock_journaler, true, mock_replay_entry);
 
+  EXPECT_CALL(mock_journal_replay, decode(_, _))
+                .WillOnce(Return(0));
   Context *on_ready;
   EXPECT_CALL(mock_journal_replay, process(_, _, _))
                 .WillOnce(DoAll(SaveArg<1>(&on_ready),
@@ -1000,4 +1064,126 @@ TEST_F(TestMockJournal, FlushCommitPosition) {
   expect_shut_down_journaler(mock_journaler);
 }
 
+TEST_F(TestMockJournal, ExternalReplay) {
+  REQUIRE_FEATURE(RBD_FEATURE_JOURNALING);
+
+  librbd::ImageCtx *ictx;
+  ASSERT_EQ(0, open_image(m_image_name, &ictx));
+
+  MockJournalImageCtx mock_image_ctx(*ictx);
+  MockJournal mock_journal(mock_image_ctx);
+  ::journal::MockJournaler mock_journaler;
+  open_journal(mock_image_ctx, mock_journal, mock_journaler);
+  BOOST_SCOPE_EXIT_ALL(&) {
+    close_journal(mock_journal, mock_journaler);
+  };
+
+  InSequence seq;
+  expect_stop_append(mock_journaler, 0);
+  expect_start_append(mock_journaler);
+  expect_shut_down_journaler(mock_journaler);
+
+  C_SaferCond start_ctx;
+  C_SaferCond close_request_ctx;
+
+  journal::Replay<MockJournalImageCtx> *journal_replay = nullptr;
+  mock_journal.start_external_replay(&journal_replay, &start_ctx,
+                                     &close_request_ctx);
+  ASSERT_EQ(0, start_ctx.wait());
+
+  mock_journal.stop_external_replay();
+  ASSERT_EQ(-ECANCELED, close_request_ctx.wait());
+}
+
+TEST_F(TestMockJournal, ExternalReplayFailure) {
+  REQUIRE_FEATURE(RBD_FEATURE_JOURNALING);
+
+  librbd::ImageCtx *ictx;
+  ASSERT_EQ(0, open_image(m_image_name, &ictx));
+
+  MockJournalImageCtx mock_image_ctx(*ictx);
+  MockJournal mock_journal(mock_image_ctx);
+  ::journal::MockJournaler mock_journaler;
+  open_journal(mock_image_ctx, mock_journal, mock_journaler);
+  BOOST_SCOPE_EXIT_ALL(&) {
+    close_journal(mock_journal, mock_journaler);
+  };
+
+  InSequence seq;
+  expect_stop_append(mock_journaler, -EINVAL);
+  expect_start_append(mock_journaler);
+  expect_shut_down_journaler(mock_journaler);
+
+  C_SaferCond start_ctx;
+  C_SaferCond close_request_ctx;
+
+  journal::Replay<MockJournalImageCtx> *journal_replay = nullptr;
+  mock_journal.start_external_replay(&journal_replay, &start_ctx,
+                                     &close_request_ctx);
+  ASSERT_EQ(-EINVAL, start_ctx.wait());
+  ASSERT_EQ(-EINVAL, close_request_ctx.wait());
+}
+
+TEST_F(TestMockJournal, ExternalReplayCloseRequest) {
+  REQUIRE_FEATURE(RBD_FEATURE_JOURNALING);
+
+  librbd::ImageCtx *ictx;
+  ASSERT_EQ(0, open_image(m_image_name, &ictx));
+
+  MockJournalImageCtx mock_image_ctx(*ictx);
+  MockJournal mock_journal(mock_image_ctx);
+  ::journal::MockJournaler mock_journaler;
+  open_journal(mock_image_ctx, mock_journal, mock_journaler);
+
+  InSequence seq;
+  expect_stop_append(mock_journaler, 0);
+  expect_shut_down_journaler(mock_journaler);
+
+  C_SaferCond start_ctx;
+  C_SaferCond close_request_ctx;
+
+  journal::Replay<MockJournalImageCtx> *journal_replay = nullptr;
+  mock_journal.start_external_replay(&journal_replay, &start_ctx,
+                                     &close_request_ctx);
+  ASSERT_EQ(0, start_ctx.wait());
+
+  C_SaferCond close_ctx;
+  mock_journal.close(&close_ctx);
+
+  ASSERT_EQ(0, close_request_ctx.wait());
+  mock_journal.stop_external_replay();
+
+  ASSERT_EQ(0, close_ctx.wait());
+}
+
+TEST_F(TestMockJournal, AppendDisabled) {
+  REQUIRE_FEATURE(RBD_FEATURE_JOURNALING);
+
+  librbd::ImageCtx *ictx;
+  ASSERT_EQ(0, open_image(m_image_name, &ictx));
+
+  MockJournalImageCtx mock_image_ctx(*ictx);
+  MockJournal mock_journal(mock_image_ctx);
+  MockJournalPolicy mock_journal_policy;
+
+  ::journal::MockJournaler mock_journaler;
+  open_journal(mock_image_ctx, mock_journal, mock_journaler);
+  BOOST_SCOPE_EXIT_ALL(&) {
+    close_journal(mock_journal, mock_journaler);
+  };
+
+  InSequence seq;
+  RWLock::RLocker snap_locker(mock_image_ctx.snap_lock);
+  EXPECT_CALL(mock_image_ctx, get_journal_policy()).WillOnce(
+    Return(ictx->get_journal_policy()));
+  ASSERT_TRUE(mock_journal.is_journal_appending());
+
+  EXPECT_CALL(mock_image_ctx, get_journal_policy()).WillOnce(
+    Return(&mock_journal_policy));
+  EXPECT_CALL(mock_journal_policy, append_disabled()).WillOnce(Return(true));
+  ASSERT_FALSE(mock_journal.is_journal_appending());
+
+  expect_shut_down_journaler(mock_journaler);
+}
+
 } // namespace librbd
diff --git a/src/test/librbd/test_mock_fixture.cc b/src/test/librbd/test_mock_fixture.cc
index 4cc940c..3fb246d 100644
--- a/src/test/librbd/test_mock_fixture.cc
+++ b/src/test/librbd/test_mock_fixture.cc
@@ -84,6 +84,11 @@ void TestMockFixture::initialize_features(librbd::ImageCtx *ictx,
   }
 }
 
+void TestMockFixture::expect_is_journal_appending(librbd::MockJournal &mock_journal,
+                                                  bool appending) {
+  EXPECT_CALL(mock_journal, is_journal_appending()).WillOnce(Return(appending));
+}
+
 void TestMockFixture::expect_is_journal_replaying(librbd::MockJournal &mock_journal) {
   EXPECT_CALL(mock_journal, is_journal_replaying()).WillOnce(Return(false));
 }
@@ -99,9 +104,13 @@ void TestMockFixture::expect_allocate_op_tid(librbd::MockImageCtx &mock_image_ct
   }
 }
 
-void TestMockFixture::expect_append_op_event(librbd::MockImageCtx &mock_image_ctx, int r) {
+void TestMockFixture::expect_append_op_event(librbd::MockImageCtx &mock_image_ctx,
+                                             bool can_affect_io, int r) {
   if (mock_image_ctx.journal != nullptr) {
-    expect_is_journal_replaying(*mock_image_ctx.journal);
+    if (can_affect_io) {
+      expect_is_journal_replaying(*mock_image_ctx.journal);
+    }
+    expect_is_journal_appending(*mock_image_ctx.journal, true);
     expect_allocate_op_tid(mock_image_ctx);
     EXPECT_CALL(*mock_image_ctx.journal, append_op_event_mock(_, _, _))
                   .WillOnce(WithArg<2>(CompleteContext(r, mock_image_ctx.image_ctx->op_work_queue)));
@@ -110,9 +119,10 @@ void TestMockFixture::expect_append_op_event(librbd::MockImageCtx &mock_image_ct
 
 void TestMockFixture::expect_commit_op_event(librbd::MockImageCtx &mock_image_ctx, int r) {
   if (mock_image_ctx.journal != nullptr) {
-    expect_is_journal_replaying(*mock_image_ctx.journal);
+    expect_is_journal_appending(*mock_image_ctx.journal, true);
     expect_is_journal_ready(*mock_image_ctx.journal);
-    EXPECT_CALL(*mock_image_ctx.journal, commit_op_event(1U, r));
+    EXPECT_CALL(*mock_image_ctx.journal, commit_op_event(1U, r, _))
+                  .WillOnce(WithArg<2>(CompleteContext(r, mock_image_ctx.image_ctx->op_work_queue)));
   }
 }
 
diff --git a/src/test/librbd/test_mock_fixture.h b/src/test/librbd/test_mock_fixture.h
index bd5a2ac..b06ca5b 100644
--- a/src/test/librbd/test_mock_fixture.h
+++ b/src/test/librbd/test_mock_fixture.h
@@ -80,10 +80,13 @@ public:
                            librbd::MockJournal &mock_journal,
                            librbd::MockObjectMap &mock_object_map);
 
+  void expect_is_journal_appending(librbd::MockJournal &mock_journal,
+                                   bool appending);
   void expect_is_journal_replaying(librbd::MockJournal &mock_journal);
   void expect_is_journal_ready(librbd::MockJournal &mock_journal);
   void expect_allocate_op_tid(librbd::MockImageCtx &mock_image_ctx);
-  void expect_append_op_event(librbd::MockImageCtx &mock_image_ctx, int r);
+  void expect_append_op_event(librbd::MockImageCtx &mock_image_ctx,
+                              bool can_affect_io, int r);
   void expect_commit_op_event(librbd::MockImageCtx &mock_image_ctx, int r);
 
 private:
diff --git a/src/test/librbd/test_support.h b/src/test/librbd/test_support.h
index 3a2298e..bce931a 100644
--- a/src/test/librbd/test_support.h
+++ b/src/test/librbd/test_support.h
@@ -11,18 +11,13 @@ int create_image_pp(librbd::RBD &rbd, librados::IoCtx &ioctx,
                     const std::string &name, uint64_t size);
 int get_image_id(librbd::Image &image, std::string *image_id);
 
-#define REQUIRE_FEATURE(feature) { 	  \
-  if (!is_feature_enabled(feature)) { 	  \
-    std::cout << "SKIPPING" << std::endl; \
-    return SUCCEED(); 			  \
-  } 					  \
-}
-
-#define REQUIRE_FORMAT_V1() { 	          \
-  if (is_feature_enabled(0)) { 	          \
+#define REQUIRE(x) {			  \
+  if (!(x)) {				  \
     std::cout << "SKIPPING" << std::endl; \
     return SUCCEED(); 			  \
   } 					  \
 }
 
+#define REQUIRE_FEATURE(feature) REQUIRE(is_feature_enabled(feature))
+#define REQUIRE_FORMAT_V1() REQUIRE(!is_feature_enabled(0))
 #define REQUIRE_FORMAT_V2() REQUIRE_FEATURE(0)
diff --git a/src/test/msgr/test_msgr.cc b/src/test/msgr/test_msgr.cc
index b41f260..65a6a6f 100644
--- a/src/test/msgr/test_msgr.cc
+++ b/src/test/msgr/test_msgr.cc
@@ -826,6 +826,14 @@ class SyntheticDispatcher : public Dispatcher {
       sent.erase(*it);
     conn_sent.erase(con);
   }
+
+  void print() {
+    for (auto && p : conn_sent) {
+      if (!p.second.empty()) {
+        cerr << __func__ << " " << p.first << " wait " << p.second.size() << std::endl;
+      }
+    }
+  }
 };
 
 
@@ -997,18 +1005,29 @@ class SyntheticWorkload {
     ASSERT_EQ(available_connections.erase(conn), 1U);
   }
 
-  void print_internal_state() {
+  void print_internal_state(bool detail=false) {
     Mutex::Locker l(lock);
     cerr << "available_connections: " << available_connections.size()
          << " inflight messages: " << dispatcher.get_pending() << std::endl;
+    if (detail && !available_connections.empty()) {
+      for (auto &&c : available_connections)
+        cerr << "available connection: " << c.first << " ";
+      cerr << std::endl;
+      dispatcher.print();
+    }
   }
 
   void wait_for_done() {
-    uint64_t i = 0;
+    int64_t tick_us = 1000 * 100; // 100ms
+    int64_t timeout_us = 5 * 60 * 1000 * 1000; // 5 mins
+    int i = 0;
     while (dispatcher.get_pending()) {
-      usleep(1000*100);
+      usleep(tick_us);
+      timeout_us -= tick_us;
       if (i++ % 50 == 0)
-        print_internal_state();
+        print_internal_state(true);
+      if (timeout_us < 0)
+        assert(0 == " loop time exceed 5 mins, it looks we stuck into some problems!");
     }
     for (set<Messenger*>::iterator it = available_servers.begin();
          it != available_servers.end(); ++it) {
@@ -1201,6 +1220,9 @@ TEST_P(MessengerTest, SyntheticInjectTest3) {
 TEST_P(MessengerTest, SyntheticInjectTest4) {
   g_ceph_context->_conf->set_val("ms_inject_socket_failures", "30");
   g_ceph_context->_conf->set_val("ms_inject_internal_delays", "0.1");
+  g_ceph_context->_conf->set_val("ms_inject_delay_probability", "1");
+  g_ceph_context->_conf->set_val("ms_inject_delay_type", "client osd", false, false);
+  g_ceph_context->_conf->set_val("ms_inject_delay_max", "5");
   SyntheticWorkload test_msg(16, 32, GetParam(), 100,
                              Messenger::Policy::lossless_peer(0, 0),
                              Messenger::Policy::lossless_peer(0, 0));
@@ -1229,6 +1251,9 @@ TEST_P(MessengerTest, SyntheticInjectTest4) {
   test_msg.wait_for_done();
   g_ceph_context->_conf->set_val("ms_inject_socket_failures", "0");
   g_ceph_context->_conf->set_val("ms_inject_internal_delays", "0");
+  g_ceph_context->_conf->set_val("ms_inject_delay_probability", "0");
+  g_ceph_context->_conf->set_val("ms_inject_delay_type", "", false, false);
+  g_ceph_context->_conf->set_val("ms_inject_delay_max", "0");
 }
 
 
diff --git a/src/test/opensuse-13.2/ceph.spec.in b/src/test/opensuse-13.2/ceph.spec.in
index 3cf6307..b2e4b12 100644
--- a/src/test/opensuse-13.2/ceph.spec.in
+++ b/src/test/opensuse-13.2/ceph.spec.in
@@ -18,7 +18,12 @@
 %bcond_without cephfs_java
 %bcond_with tests
 %bcond_with xio
+%ifnarch s390 s390x
 %bcond_without tcmalloc
+%else
+# no gperftools/tcmalloc on s390(x)
+%bcond_with tcmalloc
+%endif
 %bcond_without libs_compat
 %bcond_with lowmem_builder
 %if 0%{?fedora} || 0%{?rhel}
@@ -59,6 +64,13 @@ Group:         System/Filesystems
 %endif
 URL:		http://ceph.com/
 Source0:	http://ceph.com/download/%{name}-%{version}.tar.bz2
+%if 0%{?suse_version}
+%if 0%{?is_opensuse}
+ExclusiveArch:  x86_64 aarch64 ppc64 ppc64le
+%else
+ExclusiveArch:  x86_64 aarch64
+%endif
+%endif
 #################################################################################
 # dependencies that apply across all distro families
 #################################################################################
@@ -81,9 +93,13 @@ BuildRequires:	cryptsetup
 BuildRequires:	fuse-devel
 BuildRequires:	gcc-c++
 BuildRequires:	gdbm
+%if 0%{with tcmalloc}
+BuildRequires:	gperftools-devel
+%endif
 BuildRequires:	hdparm
 BuildRequires:	leveldb-devel > 1.2
 BuildRequires:	libaio-devel
+BuildRequires:	libatomic_ops-devel
 BuildRequires:	libblkid-devel >= 2.17
 BuildRequires:	libcurl-devel
 BuildRequires:	libudev-devel
@@ -118,13 +134,9 @@ BuildRequires:	systemd
 PreReq:		%fillup_prereq
 BuildRequires:	net-tools
 BuildRequires:	libbz2-devel
-%if 0%{with tcmalloc}
-BuildRequires:	gperftools-devel
-%endif
 BuildRequires:  btrfsprogs
 BuildRequires:	mozilla-nss-devel
 BuildRequires:	keyutils-devel
-BuildRequires:	libatomic-ops-devel
 BuildRequires:  libopenssl-devel
 BuildRequires:  lsb-release
 BuildRequires:  openldap2-devel
@@ -136,8 +148,6 @@ BuildRequires:  boost-random
 BuildRequires:	btrfs-progs
 BuildRequires:	nss-devel
 BuildRequires:	keyutils-libs-devel
-BuildRequires:	libatomic_ops-devel
-BuildRequires:	gperftools-devel
 BuildRequires:  openldap-devel
 BuildRequires:  openssl-devel
 BuildRequires:  redhat-lsb-core
@@ -197,7 +207,6 @@ Requires:      python-setuptools
 Requires:      grep
 Requires:      xfsprogs
 Requires:      logrotate
-Requires:      parted
 Requires:      util-linux
 Requires:      hdparm
 Requires:      cryptsetup
@@ -342,6 +351,7 @@ Requires:	gdisk
 %if 0%{?suse_version}
 Requires:	gptfdisk
 %endif
+Requires:       parted
 %description osd
 ceph-osd is the object storage daemon for the Ceph distributed file
 system.  It is responsible for storing objects on a local file system
@@ -660,7 +670,9 @@ export RPM_OPT_FLAGS=`echo $RPM_OPT_FLAGS | sed -e 's/i386/i486/'`
 %endif
 		$CEPH_EXTRA_CONFIGURE_ARGS \
 		%{?_with_ocf} \
-		%{?_with_tcmalloc} \
+%if %{without tcmalloc}
+		--without-tcmalloc \
+%endif
 		CFLAGS="$RPM_OPT_FLAGS" CXXFLAGS="$RPM_OPT_FLAGS"
 
 %if %{with lowmem_builder}
@@ -700,17 +712,18 @@ install -m 0644 -D src/logrotate.conf %{buildroot}%{_sysconfdir}/logrotate.d/cep
 chmod 0644 %{buildroot}%{_docdir}/ceph/sample.ceph.conf
 chmod 0644 %{buildroot}%{_docdir}/ceph/sample.fetch_config
 
-# firewall templates
+# firewall templates and /sbin/mount.ceph symlink
 %if 0%{?suse_version}
 install -m 0644 -D etc/sysconfig/SuSEfirewall2.d/services/ceph-mon %{buildroot}%{_sysconfdir}/sysconfig/SuSEfirewall2.d/services/ceph-mon
 install -m 0644 -D etc/sysconfig/SuSEfirewall2.d/services/ceph-osd-mds %{buildroot}%{_sysconfdir}/sysconfig/SuSEfirewall2.d/services/ceph-osd-mds
+mkdir -p %{buildroot}/sbin
+ln -sf %{_sbindir}/mount.ceph %{buildroot}/sbin/mount.ceph
 %endif
 
 # udev rules
 install -m 0644 -D udev/50-rbd.rules %{buildroot}%{_udevrulesdir}/50-rbd.rules
+install -m 0644 -D udev/60-ceph-by-parttypeuuid.rules %{buildroot}%{_udevrulesdir}/60-ceph-by-parttypeuuid.rules
 install -m 0644 -D udev/95-ceph-osd.rules %{buildroot}%{_udevrulesdir}/95-ceph-osd.rules
-mv %{buildroot}/sbin/mount.ceph %{buildroot}/usr/sbin/mount.ceph
-mv %{buildroot}/sbin/mount.fuse.ceph %{buildroot}/usr/sbin/mount.fuse.ceph
 
 #set up placeholder directories
 mkdir -p %{buildroot}%{_sysconfdir}/ceph
@@ -750,7 +763,6 @@ rm -rf %{buildroot}
 %{_libexecdir}/systemd/system-preset/50-ceph.preset
 %{_sbindir}/ceph-create-keys
 %{_sbindir}/rcceph
-%{_sbindir}/mount.ceph
 %dir %{_libexecdir}/ceph
 %{_libexecdir}/ceph/ceph_common.sh
 %dir %{_libdir}/rados-classes
@@ -785,7 +797,6 @@ rm -rf %{buildroot}
 %{_mandir}/man8/osdmaptool.8*
 %{_mandir}/man8/monmaptool.8*
 %{_mandir}/man8/cephfs.8*
-%{_mandir}/man8/mount.ceph.8*
 #set up placeholder directories
 %attr(750,ceph,ceph) %dir %{_localstatedir}/lib/ceph/tmp
 %attr(750,ceph,ceph) %dir %{_localstatedir}/lib/ceph/bootstrap-osd
@@ -841,6 +852,10 @@ DISABLE_RESTART_ON_UPDATE="yes"
 %{_bindir}/rbd-replay
 %{_bindir}/rbd-replay-many
 %{_bindir}/rbdmap
+%{_sbindir}/mount.ceph
+%if 0%{?suse_version}
+/sbin/mount.ceph
+%endif
 %if %{with lttng}
 %{_bindir}/rbd-replay-prep
 %endif
@@ -854,6 +869,7 @@ DISABLE_RESTART_ON_UPDATE="yes"
 %{_mandir}/man8/ceph-syn.8*
 %{_mandir}/man8/ceph-post-file.8*
 %{_mandir}/man8/ceph.8*
+%{_mandir}/man8/mount.ceph.8*
 %{_mandir}/man8/rados.8*
 %{_mandir}/man8/rbd.8*
 %{_mandir}/man8/rbdmap.8*
@@ -1140,6 +1156,7 @@ fi
 %{_sbindir}/ceph-disk
 %{_sbindir}/ceph-disk-udev
 %{_libexecdir}/ceph/ceph-osd-prestart.sh
+%{_udevrulesdir}/60-ceph-by-parttypeuuid.rules
 %{_udevrulesdir}/95-ceph-osd.rules
 %{_mandir}/man8/ceph-clsinfo.8*
 %{_mandir}/man8/ceph-disk.8*
@@ -1201,10 +1218,6 @@ fi
 %dir %{_prefix}/lib/ocf
 %dir %{_prefix}/lib/ocf/resource.d
 %dir %{_prefix}/lib/ocf/resource.d/ceph
-%exclude %{_prefix}/lib/ocf/resource.d/ceph/ceph
-%exclude %{_prefix}/lib/ocf/resource.d/ceph/mds
-%exclude %{_prefix}/lib/ocf/resource.d/ceph/mon
-%exclude %{_prefix}/lib/ocf/resource.d/ceph/osd
 %{_prefix}/lib/ocf/resource.d/ceph/rbd
 
 %endif
diff --git a/src/test/opensuse-13.2/install-deps.sh b/src/test/opensuse-13.2/install-deps.sh
index 03ca760..129178f 100755
--- a/src/test/opensuse-13.2/install-deps.sh
+++ b/src/test/opensuse-13.2/install-deps.sh
@@ -24,7 +24,7 @@ if test -f /etc/redhat-release ; then
 fi
 
 if type apt-get > /dev/null 2>&1 ; then
-    $SUDO apt-get install -y lsb-release
+    $SUDO apt-get install -y lsb-release devscripts equivs
 fi
 
 if type zypper > /dev/null 2>&1 ; then
@@ -39,20 +39,23 @@ Ubuntu|Debian|Devuan)
             exit 1
         fi
         touch $DIR/status
-        packages=$(dpkg-checkbuilddeps --admindir=$DIR debian/control 2>&1 | \
-            perl -p -e 's/.*Unmet build dependencies: *//;' \
-            -e 's/build-essential:native/build-essential/;' \
-            -e 's/\s*\|\s*/\|/g;' \
-            -e 's/\(.*?\)//g;' \
-            -e 's/ +/\n/g;' | sort)
+
+	backports=""
+	control="debian/control"
         case $(lsb_release -sc) in
             squeeze|wheezy)
-                packages=$(echo $packages | perl -pe 's/[-\w]*babeltrace[-\w]*//g')
+		control="/tmp/control.$$"
+		grep -v babeltrace debian/control > $control
                 backports="-t $(lsb_release -sc)-backports"
                 ;;
         esac
-        packages=$(echo $packages) # change newlines into spaces
-        $SUDO env DEBIAN_FRONTEND=noninteractive apt-get install $backports -y $packages || exit 1
+
+	# make a metapackage that expresses the build dependencies,
+	# install it, rm the .deb; then uninstall the package as its
+	# work is done
+	$SUDO env DEBIAN_FRONTEND=noninteractive mk-build-deps --install --remove --tool="apt-get -y --no-install-recommends $backports" $control || exit 1
+	$SUDO env DEBIAN_FRONTEND=noninteractive apt-get -y remove ceph-build-deps
+	if [ -n "$backports" ] ; then rm $control; fi
         ;;
 CentOS|Fedora|RedHatEnterpriseServer)
         case $(lsb_release -si) in
@@ -106,7 +109,14 @@ function activate_virtualenv() {
     local env_dir=$top_srcdir/install-deps-$interpreter
 
     if ! test -d $env_dir ; then
-        virtualenv --python $interpreter $env_dir
+        # Make a temporary virtualenv to get a fresh version of virtualenv
+        # because CentOS 7 has a buggy old version (v1.10.1)
+        # https://github.com/pypa/virtualenv/issues/463
+        virtualenv ${env_dir}_tmp
+        ${env_dir}_tmp/bin/pip install --upgrade virtualenv
+        ${env_dir}_tmp/bin/virtualenv --python $interpreter $env_dir
+        rm -rf ${env_dir}_tmp
+
         . $env_dir/bin/activate
         if ! populate_wheelhouse install ; then
             rm -rf $env_dir
diff --git a/src/test/osd/osd-scrub-repair.sh b/src/test/osd/osd-scrub-repair.sh
index 03580c3..687b8ed 100755
--- a/src/test/osd/osd-scrub-repair.sh
+++ b/src/test/osd/osd-scrub-repair.sh
@@ -351,6 +351,55 @@ function TEST_list_missing_erasure_coded() {
     teardown $dir || return 1
 }
 
+#
+# Corrupt one copy of a replicated pool
+#
+function TEST_corrupt_scrub_replicated() {
+    local dir=$1
+    local poolname=csr_pool
+    local total_objs=4
+
+    setup $dir || return 1
+    run_mon $dir a --osd_pool_default_size=2 || return 1
+    run_osd $dir 0 || return 1
+    run_osd $dir 1 || return 1
+    wait_for_clean || return 1
+
+    ceph osd pool create $poolname 1 1 || return 1
+    wait_for_clean || return 1
+
+    for i in $(seq 0 $total_objs) ; do
+      objname=OBJ${i}
+      add_something $dir $poolname $objname
+      if [ $i = "0" ];
+      then
+        local payload=UVWXYZ
+        echo $payload > $dir/CORRUPT
+        objectstore_tool $dir $(expr $i % 2) $objname set-bytes $dir/CORRUPT || return 1
+      else
+        objectstore_tool $dir $(expr $i % 2) $objname remove || return 1
+      fi
+    done
+
+    local pg=$(get_pg $poolname OBJ0)
+    pg_scrub $pg
+
+    rados list-inconsistent-pg $poolname > $dir/json || return 1
+    # Check pg count
+    test $(jq '. | length' $dir/json) = "1" || return 1
+    # Check pgid
+    test $(jq -r '.[0]' $dir/json) = $pg || return 1
+
+    rados list-inconsistent-obj $pg > $dir/json || return 1
+    # Get epoch for repair-get requests
+    epoch=$(jq .epoch $dir/json)
+    # Check object count
+    test $(jq '.inconsistents | length' $dir/json) = "$total_objs" || return 1
+
+    rados rmpool $poolname $poolname --yes-i-really-really-mean-it
+    teardown $dir || return 1
+}
+
 
 main osd-scrub-repair "$@"
 
diff --git a/src/test/osd/osd-scrub-snaps.sh b/src/test/osd/osd-scrub-snaps.sh
index d422448..14d27d1 100755
--- a/src/test/osd/osd-scrub-snaps.sh
+++ b/src/test/osd/osd-scrub-snaps.sh
@@ -156,6 +156,46 @@ function TEST_scrub_snaps() {
     fi
     grep 'log_channel' $dir/osd.0.log
 
+    rados list-inconsistent-pg $poolname > $dir/json || return 1
+    # Check pg count
+    test $(jq '. | length' $dir/json) = "1" || return 1
+    # Check pgid
+    test $(jq -r '.[0]' $dir/json) = $pgid || return 1
+
+    rados list-inconsistent-snapset $pgid > $dir/json || return 1
+    test $(jq '.inconsistents | length' $dir/json) = "20" || return 1
+
+    jq -c '.inconsistents | sort' > $dir/checkcsjson << EOF
+{"epoch":18,"inconsistents":[{"name":"obj1","nspace":"","locator":"","snap":1,
+"errors":["headless"]},{"name":"obj10","nspace":"","locator":"","snap":1,
+"errors":["size_mismatch"]},{"name":"obj11","nspace":"","locator":"","snap":1,
+"errors":["headless"]},{"name":"obj14","nspace":"","locator":"","snap":1,
+"errors":["size_mismatch"]},{"name":"obj6","nspace":"","locator":"","snap":1,
+"errors":["headless"]},{"name":"obj7","nspace":"","locator":"","snap":1,
+"errors":["headless"]},{"name":"obj9","nspace":"","locator":"","snap":1,
+"errors":["size_mismatch"]},{"name":"obj2","nspace":"","locator":"","snap":4,
+"errors":["headless"]},{"name":"obj5","nspace":"","locator":"","snap":4,
+"errors":["size_mismatch"]},{"name":"obj2","nspace":"","locator":"","snap":7,
+"errors":["headless"]},{"name":"obj5","nspace":"","locator":"","snap":7,
+"errors":["oi_attr_missing","headless"]},{"name":"obj11","nspace":"",
+"locator":"","snap":"head","errors":["extra_clones"],"extra clones":[1]},
+{"name":"obj12","nspace":"","locator":"","snap":"head",
+"errors":["head_mismatch"]},{"name":"obj3","nspace":"","locator":"",
+"snap":"head","errors":["size_mismatch"]},{"name":"obj5","nspace":"",
+"locator":"","snap":"head","errors":["extra_clones","clone_missing"],
+"extra clones":[7],"missing":[2,1]},{"name":"obj6","nspace":"","locator":"",
+"snap":"head","errors":["extra_clones"],"extra clones":[1]},{"name":"obj7",
+"nspace":"","locator":"","snap":"head","errors":["head_mismatch",
+"extra_clones"],"extra clones":[1]},{"name":"obj8","nspace":"","locator":"",
+"snap":"head","errors":["snapset_mismatch"]},{"name":"obj2","nspace":"",
+"locator":"","snap":"snapdir","errors":["ss_attr_missing","extra_clones"],
+"extra clones":[7,4]},{"name":"obj4","nspace":"","locator":"","snap":"snapdir",
+"errors":["clone_missing"],"missing":[7]}]}
+EOF
+
+    jq -c '.inconsistents | sort' $dir/json > $dir/csjson
+    diff $dir/csjson $dir/checkcsjson || return 1
+
     for i in `seq 1 7`
     do
         rados -p $poolname rmsnap snap$i
@@ -189,15 +229,15 @@ function TEST_scrub_snaps() {
     err_strings[11]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj3:head on disk size [(]3840[)] does not match object info size [(]768[)] adjusted for ondisk to [(]768[)]"
     err_strings[12]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj6:1 is an unexpected clone"
     err_strings[13]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj2:snapdir no 'snapset' attr"
-    err_strings[14]="log_channel[(]cluster[)] log [[]INF[]] : scrub [0-9]*[.]0 .*:::obj2:7 clone ignored due to missing snapset"
-    err_strings[15]="log_channel[(]cluster[)] log [[]INF[]] : scrub [0-9]*[.]0 .*:::obj2:4 clone ignored due to missing snapset"
+    err_strings[14]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj2:7 clone ignored due to missing snapset"
+    err_strings[15]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj2:4 clone ignored due to missing snapset"
     err_strings[16]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj4:snapdir expected clone .*:::obj4:7"
     err_strings[17]="log_channel[(]cluster[)] log [[]INF[]] : scrub [0-9]*[.]0 .*:::obj4:snapdir 1 missing clone[(]s[)]"
     err_strings[18]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj1:1 is an unexpected clone"
     err_strings[19]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj9:1 is missing in clone_size"
     err_strings[20]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj11:1 is an unexpected clone"
     err_strings[21]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj14:1 size 1032 != clone_size 1033"
-    err_strings[22]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 scrub 19 errors"
+    err_strings[22]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 scrub 21 errors"
 
     for i in `seq 0 ${#err_strings[@]}`
     do
diff --git a/src/test/osdc/MemWriteback.cc b/src/test/osdc/MemWriteback.cc
new file mode 100644
index 0000000..17f5601
--- /dev/null
+++ b/src/test/osdc/MemWriteback.cc
@@ -0,0 +1,163 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include <errno.h>
+#include <time.h>
+
+#include <thread>
+#include "common/debug.h"
+#include "common/Cond.h"
+#include "common/Finisher.h"
+#include "common/Mutex.h"
+#include "include/assert.h"
+#include "common/ceph_time.h"
+
+#include "MemWriteback.h"
+
+#define dout_subsys ceph_subsys_objectcacher
+#undef dout_prefix
+#define dout_prefix *_dout << "MemWriteback(" << this << ") "
+
+class C_DelayRead : public Context {
+  MemWriteback *wb;
+  CephContext *m_cct;
+  Context *m_con;
+  ceph::timespan m_delay;
+  Mutex *m_lock;
+  object_t m_oid;
+  uint64_t m_off;
+  uint64_t m_len;
+  bufferlist *m_bl;
+
+public:
+  C_DelayRead(MemWriteback *mwb, CephContext *cct, Context *c, Mutex *lock,
+	      const object_t& oid, uint64_t off, uint64_t len, bufferlist *pbl,
+	      uint64_t delay_ns=0)
+    : wb(mwb), m_cct(cct), m_con(c),
+      m_delay(delay_ns * std::chrono::nanoseconds(1)),
+      m_lock(lock), m_oid(oid), m_off(off), m_len(len), m_bl(pbl) {}
+  void finish(int r) {
+    std::this_thread::sleep_for(m_delay);
+    m_lock->Lock();
+    r = wb->read_object_data(m_oid, m_off, m_len, m_bl);
+    if (m_con)
+      m_con->complete(r);
+    m_lock->Unlock();
+  }
+};
+
+class C_DelayWrite : public Context {
+  MemWriteback *wb;
+  CephContext *m_cct;
+  Context *m_con;
+  ceph::timespan m_delay;
+  Mutex *m_lock;
+  object_t m_oid;
+  uint64_t m_off;
+  uint64_t m_len;
+  const bufferlist& m_bl;
+
+public:
+  C_DelayWrite(MemWriteback *mwb, CephContext *cct, Context *c, Mutex *lock,
+	       const object_t& oid, uint64_t off, uint64_t len,
+	       const bufferlist& bl, uint64_t delay_ns=0)
+    : wb(mwb), m_cct(cct), m_con(c),
+      m_delay(delay_ns * std::chrono::nanoseconds(1)),
+      m_lock(lock), m_oid(oid), m_off(off), m_len(len), m_bl(bl) {}
+  void finish(int r) {
+    std::this_thread::sleep_for(m_delay);
+    m_lock->Lock();
+    wb->write_object_data(m_oid, m_off, m_len, m_bl);
+    if (m_con)
+      m_con->complete(r);
+    m_lock->Unlock();
+  }
+};
+
+MemWriteback::MemWriteback(CephContext *cct, Mutex *lock, uint64_t delay_ns)
+  : m_cct(cct), m_lock(lock), m_delay_ns(delay_ns)
+{
+  m_finisher = new Finisher(cct);
+  m_finisher->start();
+}
+
+MemWriteback::~MemWriteback()
+{
+  m_finisher->stop();
+  delete m_finisher;
+}
+
+void MemWriteback::read(const object_t& oid, uint64_t object_no,
+			 const object_locator_t& oloc,
+			 uint64_t off, uint64_t len, snapid_t snapid,
+			 bufferlist *pbl, uint64_t trunc_size,
+			 __u32 trunc_seq, int op_flags, Context *onfinish)
+{
+  assert(snapid == CEPH_NOSNAP);
+  C_DelayRead *wrapper = new C_DelayRead(this, m_cct, onfinish, m_lock, oid,
+					 off, len, pbl, m_delay_ns);
+  m_finisher->queue(wrapper, len);
+}
+
+ceph_tid_t MemWriteback::write(const object_t& oid,
+				const object_locator_t& oloc,
+				uint64_t off, uint64_t len,
+				const SnapContext& snapc,
+				const bufferlist &bl, ceph::real_time mtime,
+				uint64_t trunc_size, __u32 trunc_seq,
+				ceph_tid_t journal_tid, Context *oncommit)
+{
+  assert(snapc.seq == 0);
+  C_DelayWrite *wrapper = new C_DelayWrite(this, m_cct, oncommit, m_lock, oid,
+					   off, len, bl, m_delay_ns);
+  m_finisher->queue(wrapper, 0);
+  return m_tid.inc();
+}
+
+void MemWriteback::write_object_data(const object_t& oid, uint64_t off, uint64_t len,
+				     const bufferlist& data_bl)
+{
+  dout(1) << "writing " << oid << " " << off << "~" << len  << dendl;
+  assert(len == data_bl.length());
+  bufferlist& obj_bl = object_data[oid];
+  bufferlist new_obj_bl;
+  // ensure size, or set it if new object
+  if (off + len > obj_bl.length()) {
+    obj_bl.append_zero(off + len - obj_bl.length());
+  }
+
+  // beginning
+  new_obj_bl.substr_of(obj_bl, 0, off);
+  // overwritten bit
+  new_obj_bl.append(data_bl);
+  // tail bit
+  bufferlist tmp;
+  tmp.substr_of(obj_bl, off+len, obj_bl.length()-(off+len));
+  new_obj_bl.append(tmp);
+  obj_bl.swap(new_obj_bl);
+  dout(1) << oid << " final size " << obj_bl.length() << dendl;
+}
+
+int MemWriteback::read_object_data(const object_t& oid, uint64_t off, uint64_t len,
+				   bufferlist *data_bl)
+{
+  dout(1) << "reading " << oid << " " << off << "~" << len << dendl;
+  auto obj_i = object_data.find(oid);
+  if (obj_i == object_data.end()) {
+    dout(1) << oid << "DNE!" << dendl;
+    return -ENOENT;
+  }
+
+  const bufferlist& obj_bl = obj_i->second;
+  dout(1) << "reading " << oid << " from total size " << obj_bl.length() << dendl;
+
+  uint64_t read_len = MIN(len, obj_bl.length()-off);
+  data_bl->substr_of(obj_bl, off, read_len);
+  return 0;
+}
+
+bool MemWriteback::may_copy_on_write(const object_t&, uint64_t, uint64_t,
+				      snapid_t)
+{
+  return false;
+}
diff --git a/src/test/osdc/MemWriteback.h b/src/test/osdc/MemWriteback.h
new file mode 100644
index 0000000..d5a057f
--- /dev/null
+++ b/src/test/osdc/MemWriteback.h
@@ -0,0 +1,49 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+#ifndef CEPH_TEST_OSDC_MEMWRITEBACK_H
+#define CEPH_TEST_OSDC_MEMWRITEBACK_H
+
+#include "include/atomic.h"
+#include "include/Context.h"
+#include "include/types.h"
+#include "osd/osd_types.h"
+#include "osdc/WritebackHandler.h"
+
+class Finisher;
+class Mutex;
+
+class MemWriteback : public WritebackHandler {
+public:
+  MemWriteback(CephContext *cct, Mutex *lock, uint64_t delay_ns);
+  virtual ~MemWriteback();
+
+  virtual void read(const object_t& oid, uint64_t object_no,
+		    const object_locator_t& oloc, uint64_t off, uint64_t len,
+		    snapid_t snapid, bufferlist *pbl, uint64_t trunc_size,
+		    __u32 trunc_seq, int op_flags, Context *onfinish);
+
+  virtual ceph_tid_t write(const object_t& oid, const object_locator_t& oloc,
+			   uint64_t off, uint64_t len,
+			   const SnapContext& snapc, const bufferlist &bl,
+			   ceph::real_time mtime, uint64_t trunc_size,
+			   __u32 trunc_seq, ceph_tid_t journal_tid,
+			   Context *oncommit);
+
+  using WritebackHandler::write;
+
+  virtual bool may_copy_on_write(const object_t&, uint64_t, uint64_t,
+				 snapid_t);
+  void write_object_data(const object_t& oid, uint64_t off, uint64_t len,
+			 const bufferlist& data_bl);
+  int read_object_data(const object_t& oid, uint64_t off, uint64_t len,
+		       bufferlist *data_bl);
+private:
+  std::map<object_t, bufferlist> object_data;
+  CephContext *m_cct;
+  Mutex *m_lock;
+  uint64_t m_delay_ns;
+  atomic_t m_tid;
+  Finisher *m_finisher;
+};
+
+#endif
diff --git a/src/test/osdc/object_cacher_stress.cc b/src/test/osdc/object_cacher_stress.cc
index b961f75..a753295 100644
--- a/src/test/osdc/object_cacher_stress.cc
+++ b/src/test/osdc/object_cacher_stress.cc
@@ -21,6 +21,7 @@
 #include "osdc/ObjectCacher.h"
 
 #include "FakeWriteback.h"
+#include "MemWriteback.h"
 
 // XXX: Only tests default namespace
 struct op_data {
@@ -173,6 +174,182 @@ int stress_test(uint64_t num_ops, uint64_t num_objs,
   return EXIT_SUCCESS;
 }
 
+int correctness_test(uint64_t delay_ns)
+{
+  std::cerr << "starting correctness test" << std::endl;
+  Mutex lock("object_cacher_stress::object_cacher");
+  MemWriteback writeback(g_ceph_context, &lock, delay_ns);
+
+  ObjectCacher obc(g_ceph_context, "test", writeback, lock, NULL, NULL,
+		   1<<21, // max cache size, 2MB
+		   1, // max objects, just one
+		   1<<18, // max dirty, 256KB
+		   1<<17, // target dirty, 128KB
+		   g_conf->client_oc_max_dirty_age,
+		   true);
+  obc.start();
+  std::cerr << "just start()ed ObjectCacher" << std::endl;
+
+  SnapContext snapc;
+  ceph_tid_t journal_tid = 0;
+  std::string oid("correctness_test_obj");
+  ObjectCacher::ObjectSet object_set(NULL, 0, 0);
+  ceph::bufferlist zeroes_bl;
+  zeroes_bl.append_zero(1<<20);
+
+  // set up a 4MB all-zero object
+  std::cerr << "writing 4x1MB object" << std::endl;
+  std::map<int, C_SaferCond> create_finishers;
+  for (int i = 0; i < 4; ++i) {
+    ObjectCacher::OSDWrite *wr = obc.prepare_write(snapc, zeroes_bl,
+						   ceph::real_time::min(), 0,
+						   ++journal_tid);
+    ObjectExtent extent(oid, 0, zeroes_bl.length()*i, zeroes_bl.length(), 0);
+    extent.oloc.pool = 0;
+    extent.buffer_extents.push_back(make_pair(0, 1<<20));
+    wr->extents.push_back(extent);
+    lock.Lock();
+    obc.writex(wr, &object_set, &create_finishers[i]);
+    lock.Unlock();
+  }
+
+  // write some 1-valued bits at 256-KB intervals for checking consistency
+  std::cerr << "Writing some 0xff values" << std::endl;
+  ceph::buffer::ptr ones(1<<16);
+  memset(ones.c_str(), 0xff, ones.length());
+  ceph::bufferlist ones_bl;
+  ones_bl.append(ones);
+  for (int i = 1<<18; i < 1<<22; i+=1<<18) {
+    ObjectCacher::OSDWrite *wr = obc.prepare_write(snapc, ones_bl,
+						   ceph::real_time::min(), 0,
+						   ++journal_tid);
+    ObjectExtent extent(oid, 0, i, ones_bl.length(), 0);
+    extent.oloc.pool = 0;
+    extent.buffer_extents.push_back(make_pair(0, 1<<16));
+    wr->extents.push_back(extent);
+    lock.Lock();
+    obc.writex(wr, &object_set, &create_finishers[i]);
+    lock.Unlock();
+  }
+
+  for (auto i = create_finishers.begin(); i != create_finishers.end(); ++i) {
+    i->second.wait();
+  }
+  std::cout << "Finished setting up object" << std::endl;
+  lock.Lock();
+  C_SaferCond flushcond;
+  bool done = obc.flush_all(&flushcond);
+  if (!done) {
+    std::cout << "Waiting for flush" << std::endl;
+    lock.Unlock();
+    flushcond.wait();
+    lock.Lock();
+  }
+  lock.Unlock();
+
+  /* now read the back half of the object in, check consistency,
+   */
+  std::cout << "Reading back half of object (1<<21~1<<21)" << std::endl;
+  bufferlist readbl;
+  C_SaferCond backreadcond;
+  ObjectCacher::OSDRead *back_half_rd = obc.prepare_read(CEPH_NOSNAP, &readbl, 0);
+  ObjectExtent back_half_extent(oid, 0, 1<<21, 1<<21, 0);
+  back_half_extent.oloc.pool = 0;
+  back_half_extent.buffer_extents.push_back(make_pair(0, 1<<21));
+  back_half_rd->extents.push_back(back_half_extent);
+  lock.Lock();
+  int r = obc.readx(back_half_rd, &object_set, &backreadcond);
+  lock.Unlock();
+  assert(r >= 0);
+  if (r == 0) {
+    std::cout << "Waiting to read data into cache" << std::endl;
+    r = backreadcond.wait();
+  }
+
+  assert(r == 1<<21);
+
+  /* Read the whole object in,
+   * verify we have to wait for it to complete,
+   * overwrite a small piece, (http://tracker.ceph.com/issues/16002),
+   * and check consistency */
+
+  readbl.clear();
+  std::cout<< "Reading whole object (0~1<<22)" << std::endl;
+  C_SaferCond frontreadcond;
+  ObjectCacher::OSDRead *whole_rd = obc.prepare_read(CEPH_NOSNAP, &readbl, 0);
+  ObjectExtent whole_extent(oid, 0, 0, 1<<22, 0);
+  whole_extent.oloc.pool = 0;
+  whole_extent.buffer_extents.push_back(make_pair(0, 1<<22));
+  whole_rd->extents.push_back(whole_extent);
+  lock.Lock();
+  r = obc.readx(whole_rd, &object_set, &frontreadcond);
+  // we cleared out the cache by reading back half, it shouldn't pass immediately!
+  assert(r == 0);
+  std::cout << "Data (correctly) not available without fetching" << std::endl;
+
+  ObjectCacher::OSDWrite *verify_wr = obc.prepare_write(snapc, ones_bl,
+							ceph::real_time::min(), 0,
+							++journal_tid);
+  ObjectExtent verify_extent(oid, 0, (1<<18)+(1<<16), ones_bl.length(), 0);
+  verify_extent.oloc.pool = 0;
+  verify_extent.buffer_extents.push_back(make_pair(0, 1<<16));
+  verify_wr->extents.push_back(verify_extent);
+  C_SaferCond verify_finisher;
+  obc.writex(verify_wr, &object_set, &verify_finisher);
+  lock.Unlock();
+  std::cout << "wrote dirtying data" << std::endl;
+
+  std::cout << "Waiting to read data into cache" << std::endl;
+  r = frontreadcond.wait();
+  verify_finisher.wait();
+
+  std::cout << "Validating data" << std::endl;
+
+  for (int i = 1<<18; i < 1<<22; i+=1<<18) {
+    bufferlist ones_maybe;
+    ones_maybe.substr_of(readbl, i, ones_bl.length());
+    assert(0 == memcmp(ones_maybe.c_str(), ones_bl.c_str(), ones_bl.length()));
+  }
+  bufferlist ones_maybe;
+  ones_maybe.substr_of(readbl, (1<<18)+(1<<16), ones_bl.length());
+  assert(0 == memcmp(ones_maybe.c_str(), ones_bl.c_str(), ones_bl.length()));
+
+  std::cout << "validated that data is 0xff where it should be" << std::endl;
+  
+  lock.Lock();
+  C_SaferCond flushcond2;
+  done = obc.flush_all(&flushcond2);
+  if (!done) {
+    std::cout << "Waiting for final write flush" << std::endl;
+    lock.Unlock();
+    flushcond2.wait();
+    lock.Lock();
+  }
+
+  bool unclean = obc.release_set(&object_set);
+  if (unclean) {
+    std::cout << "unclean buffers left over!" << std::endl;
+    vector<ObjectExtent> discard_extents;
+    int i = 0;
+    for (auto oi = object_set.objects.begin(); !oi.end(); ++oi) {
+      discard_extents.emplace_back(oid, i++, 0, 1<<22, 0);
+    }
+    obc.discard_set(&object_set, discard_extents);
+    lock.Unlock();
+    obc.stop();
+    goto fail;
+  }
+  lock.Unlock();
+
+  obc.stop();
+
+  std::cout << "Testing ObjectCacher correctness complete" << std::endl;
+  return EXIT_SUCCESS;
+
+ fail:
+  return EXIT_FAILURE;
+}
+
 int main(int argc, const char **argv)
 {
   std::vector<const char*> args;
@@ -187,6 +364,8 @@ int main(int argc, const char **argv)
   long long num_objs = 10;
   float percent_reads = 0.90;
   int seed = time(0) % 100000;
+  bool stress = false;
+  bool correctness = false;
   std::ostringstream err;
   std::vector<const char*>::iterator i;
   for (i = args.begin(); i != args.end();) {
@@ -225,12 +404,21 @@ int main(int argc, const char **argv)
 	cerr << argv[0] << ": " << err.str() << std::endl;
 	return EXIT_FAILURE;
       }
+    } else if (ceph_argparse_flag(args, i, "--stress-test", NULL)) {
+      stress = true;
+    } else if (ceph_argparse_flag(args, i, "--correctness-test", NULL)) {
+      correctness = true;
     } else {
       cerr << "unknown option " << *i << std::endl;
       return EXIT_FAILURE;
     }
   }
 
-  srandom(seed);
-  return stress_test(num_ops, num_objs, obj_bytes, delay_ns, max_len, percent_reads);
+  if (stress) {
+    srandom(seed);
+    return stress_test(num_ops, num_objs, obj_bytes, delay_ns, max_len, percent_reads);
+  }
+  if (correctness) {
+    return correctness_test(delay_ns);
+  }
 }
diff --git a/src/test/pybind/test_ceph_argparse.py b/src/test/pybind/test_ceph_argparse.py
index 4c325f2..00689e3 100755
--- a/src/test/pybind/test_ceph_argparse.py
+++ b/src/test/pybind/test_ceph_argparse.py
@@ -89,11 +89,17 @@ class TestArgparse:
 
 class TestBasic:
 
-	def test_non_ascii_in_non_options(self):
-		# unicode() is not able to convert this str parameter into unicode
-		# using the default encoding 'ascii'. and validate_command() should
-		# not choke on it.
-		assert_is_none(validate_command(sigdict, ['章鱼和鱿鱼']))
+    def test_non_ascii_in_non_options(self):
+        # ArgumentPrefix("no match for {0}".format(s)) is not able to convert
+        # unicode str parameter into str. and validate_command() should not
+        # choke on it.
+        assert_is_none(validate_command(sigdict, [u'章鱼和鱿鱼']))
+        assert_is_none(validate_command(sigdict, [u'–w']))
+        # actually we always pass unicode strings to validate_command() in "ceph"
+        # CLI, but we also use bytestrings in our tests, so make sure it does not
+        # break.
+        assert_is_none(validate_command(sigdict, ['章鱼和鱿鱼']))
+        assert_is_none(validate_command(sigdict, ['–w']))
 
 
 class TestPG(TestArgparse):
@@ -638,7 +644,7 @@ class TestOSD(TestArgparse):
                                                     'rename-bucket']))
         assert_equal({}, validate_command(sigdict, ['osd', 'crush',
                                                     'rename-bucket',
-													'srcname']))
+                                                    'srcname']))
         assert_equal({}, validate_command(sigdict, ['osd', 'crush',
                                                     'rename-bucket', 'srcname',
                                                     'dstname',
@@ -744,7 +750,7 @@ class TestOSD(TestArgparse):
 
     def test_crush_tunables(self):
         for tunable in ('legacy', 'argonaut', 'bobtail', 'firefly',
-						'optimal', 'default'):
+                        'optimal', 'default'):
             self.assert_valid_command(['osd', 'crush', 'tunables',
                                        tunable])
         assert_equal({}, validate_command(sigdict, ['osd', 'crush',
@@ -996,13 +1002,13 @@ class TestOSD(TestArgparse):
                                                     'poolname',
                                                     '128', '128',
                                                     'erasure', '^^^',
-													'ruleset']))
+                                                    'ruleset']))
         assert_equal({}, validate_command(sigdict, ['osd', 'pool', 'create',
                                                     'poolname',
                                                     '128', '128',
                                                     'erasure', 'profile',
                                                     'ruleset',
-												    'toomany']))
+                                                    'toomany']))
         assert_equal({}, validate_command(sigdict, ['osd', 'pool', 'create',
                                                     'poolname',
                                                     '128', '128',
@@ -1102,7 +1108,7 @@ class TestOSD(TestArgparse):
     def test_reweight_by_utilization(self):
         self.assert_valid_command(['osd', 'reweight-by-utilization'])
         self.assert_valid_command(['osd', 'reweight-by-utilization', '100'])
-		self.assert_valid_command(['osd', 'reweight-by-utilization', '100', '.1'])
+        self.assert_valid_command(['osd', 'reweight-by-utilization', '100', '.1'])
         self.assert_valid_command(['osd', 'reweight-by-utilization', '--no-increasing'])
         assert_equal({}, validate_command(sigdict, ['osd',
                                                     'reweight-by-utilization',
diff --git a/src/test/rbd_mirror/image_replay.cc b/src/test/rbd_mirror/image_replay.cc
deleted file mode 100644
index e7eab87..0000000
--- a/src/test/rbd_mirror/image_replay.cc
+++ /dev/null
@@ -1,225 +0,0 @@
-// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
-// vim: ts=8 sw=2 smarttab
-
-#include "common/ceph_argparse.h"
-#include "common/config.h"
-#include "common/debug.h"
-#include "common/errno.h"
-#include "global/global_init.h"
-#include "global/signal_handler.h"
-#include "librbd/ImageCtx.h"
-#include "librbd/ImageState.h"
-#include "tools/rbd_mirror/ImageReplayer.h"
-#include "tools/rbd_mirror/Threads.h"
-
-#include <string>
-#include <vector>
-
-#define dout_subsys ceph_subsys_rbd_mirror
-#undef dout_prefix
-#define dout_prefix *_dout << "rbd-mirror-image-replay: "
-
-rbd::mirror::ImageReplayer<> *replayer = nullptr;
-
-void usage() {
-  std::cout << "usage: ceph_test_rbd_mirror_image_replay [options...] \\" << std::endl;
-  std::cout << "           <client-id> <local-pool> <remote-pool> <image>" << std::endl;
-  std::cout << std::endl;
-  std::cout << "  client-id     client ID to register in remote journal" << std::endl;
-  std::cout << "  local-pool    local (secondary, destination) pool" << std::endl;
-  std::cout << "  remote-pool   remote (primary, source) pool" << std::endl;
-  std::cout << "  image         image to replay (mirror)" << std::endl;
-  std::cout << std::endl;
-  std::cout << "options:\n";
-  std::cout << "  -m monaddress[:port]      connect to specified monitor\n";
-  std::cout << "  --keyring=<path>          path to keyring for local cluster\n";
-  std::cout << "  --log-file=<logfile>      file to log debug output\n";
-  std::cout << "  --debug-rbd-mirror=<log-level>/<memory-level>  set rbd-mirror debug level\n";
-  generic_server_usage();
-}
-
-static atomic_t g_stopping;
-
-static void handle_signal(int signum)
-{
-  g_stopping.set(1);
-}
-
-int get_image_id(rbd::mirror::RadosRef cluster, int64_t pool_id,
-		 const std::string &image_name, std::string *image_id)
-{
-  librados::IoCtx ioctx;
-
-  int r = cluster->ioctx_create2(pool_id, ioctx);
-  if (r < 0) {
-    derr << "error opening ioctx for pool " << pool_id
-	 << ": " << cpp_strerror(r) << dendl;
-    return r;
-  }
-
-  librbd::ImageCtx *image_ctx = new librbd::ImageCtx(image_name, "", NULL,
-						     ioctx, true);
-  r = image_ctx->state->open();
-  if (r < 0) {
-    derr << "error opening remote image " << image_name
-	 << ": " << cpp_strerror(r) << dendl;
-    delete image_ctx;
-    return r;
-  }
-
-  *image_id = image_ctx->id;
-  image_ctx->state->close();
-  return 0;
-}
-
-int main(int argc, const char **argv)
-{
-  std::vector<const char*> args;
-  argv_to_vec(argc, argv, args);
-  env_to_vec(args);
-
-  global_init(nullptr, args, CEPH_ENTITY_TYPE_CLIENT,
-	      CODE_ENVIRONMENT_DAEMON,
-	      CINIT_FLAG_UNPRIVILEGED_DAEMON_DEFAULTS);
-
-  for (auto i = args.begin(); i != args.end(); ++i) {
-    if (ceph_argparse_flag(args, i, "-h", "--help", (char*)NULL)) {
-      usage();
-      return EXIT_SUCCESS;
-    }
-  }
-
-  if (args.size() < 4) {
-    usage();
-    return EXIT_FAILURE;
-  }
-
-  std::string client_id = args[0];
-  std::string local_pool_name = args[1];
-  std::string remote_pool_name = args[2];
-  std::string image_name = args[3];
-
-  dout(1) << "client_id=" << client_id << ", local_pool_name="
-	  << local_pool_name << ", remote_pool_name=" << remote_pool_name
-	  << ", image_name=" << image_name << dendl;
-
-  rbd::mirror::ImageReplayer<>::BootstrapParams bootstap_params(image_name);
-  int64_t local_pool_id;
-  int64_t remote_pool_id;
-  std::string remote_image_id;
-
-  if (local_pool_name == remote_pool_name) {
-    std::cerr << "local and remote pools can't be the same" << std::endl;
-    return EXIT_FAILURE;
-  }
-
-  if (g_conf->daemonize) {
-    global_init_daemonize(g_ceph_context);
-  }
-  g_ceph_context->enable_perf_counter();
-
-  common_init_finish(g_ceph_context);
-
-  init_async_signal_handler();
-  register_async_signal_handler(SIGHUP, sighup_handler);
-  register_async_signal_handler_oneshot(SIGINT, handle_signal);
-  register_async_signal_handler_oneshot(SIGTERM, handle_signal);
-
-  dout(5) << "connecting to cluster" << dendl;
-
-  rbd::mirror::RadosRef local(new librados::Rados());
-  rbd::mirror::RadosRef remote(new librados::Rados());
-  rbd::mirror::Threads *threads = nullptr;
-
-  C_SaferCond start_cond, stop_cond;
-
-  int r = local->init_with_context(g_ceph_context);
-  if (r < 0) {
-    derr << "could not initialize rados handle" << dendl;
-    goto cleanup;
-  }
-
-  r = local->connect();
-  if (r < 0) {
-    derr << "error connecting to local cluster" << dendl;
-    goto cleanup;
-  }
-
-  r = local->pool_lookup(local_pool_name.c_str());
-  if (r < 0) {
-    derr << "error finding local pool " << local_pool_name
-	 << ": " << cpp_strerror(r) << dendl;
-    goto cleanup;
-  }
-  local_pool_id = r;
-
-  r = remote->init_with_context(g_ceph_context);
-  if (r < 0) {
-    derr << "could not initialize rados handle" << dendl;
-    goto cleanup;
-  }
-
-  r = remote->connect();
-  if (r < 0) {
-    derr << "error connecting to local cluster" << dendl;
-    goto cleanup;
-  }
-
-  r = remote->pool_lookup(remote_pool_name.c_str());
-  if (r < 0) {
-    derr << "error finding remote pool " << remote_pool_name
-	 << ": " << cpp_strerror(r) << dendl;
-    goto cleanup;
-  }
-  remote_pool_id = r;
-
-  r = get_image_id(remote, remote_pool_id, image_name, &remote_image_id);
-  if (r < 0) {
-    derr << "error resolving ID for remote image " << image_name
-	 << ": " << cpp_strerror(r) << dendl;
-    goto cleanup;
-  }
-
-  dout(5) << "starting replay" << dendl;
-
-  threads = new rbd::mirror::Threads(reinterpret_cast<CephContext*>(
-    local->cct()));
-  replayer = new rbd::mirror::ImageReplayer<>(threads, local, remote, client_id,
-					      "remote mirror uuid",
-                                              local_pool_id, remote_pool_id,
-                                              remote_image_id,
-                                              "global image id");
-
-  replayer->start(&start_cond, &bootstap_params);
-  r = start_cond.wait();
-  if (r < 0) {
-    derr << "failed to start: " << cpp_strerror(r) << dendl;
-    goto cleanup;
-  }
-
-  dout(5) << "replay started" << dendl;
-
-  while (!g_stopping.read()) {
-    usleep(200000);
-  }
-
-  dout(1) << "termination signal received, stopping replay" << dendl;
-
-  replayer->stop(&stop_cond);
-  r = stop_cond.wait();
-  assert(r == 0);
-
-  dout(1) << "shutdown" << dendl;
-
- cleanup:
-  unregister_async_signal_handler(SIGHUP, sighup_handler);
-  unregister_async_signal_handler(SIGINT, handle_signal);
-  unregister_async_signal_handler(SIGTERM, handle_signal);
-  shutdown_async_signal_handler();
-
-  delete replayer;
-  delete threads;
-  g_ceph_context->put();
-
-  return r < 0 ? EXIT_SUCCESS : EXIT_FAILURE;
-}
diff --git a/src/test/rbd_mirror/image_replayer/test_mock_BootstrapRequest.cc b/src/test/rbd_mirror/image_replayer/test_mock_BootstrapRequest.cc
index 7ca0436..7d2e37e 100644
--- a/src/test/rbd_mirror/image_replayer/test_mock_BootstrapRequest.cc
+++ b/src/test/rbd_mirror/image_replayer/test_mock_BootstrapRequest.cc
@@ -4,6 +4,7 @@
 #include "test/rbd_mirror/test_mock_fixture.h"
 #include "librbd/journal/TypeTraits.h"
 #include "tools/rbd_mirror/ImageSync.h"
+#include "tools/rbd_mirror/ImageSyncThrottler.h"
 #include "tools/rbd_mirror/image_replayer/BootstrapRequest.h"
 #include "tools/rbd_mirror/image_replayer/CloseImageRequest.h"
 #include "tools/rbd_mirror/image_replayer/CreateImageRequest.h"
@@ -70,6 +71,20 @@ struct ImageSync<librbd::MockTestImageCtx> {
 
 ImageSync<librbd::MockTestImageCtx>* ImageSync<librbd::MockTestImageCtx>::s_instance = nullptr;
 
+template<>
+struct ImageSyncThrottler<librbd::MockTestImageCtx> {
+  MOCK_METHOD10(start_sync, void(librbd::MockTestImageCtx *local_image_ctx,
+                                 librbd::MockTestImageCtx *remote_image_ctx,
+                                 SafeTimer *timer, Mutex *timer_lock,
+                                 const std::string &mirror_uuid,
+                                 ::journal::MockJournaler *journaler,
+                                 librbd::journal::MirrorPeerClientMeta *client_meta,
+                                 ContextWQ *work_queue, Context *on_finish,
+                                 ProgressContext *progress_ctx));
+  MOCK_METHOD2(cancel_sync, void(librados::IoCtx &local_io_ctx,
+                                 const std::string& mirror_uuid));
+};
+
 namespace image_replayer {
 
 template<>
diff --git a/src/test/rbd_mirror/image_replayer/test_mock_EventPreprocessor.cc b/src/test/rbd_mirror/image_replayer/test_mock_EventPreprocessor.cc
new file mode 100644
index 0000000..ca1be6f
--- /dev/null
+++ b/src/test/rbd_mirror/image_replayer/test_mock_EventPreprocessor.cc
@@ -0,0 +1,265 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "test/rbd_mirror/test_mock_fixture.h"
+#include "librbd/journal/Types.h"
+#include "librbd/journal/TypeTraits.h"
+#include "tools/rbd_mirror/image_replayer/EventPreprocessor.h"
+#include "tools/rbd_mirror/Threads.h"
+#include "test/journal/mock/MockJournaler.h"
+#include "test/librbd/mock/MockImageCtx.h"
+
+namespace librbd {
+
+namespace {
+
+struct MockTestImageCtx : public librbd::MockImageCtx {
+  MockTestImageCtx(librbd::ImageCtx &image_ctx)
+    : librbd::MockImageCtx(image_ctx) {
+  }
+};
+
+} // anonymous namespace
+
+namespace journal {
+
+template <>
+struct TypeTraits<librbd::MockTestImageCtx> {
+  typedef ::journal::MockJournaler Journaler;
+};
+
+} // namespace journal
+} // namespace librbd
+
+// template definitions
+#include "tools/rbd_mirror/image_replayer/EventPreprocessor.cc"
+template class rbd::mirror::image_replayer::EventPreprocessor<librbd::MockTestImageCtx>;
+
+namespace rbd {
+namespace mirror {
+namespace image_replayer {
+
+using testing::_;
+using testing::WithArg;
+
+class TestMockImageReplayerEventPreprocessor : public TestMockFixture {
+public:
+  typedef EventPreprocessor<librbd::MockTestImageCtx> MockEventPreprocessor;
+
+  virtual void SetUp() {
+    TestMockFixture::SetUp();
+
+    librbd::RBD rbd;
+    ASSERT_EQ(0, create_image(rbd, m_local_io_ctx, m_image_name, m_image_size));
+    ASSERT_EQ(0, open_image(m_local_io_ctx, m_image_name, &m_local_image_ctx));
+  }
+
+  void expect_image_refresh(librbd::MockTestImageCtx &mock_remote_image_ctx, int r) {
+    EXPECT_CALL(*mock_remote_image_ctx.state, refresh(_))
+      .WillOnce(CompleteContext(r));
+  }
+
+  void expect_update_client(journal::MockJournaler &mock_journaler, int r) {
+    EXPECT_CALL(mock_journaler, update_client(_, _))
+      .WillOnce(WithArg<1>(CompleteContext(r)));
+  }
+
+  librbd::ImageCtx *m_local_image_ctx;
+  librbd::journal::MirrorPeerClientMeta m_client_meta;
+
+};
+
+TEST_F(TestMockImageReplayerEventPreprocessor, IsNotRequired) {
+  librbd::MockTestImageCtx mock_local_image_ctx(*m_local_image_ctx);
+  ::journal::MockJournaler mock_remote_journaler;
+
+  MockEventPreprocessor event_preprocessor(mock_local_image_ctx,
+                                           mock_remote_journaler,
+                                           "local mirror uuid",
+                                           &m_client_meta,
+                                           m_threads->work_queue);
+
+  librbd::journal::EventEntry event_entry{librbd::journal::RenameEvent{}};
+  ASSERT_FALSE(event_preprocessor.is_required(event_entry));
+}
+
+TEST_F(TestMockImageReplayerEventPreprocessor, IsRequiredSnapMapPrune) {
+  librbd::MockTestImageCtx mock_local_image_ctx(*m_local_image_ctx);
+  ::journal::MockJournaler mock_remote_journaler;
+
+  m_client_meta.snap_seqs = {{1, 2}, {3, 4}};
+  MockEventPreprocessor event_preprocessor(mock_local_image_ctx,
+                                           mock_remote_journaler,
+                                           "local mirror uuid",
+                                           &m_client_meta,
+                                           m_threads->work_queue);
+
+  librbd::journal::EventEntry event_entry{librbd::journal::RenameEvent{}};
+  ASSERT_TRUE(event_preprocessor.is_required(event_entry));
+}
+
+TEST_F(TestMockImageReplayerEventPreprocessor, IsRequiredSnapRename) {
+  librbd::MockTestImageCtx mock_local_image_ctx(*m_local_image_ctx);
+  ::journal::MockJournaler mock_remote_journaler;
+
+  MockEventPreprocessor event_preprocessor(mock_local_image_ctx,
+                                           mock_remote_journaler,
+                                           "local mirror uuid",
+                                           &m_client_meta,
+                                           m_threads->work_queue);
+
+  librbd::journal::EventEntry event_entry{librbd::journal::SnapRenameEvent{}};
+  ASSERT_TRUE(event_preprocessor.is_required(event_entry));
+}
+
+TEST_F(TestMockImageReplayerEventPreprocessor, PreprocessSnapMapPrune) {
+  librbd::MockTestImageCtx mock_local_image_ctx(*m_local_image_ctx);
+  ::journal::MockJournaler mock_remote_journaler;
+
+  expect_image_refresh(mock_local_image_ctx, 0);
+  expect_update_client(mock_remote_journaler, 0);
+
+  mock_local_image_ctx.snap_info = {
+    {6, librbd::SnapInfo{"snap", 0U, {}, 0U, 0U}}};
+  m_client_meta.snap_seqs = {{1, 2}, {3, 4}, {5, 6}};
+  MockEventPreprocessor event_preprocessor(mock_local_image_ctx,
+                                           mock_remote_journaler,
+                                           "local mirror uuid",
+                                           &m_client_meta,
+                                           m_threads->work_queue);
+
+  librbd::journal::EventEntry event_entry{librbd::journal::RenameEvent{}};
+  C_SaferCond ctx;
+  event_preprocessor.preprocess(&event_entry, &ctx);
+  ASSERT_EQ(0, ctx.wait());
+
+  librbd::journal::MirrorPeerClientMeta::SnapSeqs expected_snap_seqs = {{5, 6}};
+  ASSERT_EQ(expected_snap_seqs, m_client_meta.snap_seqs);
+}
+
+TEST_F(TestMockImageReplayerEventPreprocessor, PreprocessSnapRename) {
+  librbd::MockTestImageCtx mock_local_image_ctx(*m_local_image_ctx);
+  ::journal::MockJournaler mock_remote_journaler;
+
+  expect_image_refresh(mock_local_image_ctx, 0);
+  expect_update_client(mock_remote_journaler, 0);
+
+  mock_local_image_ctx.snap_ids = {{"snap", 6}};
+  mock_local_image_ctx.snap_info = {
+    {6, librbd::SnapInfo{"snap", 0U, {}, 0U, 0U}}};
+  MockEventPreprocessor event_preprocessor(mock_local_image_ctx,
+                                           mock_remote_journaler,
+                                           "local mirror uuid",
+                                           &m_client_meta,
+                                           m_threads->work_queue);
+
+  librbd::journal::EventEntry event_entry{
+    librbd::journal::SnapRenameEvent{0, 5, "snap", "new_snap"}};
+  C_SaferCond ctx;
+  event_preprocessor.preprocess(&event_entry, &ctx);
+  ASSERT_EQ(0, ctx.wait());
+
+  librbd::journal::MirrorPeerClientMeta::SnapSeqs expected_snap_seqs = {{5, 6}};
+  ASSERT_EQ(expected_snap_seqs, m_client_meta.snap_seqs);
+
+  librbd::journal::SnapRenameEvent *event =
+    boost::get<librbd::journal::SnapRenameEvent>(&event_entry.event);
+  ASSERT_EQ(6U, event->snap_id);
+}
+
+TEST_F(TestMockImageReplayerEventPreprocessor, PreprocessSnapRenameMissing) {
+  librbd::MockTestImageCtx mock_local_image_ctx(*m_local_image_ctx);
+  ::journal::MockJournaler mock_remote_journaler;
+
+  expect_image_refresh(mock_local_image_ctx, 0);
+
+  MockEventPreprocessor event_preprocessor(mock_local_image_ctx,
+                                           mock_remote_journaler,
+                                           "local mirror uuid",
+                                           &m_client_meta,
+                                           m_threads->work_queue);
+
+  librbd::journal::EventEntry event_entry{
+    librbd::journal::SnapRenameEvent{0, 5, "snap", "new_snap"}};
+  C_SaferCond ctx;
+  event_preprocessor.preprocess(&event_entry, &ctx);
+  ASSERT_EQ(-ENOENT, ctx.wait());
+
+  librbd::journal::SnapRenameEvent *event =
+    boost::get<librbd::journal::SnapRenameEvent>(&event_entry.event);
+  ASSERT_EQ(CEPH_NOSNAP, event->snap_id);
+}
+
+TEST_F(TestMockImageReplayerEventPreprocessor, PreprocessSnapRenameKnown) {
+  librbd::MockTestImageCtx mock_local_image_ctx(*m_local_image_ctx);
+  ::journal::MockJournaler mock_remote_journaler;
+
+  expect_image_refresh(mock_local_image_ctx, 0);
+
+  mock_local_image_ctx.snap_info = {
+    {6, librbd::SnapInfo{"snap", 0U, {}, 0U, 0U}}};
+  m_client_meta.snap_seqs = {{5, 6}};
+  MockEventPreprocessor event_preprocessor(mock_local_image_ctx,
+                                           mock_remote_journaler,
+                                           "local mirror uuid",
+                                           &m_client_meta,
+                                           m_threads->work_queue);
+
+  librbd::journal::EventEntry event_entry{
+    librbd::journal::SnapRenameEvent{0, 5, "snap", "new_snap"}};
+  C_SaferCond ctx;
+  event_preprocessor.preprocess(&event_entry, &ctx);
+  ASSERT_EQ(0, ctx.wait());
+
+  librbd::journal::MirrorPeerClientMeta::SnapSeqs expected_snap_seqs = {{5, 6}};
+  ASSERT_EQ(expected_snap_seqs, m_client_meta.snap_seqs);
+
+  librbd::journal::SnapRenameEvent *event =
+    boost::get<librbd::journal::SnapRenameEvent>(&event_entry.event);
+  ASSERT_EQ(6U, event->snap_id);
+}
+
+TEST_F(TestMockImageReplayerEventPreprocessor, PreprocessRefreshError) {
+  librbd::MockTestImageCtx mock_local_image_ctx(*m_local_image_ctx);
+  ::journal::MockJournaler mock_remote_journaler;
+
+  expect_image_refresh(mock_local_image_ctx, -EINVAL);
+
+  MockEventPreprocessor event_preprocessor(mock_local_image_ctx,
+                                           mock_remote_journaler,
+                                           "local mirror uuid",
+                                           &m_client_meta,
+                                           m_threads->work_queue);
+
+  librbd::journal::EventEntry event_entry{librbd::journal::RenameEvent{}};
+  C_SaferCond ctx;
+  event_preprocessor.preprocess(&event_entry, &ctx);
+  ASSERT_EQ(-EINVAL, ctx.wait());
+}
+
+TEST_F(TestMockImageReplayerEventPreprocessor, PreprocessClientUpdateError) {
+  librbd::MockTestImageCtx mock_local_image_ctx(*m_local_image_ctx);
+  ::journal::MockJournaler mock_remote_journaler;
+
+  expect_image_refresh(mock_local_image_ctx, 0);
+  expect_update_client(mock_remote_journaler, -EINVAL);
+
+  mock_local_image_ctx.snap_ids = {{"snap", 6}};
+  mock_local_image_ctx.snap_info = {
+    {6, librbd::SnapInfo{"snap", 0U, {}, 0U, 0U}}};
+  MockEventPreprocessor event_preprocessor(mock_local_image_ctx,
+                                           mock_remote_journaler,
+                                           "local mirror uuid",
+                                           &m_client_meta,
+                                           m_threads->work_queue);
+
+  librbd::journal::EventEntry event_entry{
+    librbd::journal::SnapRenameEvent{0, 5, "snap", "new_snap"}};
+  C_SaferCond ctx;
+  event_preprocessor.preprocess(&event_entry, &ctx);
+  ASSERT_EQ(-EINVAL, ctx.wait());
+}
+
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
diff --git a/src/test/rbd_mirror/image_sync/test_mock_ImageCopyRequest.cc b/src/test/rbd_mirror/image_sync/test_mock_ImageCopyRequest.cc
index abc6cc0..79e7fa8 100644
--- a/src/test/rbd_mirror/image_sync/test_mock_ImageCopyRequest.cc
+++ b/src/test/rbd_mirror/image_sync/test_mock_ImageCopyRequest.cc
@@ -16,10 +16,21 @@
 #include <boost/scope_exit.hpp>
 
 namespace librbd {
+
+namespace {
+
+struct MockTestImageCtx : public librbd::MockImageCtx {
+  MockTestImageCtx(librbd::ImageCtx &image_ctx)
+    : librbd::MockImageCtx(image_ctx) {
+  }
+};
+
+} // anonymous namespace
+
 namespace journal {
 
 template <>
-struct TypeTraits<librbd::MockImageCtx> {
+struct TypeTraits<librbd::MockTestImageCtx> {
   typedef ::journal::MockJournaler Journaler;
 };
 
@@ -31,11 +42,11 @@ namespace mirror {
 namespace image_sync {
 
 template <>
-struct ObjectCopyRequest<librbd::MockImageCtx> {
+struct ObjectCopyRequest<librbd::MockTestImageCtx> {
   static ObjectCopyRequest* s_instance;
-  static ObjectCopyRequest* create(librbd::MockImageCtx *local_image_ctx,
-                                   librbd::MockImageCtx *remote_image_ctx,
-                                   const ImageCopyRequest<librbd::MockImageCtx>::SnapMap *snap_map,
+  static ObjectCopyRequest* create(librbd::MockTestImageCtx *local_image_ctx,
+                                   librbd::MockTestImageCtx *remote_image_ctx,
+                                   const ImageCopyRequest<librbd::MockTestImageCtx>::SnapMap *snap_map,
                                    uint64_t object_number, Context *on_finish) {
     assert(s_instance != nullptr);
     Mutex::Locker locker(s_instance->lock);
@@ -50,7 +61,7 @@ struct ObjectCopyRequest<librbd::MockImageCtx> {
   Mutex lock;
   Cond cond;
 
-  const ImageCopyRequest<librbd::MockImageCtx>::SnapMap *snap_map;
+  const ImageCopyRequest<librbd::MockTestImageCtx>::SnapMap *snap_map = nullptr;
   std::map<uint64_t, Context *> object_contexts;
 
   ObjectCopyRequest() : lock("lock") {
@@ -58,7 +69,7 @@ struct ObjectCopyRequest<librbd::MockImageCtx> {
   }
 };
 
-ObjectCopyRequest<librbd::MockImageCtx>* ObjectCopyRequest<librbd::MockImageCtx>::s_instance = nullptr;
+ObjectCopyRequest<librbd::MockTestImageCtx>* ObjectCopyRequest<librbd::MockTestImageCtx>::s_instance = nullptr;
 
 } // namespace image_sync
 } // namespace mirror
@@ -66,7 +77,7 @@ ObjectCopyRequest<librbd::MockImageCtx>* ObjectCopyRequest<librbd::MockImageCtx>
 
 // template definitions
 #include "tools/rbd_mirror/image_sync/ImageCopyRequest.cc"
-template class rbd::mirror::image_sync::ImageCopyRequest<librbd::MockImageCtx>;
+template class rbd::mirror::image_sync::ImageCopyRequest<librbd::MockTestImageCtx>;
 
 namespace rbd {
 namespace mirror {
@@ -81,8 +92,8 @@ using ::testing::InvokeWithoutArgs;
 
 class TestMockImageSyncImageCopyRequest : public TestMockFixture {
 public:
-  typedef ImageCopyRequest<librbd::MockImageCtx> MockImageCopyRequest;
-  typedef ObjectCopyRequest<librbd::MockImageCtx> MockObjectCopyRequest;
+  typedef ImageCopyRequest<librbd::MockTestImageCtx> MockImageCopyRequest;
+  typedef ObjectCopyRequest<librbd::MockTestImageCtx> MockObjectCopyRequest;
 
   virtual void SetUp() {
     TestMockFixture::SetUp();
@@ -95,15 +106,15 @@ public:
     ASSERT_EQ(0, open_image(m_local_io_ctx, m_image_name, &m_local_image_ctx));
   }
 
-  void expect_get_snap_id(librbd::MockImageCtx &mock_image_ctx) {
+  void expect_get_snap_id(librbd::MockTestImageCtx &mock_image_ctx) {
     EXPECT_CALL(mock_image_ctx, get_snap_id(_))
       .WillRepeatedly(Invoke([&mock_image_ctx](std::string snap_name) {
-        RWLock::RLocker snap_locker(mock_image_ctx.image_ctx->snap_lock);
+        assert(mock_image_ctx.image_ctx->snap_lock.is_locked());
         return mock_image_ctx.image_ctx->get_snap_id(snap_name);
       }));
   }
 
-  void expect_get_object_count(librbd::MockImageCtx &mock_image_ctx,
+  void expect_get_object_count(librbd::MockTestImageCtx &mock_image_ctx,
                                uint64_t count) {
     EXPECT_CALL(mock_image_ctx, get_object_count(_))
       .WillOnce(Return(count)).RetiresOnSaturation();
@@ -119,7 +130,8 @@ public:
   }
 
   bool complete_object_copy(MockObjectCopyRequest &mock_object_copy_request,
-                            uint64_t object_num, int r) {
+                               uint64_t object_num, int r,
+                               std::function<void()> fn = []() {}) {
     Mutex::Locker locker(mock_object_copy_request.lock);
     while (mock_object_copy_request.object_contexts.count(object_num) == 0) {
       if (mock_object_copy_request.cond.WaitInterval(m_local_image_ctx->cct,
@@ -129,7 +141,12 @@ public:
       }
     }
 
-    m_threads->work_queue->queue(mock_object_copy_request.object_contexts[object_num], r);
+    FunctionContext *wrapper_ctx = new FunctionContext(
+      [&mock_object_copy_request, object_num, fn] (int r) {
+        fn();
+        mock_object_copy_request.object_contexts[object_num]->complete(r);
+      });
+    m_threads->work_queue->queue(wrapper_ctx, r);
     return true;
   }
 
@@ -145,8 +162,8 @@ public:
     return *mock_object_copy_request.snap_map;
   }
 
-  MockImageCopyRequest *create_request(librbd::MockImageCtx &mock_remote_image_ctx,
-                                       librbd::MockImageCtx &mock_local_image_ctx,
+  MockImageCopyRequest *create_request(librbd::MockTestImageCtx &mock_remote_image_ctx,
+                                       librbd::MockTestImageCtx &mock_local_image_ctx,
                                        journal::MockJournaler &mock_journaler,
                                        librbd::journal::MirrorPeerSyncPoint &sync_point,
                                        Context *ctx) {
@@ -193,8 +210,8 @@ TEST_F(TestMockImageSyncImageCopyRequest, SimpleImage) {
   ASSERT_EQ(0, create_snap("snap1"));
   m_client_meta.sync_points = {{"snap1", boost::none}};
 
-  librbd::MockImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
-  librbd::MockImageCtx mock_local_image_ctx(*m_local_image_ctx);
+  librbd::MockTestImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
+  librbd::MockTestImageCtx mock_local_image_ctx(*m_local_image_ctx);
   journal::MockJournaler mock_journaler;
   MockObjectCopyRequest mock_object_copy_request;
 
@@ -224,21 +241,52 @@ TEST_F(TestMockImageSyncImageCopyRequest, Throttled) {
   ASSERT_EQ(0, create_snap("snap1"));
   m_client_meta.sync_points = {{"snap1", boost::none}};
 
-  librbd::MockImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
-  librbd::MockImageCtx mock_local_image_ctx(*m_local_image_ctx);
+  std::string update_sync_age;
+  ASSERT_EQ(0, _rados->conf_get("rbd_mirror_sync_point_update_age", update_sync_age));
+  ASSERT_EQ(0, _rados->conf_set("rbd_mirror_sync_point_update_age", "1"));
+  BOOST_SCOPE_EXIT( (update_sync_age) ) {
+    ASSERT_EQ(0, _rados->conf_set("rbd_mirror_sync_point_update_age", update_sync_age.c_str()));
+  } BOOST_SCOPE_EXIT_END;
+
+
+  std::string max_ops_str;
+  ASSERT_EQ(0, _rados->conf_get("rbd_concurrent_management_ops", max_ops_str));
+  int max_ops = std::stoi(max_ops_str);
+
+  uint64_t object_count = 55;
+
+  librbd::MockTestImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
+  librbd::MockTestImageCtx mock_local_image_ctx(*m_local_image_ctx);
   journal::MockJournaler mock_journaler;
   MockObjectCopyRequest mock_object_copy_request;
 
   expect_get_snap_id(mock_remote_image_ctx);
 
-  InSequence seq;
-  expect_get_object_count(mock_remote_image_ctx, 50);
+  expect_get_object_count(mock_remote_image_ctx, object_count);
   expect_get_object_count(mock_remote_image_ctx, 0);
-  expect_update_client(mock_journaler, 0);
-  for (int i = 0; i < 50; ++i) {
-    expect_object_copy_send(mock_object_copy_request);
-  }
-  expect_update_client(mock_journaler, 0);
+
+  EXPECT_CALL(mock_object_copy_request, send()).Times(object_count);
+
+  boost::optional<uint64_t> expected_object_number(boost::none);
+  EXPECT_CALL(mock_journaler, update_client(_, _))
+    .WillRepeatedly(
+        Invoke([&expected_object_number, max_ops, object_count, this]
+               (bufferlist data, Context *ctx) {
+          ASSERT_EQ(expected_object_number,
+                    m_client_meta.sync_points.front().object_number);
+          if (!expected_object_number) {
+            expected_object_number = (max_ops - 1);
+          } else {
+            expected_object_number = expected_object_number.get() + max_ops;
+          }
+
+          if (expected_object_number.get() > (object_count - 1)) {
+            expected_object_number = (object_count - 1);
+          }
+
+          m_threads->work_queue->queue(ctx, 0);
+      }));
+
 
   C_SaferCond ctx;
   MockImageCopyRequest *request = create_request(mock_remote_image_ctx,
@@ -248,9 +296,17 @@ TEST_F(TestMockImageSyncImageCopyRequest, Throttled) {
                                                  &ctx);
   request->send();
 
+  std::function<void()> sleep_fn = []() {
+    sleep(2);
+  };
+
   ASSERT_EQ(m_snap_map, wait_for_snap_map(mock_object_copy_request));
-  for (uint64_t i = 0; i < 50; ++i) {
-    ASSERT_TRUE(complete_object_copy(mock_object_copy_request, i, 0));
+  for (uint64_t i = 0; i < object_count; ++i) {
+    if (i % 10 == 0) {
+      ASSERT_TRUE(complete_object_copy(mock_object_copy_request, i, 0, sleep_fn));
+    } else {
+      ASSERT_TRUE(complete_object_copy(mock_object_copy_request, i, 0));
+    }
   }
   ASSERT_EQ(0, ctx.wait());
 }
@@ -261,8 +317,8 @@ TEST_F(TestMockImageSyncImageCopyRequest, SnapshotSubset) {
   ASSERT_EQ(0, create_snap("snap3"));
   m_client_meta.sync_points = {{"snap3", "snap2", boost::none}};
 
-  librbd::MockImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
-  librbd::MockImageCtx mock_local_image_ctx(*m_local_image_ctx);
+  librbd::MockTestImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
+  librbd::MockTestImageCtx mock_local_image_ctx(*m_local_image_ctx);
   journal::MockJournaler mock_journaler;
   MockObjectCopyRequest mock_object_copy_request;
 
@@ -299,8 +355,8 @@ TEST_F(TestMockImageSyncImageCopyRequest, RestartCatchup) {
   m_client_meta.sync_points = {{"snap1", boost::none},
                                {"snap2", "snap1", boost::none}};
 
-  librbd::MockImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
-  librbd::MockImageCtx mock_local_image_ctx(*m_local_image_ctx);
+  librbd::MockTestImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
+  librbd::MockTestImageCtx mock_local_image_ctx(*m_local_image_ctx);
   journal::MockJournaler mock_journaler;
   MockObjectCopyRequest mock_object_copy_request;
 
@@ -331,8 +387,8 @@ TEST_F(TestMockImageSyncImageCopyRequest, RestartPartialSync) {
   ASSERT_EQ(0, create_snap("snap1"));
   m_client_meta.sync_points = {{"snap1", librbd::journal::MirrorPeerSyncPoint::ObjectNumber{0U}}};
 
-  librbd::MockImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
-  librbd::MockImageCtx mock_local_image_ctx(*m_local_image_ctx);
+  librbd::MockTestImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
+  librbd::MockTestImageCtx mock_local_image_ctx(*m_local_image_ctx);
   journal::MockJournaler mock_journaler;
   MockObjectCopyRequest mock_object_copy_request;
 
@@ -368,8 +424,8 @@ TEST_F(TestMockImageSyncImageCopyRequest, Cancel) {
   ASSERT_EQ(0, create_snap("snap1"));
   m_client_meta.sync_points = {{"snap1", boost::none}};
 
-  librbd::MockImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
-  librbd::MockImageCtx mock_local_image_ctx(*m_local_image_ctx);
+  librbd::MockTestImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
+  librbd::MockTestImageCtx mock_local_image_ctx(*m_local_image_ctx);
   journal::MockJournaler mock_journaler;
   MockObjectCopyRequest mock_object_copy_request;
 
@@ -396,12 +452,74 @@ TEST_F(TestMockImageSyncImageCopyRequest, Cancel) {
   ASSERT_EQ(-ECANCELED, ctx.wait());
 }
 
+TEST_F(TestMockImageSyncImageCopyRequest, Cancel_Inflight_Sync) {
+  std::string update_sync_age;
+  ASSERT_EQ(0, _rados->conf_get("rbd_mirror_sync_point_update_age", update_sync_age));
+  ASSERT_EQ(0, _rados->conf_set("rbd_mirror_sync_point_update_age", "1"));
+  BOOST_SCOPE_EXIT( (update_sync_age) ) {
+    ASSERT_EQ(0, _rados->conf_set("rbd_mirror_sync_point_update_age", update_sync_age.c_str()));
+  } BOOST_SCOPE_EXIT_END;
+
+  std::string max_ops_str;
+  ASSERT_EQ(0, _rados->conf_get("rbd_concurrent_management_ops", max_ops_str));
+  ASSERT_EQ(0, _rados->conf_set("rbd_concurrent_management_ops", "3"));
+  BOOST_SCOPE_EXIT( (max_ops_str) ) {
+    ASSERT_EQ(0, _rados->conf_set("rbd_concurrent_management_ops", max_ops_str.c_str()));
+  } BOOST_SCOPE_EXIT_END;
+
+  ASSERT_EQ(0, create_snap("snap1"));
+  m_client_meta.sync_points = {{"snap1", boost::none}};
+
+  librbd::MockTestImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
+  librbd::MockTestImageCtx mock_local_image_ctx(*m_local_image_ctx);
+  journal::MockJournaler mock_journaler;
+  MockObjectCopyRequest mock_object_copy_request;
+
+  expect_get_snap_id(mock_remote_image_ctx);
+
+  expect_get_object_count(mock_remote_image_ctx, 10);
+  expect_get_object_count(mock_remote_image_ctx, 0);
+
+  EXPECT_CALL(mock_object_copy_request, send()).Times(6);
+
+  EXPECT_CALL(mock_journaler, update_client(_, _))
+    .WillRepeatedly(Invoke([this] (bufferlist data, Context *ctx) {
+          m_threads->work_queue->queue(ctx, 0);
+      }));
+
+
+  C_SaferCond ctx;
+  MockImageCopyRequest *request = create_request(mock_remote_image_ctx,
+                                                 mock_local_image_ctx,
+                                                 mock_journaler,
+                                                 m_client_meta.sync_points.front(),
+                                                 &ctx);
+  request->send();
+
+  ASSERT_EQ(m_snap_map, wait_for_snap_map(mock_object_copy_request));
+
+  std::function<void()> cancel_fn = [request]() {
+    sleep(2);
+    request->cancel();
+  };
+
+  ASSERT_TRUE(complete_object_copy(mock_object_copy_request, 0, 0));
+  ASSERT_TRUE(complete_object_copy(mock_object_copy_request, 1, 0));
+  ASSERT_TRUE(complete_object_copy(mock_object_copy_request, 2, 0));
+  ASSERT_TRUE(complete_object_copy(mock_object_copy_request, 3, 0, cancel_fn));
+  ASSERT_TRUE(complete_object_copy(mock_object_copy_request, 4, 0));
+  ASSERT_TRUE(complete_object_copy(mock_object_copy_request, 5, 0));
+
+  ASSERT_EQ(-ECANCELED, ctx.wait());
+  ASSERT_EQ(5u, m_client_meta.sync_points.front().object_number.get());
+}
+
 TEST_F(TestMockImageSyncImageCopyRequest, Cancel1) {
   ASSERT_EQ(0, create_snap("snap1"));
   m_client_meta.sync_points = {{"snap1", boost::none}};
 
-  librbd::MockImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
-  librbd::MockImageCtx mock_local_image_ctx(*m_local_image_ctx);
+  librbd::MockTestImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
+  librbd::MockTestImageCtx mock_local_image_ctx(*m_local_image_ctx);
   journal::MockJournaler mock_journaler;
   MockObjectCopyRequest mock_object_copy_request;
 
@@ -431,8 +549,8 @@ TEST_F(TestMockImageSyncImageCopyRequest, MissingSnap) {
   ASSERT_EQ(0, create_snap("snap1"));
   m_client_meta.sync_points = {{"missing-snap", boost::none}};
 
-  librbd::MockImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
-  librbd::MockImageCtx mock_local_image_ctx(*m_local_image_ctx);
+  librbd::MockTestImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
+  librbd::MockTestImageCtx mock_local_image_ctx(*m_local_image_ctx);
   journal::MockJournaler mock_journaler;
 
   expect_get_snap_id(mock_remote_image_ctx);
@@ -451,8 +569,8 @@ TEST_F(TestMockImageSyncImageCopyRequest, MissingFromSnap) {
   ASSERT_EQ(0, create_snap("snap1"));
   m_client_meta.sync_points = {{"snap1", "missing-snap", boost::none}};
 
-  librbd::MockImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
-  librbd::MockImageCtx mock_local_image_ctx(*m_local_image_ctx);
+  librbd::MockTestImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
+  librbd::MockTestImageCtx mock_local_image_ctx(*m_local_image_ctx);
   journal::MockJournaler mock_journaler;
 
   expect_get_snap_id(mock_remote_image_ctx);
@@ -473,8 +591,30 @@ TEST_F(TestMockImageSyncImageCopyRequest, EmptySnapMap) {
   m_client_meta.snap_seqs = {{0, 0}};
   m_client_meta.sync_points = {{"snap2", "snap1", boost::none}};
 
-  librbd::MockImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
-  librbd::MockImageCtx mock_local_image_ctx(*m_local_image_ctx);
+  librbd::MockTestImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
+  librbd::MockTestImageCtx mock_local_image_ctx(*m_local_image_ctx);
+  journal::MockJournaler mock_journaler;
+
+  expect_get_snap_id(mock_remote_image_ctx);
+
+  C_SaferCond ctx;
+  MockImageCopyRequest *request = create_request(mock_remote_image_ctx,
+                                                 mock_local_image_ctx,
+                                                 mock_journaler,
+                                                 m_client_meta.sync_points.front(),
+                                                 &ctx);
+  request->send();
+  ASSERT_EQ(-EINVAL, ctx.wait());
+}
+
+TEST_F(TestMockImageSyncImageCopyRequest, EmptySnapSeqs) {
+  ASSERT_EQ(0, create_snap("snap1"));
+  ASSERT_EQ(0, create_snap("snap2"));
+  m_client_meta.snap_seqs = {};
+  m_client_meta.sync_points = {{"snap2", "snap1", boost::none}};
+
+  librbd::MockTestImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
+  librbd::MockTestImageCtx mock_local_image_ctx(*m_local_image_ctx);
   journal::MockJournaler mock_journaler;
 
   expect_get_snap_id(mock_remote_image_ctx);
diff --git a/src/test/rbd_mirror/image_sync/test_mock_ObjectCopyRequest.cc b/src/test/rbd_mirror/image_sync/test_mock_ObjectCopyRequest.cc
index 69c4c64..b018f16 100644
--- a/src/test/rbd_mirror/image_sync/test_mock_ObjectCopyRequest.cc
+++ b/src/test/rbd_mirror/image_sync/test_mock_ObjectCopyRequest.cc
@@ -14,9 +14,21 @@
 #include "tools/rbd_mirror/Threads.h"
 #include "tools/rbd_mirror/image_sync/ObjectCopyRequest.h"
 
+namespace librbd {
+namespace {
+
+struct MockTestImageCtx : public librbd::MockImageCtx {
+  MockTestImageCtx(librbd::ImageCtx &image_ctx)
+    : librbd::MockImageCtx(image_ctx) {
+  }
+};
+
+} // anonymous namespace
+} // namespace librbd
+
 // template definitions
 #include "tools/rbd_mirror/image_sync/ObjectCopyRequest.cc"
-template class rbd::mirror::image_sync::ObjectCopyRequest<librbd::MockImageCtx>;
+template class rbd::mirror::image_sync::ObjectCopyRequest<librbd::MockTestImageCtx>;
 
 namespace rbd {
 namespace mirror {
@@ -56,7 +68,7 @@ void scribble(librbd::ImageCtx *image_ctx, int num_ops, size_t max_size,
 
 class TestMockImageSyncObjectCopyRequest : public TestMockFixture {
 public:
-  typedef ObjectCopyRequest<librbd::MockImageCtx> MockObjectCopyRequest;
+  typedef ObjectCopyRequest<librbd::MockTestImageCtx> MockObjectCopyRequest;
 
   virtual void SetUp() {
     TestMockFixture::SetUp();
@@ -69,7 +81,7 @@ public:
     ASSERT_EQ(0, open_image(m_local_io_ctx, m_image_name, &m_local_image_ctx));
   }
 
-  void expect_list_snaps(librbd::MockImageCtx &mock_image_ctx,
+  void expect_list_snaps(librbd::MockTestImageCtx &mock_image_ctx,
                          librados::MockTestMemIoCtxImpl &mock_io_ctx, int r) {
     auto &expect = EXPECT_CALL(mock_io_ctx,
                                list_snaps(mock_image_ctx.image_ctx->get_object_name(0),
@@ -81,13 +93,13 @@ public:
     }
   }
 
-  void expect_get_object_name(librbd::MockImageCtx &mock_image_ctx) {
+  void expect_get_object_name(librbd::MockTestImageCtx &mock_image_ctx) {
     EXPECT_CALL(mock_image_ctx, get_object_name(0))
                   .WillOnce(Return(mock_image_ctx.image_ctx->get_object_name(0)));
   }
 
-  MockObjectCopyRequest *create_request(librbd::MockImageCtx &mock_remote_image_ctx,
-                                        librbd::MockImageCtx &mock_local_image_ctx,
+  MockObjectCopyRequest *create_request(librbd::MockTestImageCtx &mock_remote_image_ctx,
+                                        librbd::MockTestImageCtx &mock_local_image_ctx,
                                         Context *on_finish) {
     expect_get_object_name(mock_local_image_ctx);
     expect_get_object_name(mock_remote_image_ctx);
@@ -155,7 +167,7 @@ public:
     }
   }
 
-  void expect_update_object_map(librbd::MockImageCtx &mock_image_ctx,
+  void expect_update_object_map(librbd::MockTestImageCtx &mock_image_ctx,
                                 librbd::MockObjectMap &mock_object_map,
                                 librados::snap_t snap_id, uint8_t state,
                                 int r) {
@@ -167,8 +179,8 @@ public:
           })));
       } else {
         expect.WillOnce(WithArg<5>(Invoke([&mock_image_ctx, snap_id, state, r](Context *ctx) {
-            RWLock::RLocker snap_locker(mock_image_ctx.image_ctx->snap_lock);
-            RWLock::WLocker object_map_locker(mock_image_ctx.image_ctx->object_map_lock);
+            assert(mock_image_ctx.image_ctx->snap_lock.is_locked());
+            assert(mock_image_ctx.image_ctx->object_map_lock.is_wlocked());
             mock_image_ctx.image_ctx->object_map->aio_update(snap_id, 0, 1,
                                                              state,
                                                              boost::none, ctx);
@@ -289,8 +301,8 @@ public:
 
 TEST_F(TestMockImageSyncObjectCopyRequest, DNE) {
   ASSERT_EQ(0, create_snap("sync"));
-  librbd::MockImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
-  librbd::MockImageCtx mock_local_image_ctx(*m_local_image_ctx);
+  librbd::MockTestImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
+  librbd::MockTestImageCtx mock_local_image_ctx(*m_local_image_ctx);
 
   librbd::MockObjectMap mock_object_map;
   mock_local_image_ctx.object_map = &mock_object_map;
@@ -317,8 +329,8 @@ TEST_F(TestMockImageSyncObjectCopyRequest, Write) {
   scribble(m_remote_image_ctx, 10, 102400, &one);
 
   ASSERT_EQ(0, create_snap("sync"));
-  librbd::MockImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
-  librbd::MockImageCtx mock_local_image_ctx(*m_local_image_ctx);
+  librbd::MockTestImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
+  librbd::MockTestImageCtx mock_local_image_ctx(*m_local_image_ctx);
 
   librbd::MockObjectMap mock_object_map;
   mock_local_image_ctx.object_map = &mock_object_map;
@@ -352,8 +364,8 @@ TEST_F(TestMockImageSyncObjectCopyRequest, ReadError) {
   scribble(m_remote_image_ctx, 10, 102400, &one);
 
   ASSERT_EQ(0, create_snap("sync"));
-  librbd::MockImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
-  librbd::MockImageCtx mock_local_image_ctx(*m_local_image_ctx);
+  librbd::MockTestImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
+  librbd::MockTestImageCtx mock_local_image_ctx(*m_local_image_ctx);
 
   librbd::MockObjectMap mock_object_map;
   mock_local_image_ctx.object_map = &mock_object_map;
@@ -381,8 +393,8 @@ TEST_F(TestMockImageSyncObjectCopyRequest, WriteError) {
   scribble(m_remote_image_ctx, 10, 102400, &one);
 
   ASSERT_EQ(0, create_snap("sync"));
-  librbd::MockImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
-  librbd::MockImageCtx mock_local_image_ctx(*m_local_image_ctx);
+  librbd::MockTestImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
+  librbd::MockTestImageCtx mock_local_image_ctx(*m_local_image_ctx);
 
   librbd::MockObjectMap mock_object_map;
   mock_local_image_ctx.object_map = &mock_object_map;
@@ -424,8 +436,8 @@ TEST_F(TestMockImageSyncObjectCopyRequest, WriteSnaps) {
   }
 
   ASSERT_EQ(0, create_snap("sync"));
-  librbd::MockImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
-  librbd::MockImageCtx mock_local_image_ctx(*m_local_image_ctx);
+  librbd::MockTestImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
+  librbd::MockTestImageCtx mock_local_image_ctx(*m_local_image_ctx);
 
   librbd::MockObjectMap mock_object_map;
   mock_local_image_ctx.object_map = &mock_object_map;
@@ -460,6 +472,8 @@ TEST_F(TestMockImageSyncObjectCopyRequest, WriteSnaps) {
 }
 
 TEST_F(TestMockImageSyncObjectCopyRequest, Trim) {
+  ASSERT_EQ(0, metadata_set(m_remote_image_ctx,
+			    "conf_rbd_skip_partial_discard", "false"));
   // scribble some data
   interval_set<uint64_t> one;
   scribble(m_remote_image_ctx, 10, 102400, &one);
@@ -471,8 +485,8 @@ TEST_F(TestMockImageSyncObjectCopyRequest, Trim) {
     trim_offset, one.range_end() - trim_offset));
   ASSERT_EQ(0, create_snap("sync"));
 
-  librbd::MockImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
-  librbd::MockImageCtx mock_local_image_ctx(*m_local_image_ctx);
+  librbd::MockTestImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
+  librbd::MockTestImageCtx mock_local_image_ctx(*m_local_image_ctx);
 
   librbd::MockObjectMap mock_object_map;
   mock_local_image_ctx.object_map = &mock_object_map;
@@ -513,8 +527,8 @@ TEST_F(TestMockImageSyncObjectCopyRequest, Remove) {
   uint64_t object_size = 1 << m_remote_image_ctx->order;
   ASSERT_LE(0, m_remote_image_ctx->aio_work_queue->discard(0, object_size));
   ASSERT_EQ(0, create_snap("sync"));
-  librbd::MockImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
-  librbd::MockImageCtx mock_local_image_ctx(*m_local_image_ctx);
+  librbd::MockTestImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
+  librbd::MockTestImageCtx mock_local_image_ctx(*m_local_image_ctx);
 
   librbd::MockObjectMap mock_object_map;
   mock_local_image_ctx.object_map = &mock_object_map;
diff --git a/src/test/rbd_mirror/image_sync/test_mock_SnapshotCopyRequest.cc b/src/test/rbd_mirror/image_sync/test_mock_SnapshotCopyRequest.cc
index bcb9fd6..2fd026a 100644
--- a/src/test/rbd_mirror/image_sync/test_mock_SnapshotCopyRequest.cc
+++ b/src/test/rbd_mirror/image_sync/test_mock_SnapshotCopyRequest.cc
@@ -15,10 +15,21 @@
 #include "tools/rbd_mirror/Threads.h"
 
 namespace librbd {
+
+namespace {
+
+struct MockTestImageCtx : public librbd::MockImageCtx {
+  MockTestImageCtx(librbd::ImageCtx &image_ctx)
+    : librbd::MockImageCtx(image_ctx) {
+  }
+};
+
+} // anonymous namespace
+
 namespace journal {
 
 template <>
-struct TypeTraits<librbd::MockImageCtx> {
+struct TypeTraits<librbd::MockTestImageCtx> {
   typedef ::journal::MockJournaler Journaler;
 };
 
@@ -30,9 +41,9 @@ namespace mirror {
 namespace image_sync {
 
 template <>
-struct SnapshotCreateRequest<librbd::MockImageCtx> {
+struct SnapshotCreateRequest<librbd::MockTestImageCtx> {
   static SnapshotCreateRequest* s_instance;
-  static SnapshotCreateRequest* create(librbd::MockImageCtx* image_ctx,
+  static SnapshotCreateRequest* create(librbd::MockTestImageCtx* image_ctx,
                                        const std::string &snap_name,
                                        uint64_t size,
                                        const librbd::parent_spec &parent_spec,
@@ -52,7 +63,7 @@ struct SnapshotCreateRequest<librbd::MockImageCtx> {
   MOCK_METHOD0(send, void());
 };
 
-SnapshotCreateRequest<librbd::MockImageCtx>* SnapshotCreateRequest<librbd::MockImageCtx>::s_instance = nullptr;
+SnapshotCreateRequest<librbd::MockTestImageCtx>* SnapshotCreateRequest<librbd::MockTestImageCtx>::s_instance = nullptr;
 
 } // namespace image_sync
 } // namespace mirror
@@ -60,7 +71,7 @@ SnapshotCreateRequest<librbd::MockImageCtx>* SnapshotCreateRequest<librbd::MockI
 
 // template definitions
 #include "tools/rbd_mirror/image_sync/SnapshotCopyRequest.cc"
-template class rbd::mirror::image_sync::SnapshotCopyRequest<librbd::MockImageCtx>;
+template class rbd::mirror::image_sync::SnapshotCopyRequest<librbd::MockTestImageCtx>;
 
 namespace rbd {
 namespace mirror {
@@ -79,8 +90,8 @@ using ::testing::WithArg;
 
 class TestMockImageSyncSnapshotCopyRequest : public TestMockFixture {
 public:
-  typedef SnapshotCopyRequest<librbd::MockImageCtx> MockSnapshotCopyRequest;
-  typedef SnapshotCreateRequest<librbd::MockImageCtx> MockSnapshotCreateRequest;
+  typedef SnapshotCopyRequest<librbd::MockTestImageCtx> MockSnapshotCopyRequest;
+  typedef SnapshotCreateRequest<librbd::MockTestImageCtx> MockSnapshotCreateRequest;
 
   virtual void SetUp() {
     TestMockFixture::SetUp();
@@ -93,7 +104,7 @@ public:
     ASSERT_EQ(0, open_image(m_local_io_ctx, m_image_name, &m_local_image_ctx));
   }
 
-  void expect_snap_create(librbd::MockImageCtx &mock_image_ctx,
+  void expect_snap_create(librbd::MockTestImageCtx &mock_image_ctx,
                           MockSnapshotCreateRequest &mock_snapshot_create_request,
                           const std::string &snap_name, uint64_t snap_id, int r) {
     EXPECT_CALL(mock_snapshot_create_request, send())
@@ -105,7 +116,7 @@ public:
                       })));
   }
 
-  void expect_snap_remove(librbd::MockImageCtx &mock_image_ctx,
+  void expect_snap_remove(librbd::MockTestImageCtx &mock_image_ctx,
                           const std::string &snap_name, int r) {
     EXPECT_CALL(*mock_image_ctx.operations, execute_snap_remove(StrEq(snap_name), _))
                   .WillOnce(WithArg<1>(Invoke([this, r](Context *ctx) {
@@ -113,7 +124,7 @@ public:
                             })));
   }
 
-  void expect_snap_protect(librbd::MockImageCtx &mock_image_ctx,
+  void expect_snap_protect(librbd::MockTestImageCtx &mock_image_ctx,
                            const std::string &snap_name, int r) {
     EXPECT_CALL(*mock_image_ctx.operations, execute_snap_protect(StrEq(snap_name), _))
                   .WillOnce(WithArg<1>(Invoke([this, r](Context *ctx) {
@@ -121,7 +132,7 @@ public:
                             })));
   }
 
-  void expect_snap_unprotect(librbd::MockImageCtx &mock_image_ctx,
+  void expect_snap_unprotect(librbd::MockTestImageCtx &mock_image_ctx,
                              const std::string &snap_name, int r) {
     EXPECT_CALL(*mock_image_ctx.operations, execute_snap_unprotect(StrEq(snap_name), _))
                   .WillOnce(WithArg<1>(Invoke([this, r](Context *ctx) {
@@ -129,14 +140,14 @@ public:
                             })));
   }
 
-  void expect_snap_is_protected(librbd::MockImageCtx &mock_image_ctx,
+  void expect_snap_is_protected(librbd::MockTestImageCtx &mock_image_ctx,
                                 uint64_t snap_id, bool is_protected, int r) {
     EXPECT_CALL(mock_image_ctx, is_snap_protected(snap_id, _))
                   .WillOnce(DoAll(SetArgPointee<1>(is_protected),
                                   Return(r)));
   }
 
-  void expect_snap_is_unprotected(librbd::MockImageCtx &mock_image_ctx,
+  void expect_snap_is_unprotected(librbd::MockTestImageCtx &mock_image_ctx,
                                   uint64_t snap_id, bool is_unprotected, int r) {
     EXPECT_CALL(mock_image_ctx, is_snap_unprotected(snap_id, _))
                   .WillOnce(DoAll(SetArgPointee<1>(is_unprotected),
@@ -148,13 +159,13 @@ public:
                   .WillOnce(WithArg<1>(CompleteContext(r)));
   }
 
-  static void inject_snap(librbd::MockImageCtx &mock_image_ctx,
-                   uint64_t snap_id, const std::string &snap_name) {
+  static void inject_snap(librbd::MockTestImageCtx &mock_image_ctx,
+                          uint64_t snap_id, const std::string &snap_name) {
     mock_image_ctx.snap_ids[snap_name] = snap_id;
   }
 
-  MockSnapshotCopyRequest *create_request(librbd::MockImageCtx &mock_remote_image_ctx,
-                                          librbd::MockImageCtx &mock_local_image_ctx,
+  MockSnapshotCopyRequest *create_request(librbd::MockTestImageCtx &mock_remote_image_ctx,
+                                          librbd::MockTestImageCtx &mock_local_image_ctx,
                                           journal::MockJournaler &mock_journaler,
                                           Context *on_finish) {
     return new MockSnapshotCopyRequest(&mock_local_image_ctx,
@@ -200,8 +211,8 @@ public:
 };
 
 TEST_F(TestMockImageSyncSnapshotCopyRequest, Empty) {
-  librbd::MockImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
-  librbd::MockImageCtx mock_local_image_ctx(*m_local_image_ctx);
+  librbd::MockTestImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
+  librbd::MockTestImageCtx mock_local_image_ctx(*m_local_image_ctx);
   journal::MockJournaler mock_journaler;
 
   InSequence seq;
@@ -219,8 +230,8 @@ TEST_F(TestMockImageSyncSnapshotCopyRequest, Empty) {
 }
 
 TEST_F(TestMockImageSyncSnapshotCopyRequest, UpdateClientError) {
-  librbd::MockImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
-  librbd::MockImageCtx mock_local_image_ctx(*m_local_image_ctx);
+  librbd::MockTestImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
+  librbd::MockTestImageCtx mock_local_image_ctx(*m_local_image_ctx);
   journal::MockJournaler mock_journaler;
 
   InSequence seq;
@@ -235,8 +246,8 @@ TEST_F(TestMockImageSyncSnapshotCopyRequest, UpdateClientError) {
 }
 
 TEST_F(TestMockImageSyncSnapshotCopyRequest, UpdateClientCancel) {
-  librbd::MockImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
-  librbd::MockImageCtx mock_local_image_ctx(*m_local_image_ctx);
+  librbd::MockTestImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
+  librbd::MockTestImageCtx mock_local_image_ctx(*m_local_image_ctx);
   journal::MockJournaler mock_journaler;
 
   C_SaferCond ctx;
@@ -261,8 +272,8 @@ TEST_F(TestMockImageSyncSnapshotCopyRequest, SnapCreate) {
   uint64_t remote_snap_id1 = m_remote_image_ctx->snap_ids["snap1"];
   uint64_t remote_snap_id2 = m_remote_image_ctx->snap_ids["snap2"];
 
-  librbd::MockImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
-  librbd::MockImageCtx mock_local_image_ctx(*m_local_image_ctx);
+  librbd::MockTestImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
+  librbd::MockTestImageCtx mock_local_image_ctx(*m_local_image_ctx);
   MockSnapshotCreateRequest mock_snapshot_create_request;
   journal::MockJournaler mock_journaler;
 
@@ -287,8 +298,8 @@ TEST_F(TestMockImageSyncSnapshotCopyRequest, SnapCreate) {
 TEST_F(TestMockImageSyncSnapshotCopyRequest, SnapCreateError) {
   ASSERT_EQ(0, create_snap(m_remote_image_ctx, "snap1"));
 
-  librbd::MockImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
-  librbd::MockImageCtx mock_local_image_ctx(*m_local_image_ctx);
+  librbd::MockTestImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
+  librbd::MockTestImageCtx mock_local_image_ctx(*m_local_image_ctx);
   MockSnapshotCreateRequest mock_snapshot_create_request;
   journal::MockJournaler mock_journaler;
 
@@ -306,8 +317,8 @@ TEST_F(TestMockImageSyncSnapshotCopyRequest, SnapCreateError) {
 TEST_F(TestMockImageSyncSnapshotCopyRequest, SnapCreateCancel) {
   ASSERT_EQ(0, create_snap(m_remote_image_ctx, "snap1"));
 
-  librbd::MockImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
-  librbd::MockImageCtx mock_local_image_ctx(*m_local_image_ctx);
+  librbd::MockTestImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
+  librbd::MockTestImageCtx mock_local_image_ctx(*m_local_image_ctx);
   MockSnapshotCreateRequest mock_snapshot_create_request;
   journal::MockJournaler mock_journaler;
 
@@ -334,8 +345,8 @@ TEST_F(TestMockImageSyncSnapshotCopyRequest, SnapRemoveAndCreate) {
 
   uint64_t remote_snap_id1 = m_remote_image_ctx->snap_ids["snap1"];
 
-  librbd::MockImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
-  librbd::MockImageCtx mock_local_image_ctx(*m_local_image_ctx);
+  librbd::MockTestImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
+  librbd::MockTestImageCtx mock_local_image_ctx(*m_local_image_ctx);
   MockSnapshotCreateRequest mock_snapshot_create_request;
   journal::MockJournaler mock_journaler;
 
@@ -361,8 +372,8 @@ TEST_F(TestMockImageSyncSnapshotCopyRequest, SnapRemoveAndCreate) {
 TEST_F(TestMockImageSyncSnapshotCopyRequest, SnapRemoveError) {
   ASSERT_EQ(0, create_snap(m_local_image_ctx, "snap1"));
 
-  librbd::MockImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
-  librbd::MockImageCtx mock_local_image_ctx(*m_local_image_ctx);
+  librbd::MockTestImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
+  librbd::MockTestImageCtx mock_local_image_ctx(*m_local_image_ctx);
   journal::MockJournaler mock_journaler;
 
   InSequence seq;
@@ -386,8 +397,8 @@ TEST_F(TestMockImageSyncSnapshotCopyRequest, SnapUnprotect) {
   uint64_t local_snap_id1 = m_local_image_ctx->snap_ids["snap1"];
   m_client_meta.snap_seqs[remote_snap_id1] = local_snap_id1;
 
-  librbd::MockImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
-  librbd::MockImageCtx mock_local_image_ctx(*m_local_image_ctx);
+  librbd::MockTestImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
+  librbd::MockTestImageCtx mock_local_image_ctx(*m_local_image_ctx);
   journal::MockJournaler mock_journaler;
 
   InSequence seq;
@@ -416,8 +427,8 @@ TEST_F(TestMockImageSyncSnapshotCopyRequest, SnapUnprotectError) {
   uint64_t local_snap_id1 = m_local_image_ctx->snap_ids["snap1"];
   m_client_meta.snap_seqs[remote_snap_id1] = local_snap_id1;
 
-  librbd::MockImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
-  librbd::MockImageCtx mock_local_image_ctx(*m_local_image_ctx);
+  librbd::MockTestImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
+  librbd::MockTestImageCtx mock_local_image_ctx(*m_local_image_ctx);
   journal::MockJournaler mock_journaler;
 
   InSequence seq;
@@ -441,8 +452,8 @@ TEST_F(TestMockImageSyncSnapshotCopyRequest, SnapUnprotectCancel) {
   uint64_t local_snap_id1 = m_local_image_ctx->snap_ids["snap1"];
   m_client_meta.snap_seqs[remote_snap_id1] = local_snap_id1;
 
-  librbd::MockImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
-  librbd::MockImageCtx mock_local_image_ctx(*m_local_image_ctx);
+  librbd::MockTestImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
+  librbd::MockTestImageCtx mock_local_image_ctx(*m_local_image_ctx);
   journal::MockJournaler mock_journaler;
 
   C_SaferCond ctx;
@@ -471,8 +482,8 @@ TEST_F(TestMockImageSyncSnapshotCopyRequest, SnapUnprotectRemove) {
 
   uint64_t remote_snap_id1 = m_remote_image_ctx->snap_ids["snap1"];
 
-  librbd::MockImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
-  librbd::MockImageCtx mock_local_image_ctx(*m_local_image_ctx);
+  librbd::MockTestImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
+  librbd::MockTestImageCtx mock_local_image_ctx(*m_local_image_ctx);
   MockSnapshotCreateRequest mock_snapshot_create_request;
   journal::MockJournaler mock_journaler;
 
@@ -501,8 +512,8 @@ TEST_F(TestMockImageSyncSnapshotCopyRequest, SnapCreateProtect) {
 
   uint64_t remote_snap_id1 = m_remote_image_ctx->snap_ids["snap1"];
 
-  librbd::MockImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
-  librbd::MockImageCtx mock_local_image_ctx(*m_local_image_ctx);
+  librbd::MockTestImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
+  librbd::MockTestImageCtx mock_local_image_ctx(*m_local_image_ctx);
   MockSnapshotCreateRequest mock_snapshot_create_request;
   journal::MockJournaler mock_journaler;
 
@@ -532,8 +543,8 @@ TEST_F(TestMockImageSyncSnapshotCopyRequest, SnapProtect) {
   uint64_t local_snap_id1 = m_local_image_ctx->snap_ids["snap1"];
   m_client_meta.snap_seqs[remote_snap_id1] = local_snap_id1;
 
-  librbd::MockImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
-  librbd::MockImageCtx mock_local_image_ctx(*m_local_image_ctx);
+  librbd::MockTestImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
+  librbd::MockTestImageCtx mock_local_image_ctx(*m_local_image_ctx);
   journal::MockJournaler mock_journaler;
 
   InSequence seq;
@@ -562,8 +573,8 @@ TEST_F(TestMockImageSyncSnapshotCopyRequest, SnapProtectError) {
   uint64_t local_snap_id1 = m_local_image_ctx->snap_ids["snap1"];
   m_client_meta.snap_seqs[remote_snap_id1] = local_snap_id1;
 
-  librbd::MockImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
-  librbd::MockImageCtx mock_local_image_ctx(*m_local_image_ctx);
+  librbd::MockTestImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
+  librbd::MockTestImageCtx mock_local_image_ctx(*m_local_image_ctx);
   journal::MockJournaler mock_journaler;
 
   InSequence seq;
@@ -588,8 +599,8 @@ TEST_F(TestMockImageSyncSnapshotCopyRequest, SnapProtectCancel) {
   uint64_t local_snap_id1 = m_local_image_ctx->snap_ids["snap1"];
   m_client_meta.snap_seqs[remote_snap_id1] = local_snap_id1;
 
-  librbd::MockImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
-  librbd::MockImageCtx mock_local_image_ctx(*m_local_image_ctx);
+  librbd::MockTestImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
+  librbd::MockTestImageCtx mock_local_image_ctx(*m_local_image_ctx);
   journal::MockJournaler mock_journaler;
 
   C_SaferCond ctx;
diff --git a/src/test/rbd_mirror/image_sync/test_mock_SnapshotCreateRequest.cc b/src/test/rbd_mirror/image_sync/test_mock_SnapshotCreateRequest.cc
index 8cf6217..c0105ae 100644
--- a/src/test/rbd_mirror/image_sync/test_mock_SnapshotCreateRequest.cc
+++ b/src/test/rbd_mirror/image_sync/test_mock_SnapshotCreateRequest.cc
@@ -13,9 +13,21 @@
 #include "tools/rbd_mirror/image_sync/SnapshotCreateRequest.h"
 #include "tools/rbd_mirror/Threads.h"
 
+namespace librbd {
+namespace {
+
+struct MockTestImageCtx : public librbd::MockImageCtx {
+  MockTestImageCtx(librbd::ImageCtx &image_ctx)
+    : librbd::MockImageCtx(image_ctx) {
+  }
+};
+
+} // anonymous namespace
+} // namespace librbd
+
 // template definitions
 #include "tools/rbd_mirror/image_sync/SnapshotCreateRequest.cc"
-template class rbd::mirror::image_sync::SnapshotCreateRequest<librbd::MockImageCtx>;
+template class rbd::mirror::image_sync::SnapshotCreateRequest<librbd::MockTestImageCtx>;
 
 namespace rbd {
 namespace mirror {
@@ -32,7 +44,7 @@ using ::testing::WithArg;
 
 class TestMockImageSyncSnapshotCreateRequest : public TestMockFixture {
 public:
-  typedef SnapshotCreateRequest<librbd::MockImageCtx> MockSnapshotCreateRequest;
+  typedef SnapshotCreateRequest<librbd::MockTestImageCtx> MockSnapshotCreateRequest;
 
   virtual void SetUp() {
     TestMockFixture::SetUp();
@@ -42,31 +54,31 @@ public:
     ASSERT_EQ(0, open_image(m_local_io_ctx, m_image_name, &m_local_image_ctx));
   }
 
-  void expect_test_features(librbd::MockImageCtx &mock_image_ctx,
+  void expect_test_features(librbd::MockTestImageCtx &mock_image_ctx,
                             uint64_t features, bool enabled) {
     EXPECT_CALL(mock_image_ctx, test_features(features))
                   .WillOnce(Return(enabled));
   }
 
-  void expect_set_size(librbd::MockImageCtx &mock_image_ctx, int r) {
+  void expect_set_size(librbd::MockTestImageCtx &mock_image_ctx, int r) {
     EXPECT_CALL(get_mock_io_ctx(mock_image_ctx.md_ctx),
                 exec(mock_image_ctx.header_oid, _, StrEq("rbd"), StrEq("set_size"), _, _, _))
                   .WillOnce(Return(r));
   }
 
-  void expect_remove_parent(librbd::MockImageCtx &mock_image_ctx, int r) {
+  void expect_remove_parent(librbd::MockTestImageCtx &mock_image_ctx, int r) {
     EXPECT_CALL(get_mock_io_ctx(mock_image_ctx.md_ctx),
                 exec(mock_image_ctx.header_oid, _, StrEq("rbd"), StrEq("remove_parent"), _, _, _))
                   .WillOnce(Return(r));
   }
 
-  void expect_set_parent(librbd::MockImageCtx &mock_image_ctx, int r) {
+  void expect_set_parent(librbd::MockTestImageCtx &mock_image_ctx, int r) {
     EXPECT_CALL(get_mock_io_ctx(mock_image_ctx.md_ctx),
                 exec(mock_image_ctx.header_oid, _, StrEq("rbd"), StrEq("set_parent"), _, _, _))
                   .WillOnce(Return(r));
   }
 
-  void expect_snap_create(librbd::MockImageCtx &mock_image_ctx,
+  void expect_snap_create(librbd::MockTestImageCtx &mock_image_ctx,
                           const std::string &snap_name, uint64_t snap_id, int r) {
     EXPECT_CALL(*mock_image_ctx.operations, execute_snap_create(StrEq(snap_name), _, 0, true))
                   .WillOnce(DoAll(InvokeWithoutArgs([&mock_image_ctx, snap_id, snap_name]() {
@@ -77,7 +89,7 @@ public:
                                   }))));
   }
 
-  void expect_object_map_resize(librbd::MockImageCtx &mock_image_ctx,
+  void expect_object_map_resize(librbd::MockTestImageCtx &mock_image_ctx,
                                 librados::snap_t snap_id, int r) {
     std::string oid(librbd::ObjectMap::object_map_name(mock_image_ctx.id,
                                                        snap_id));
@@ -86,12 +98,12 @@ public:
                   .WillOnce(Return(r));
   }
 
-  static void inject_snap(librbd::MockImageCtx &mock_image_ctx,
+  static void inject_snap(librbd::MockTestImageCtx &mock_image_ctx,
                    uint64_t snap_id, const std::string &snap_name) {
     mock_image_ctx.snap_ids[snap_name] = snap_id;
   }
 
-  MockSnapshotCreateRequest *create_request(librbd::MockImageCtx &mock_local_image_ctx,
+  MockSnapshotCreateRequest *create_request(librbd::MockTestImageCtx &mock_local_image_ctx,
                                             const std::string &snap_name,
                                             uint64_t size,
                                             const librbd::parent_spec &spec,
@@ -105,7 +117,7 @@ public:
 };
 
 TEST_F(TestMockImageSyncSnapshotCreateRequest, Resize) {
-  librbd::MockImageCtx mock_local_image_ctx(*m_local_image_ctx);
+  librbd::MockTestImageCtx mock_local_image_ctx(*m_local_image_ctx);
 
   InSequence seq;
   expect_set_size(mock_local_image_ctx, 0);
@@ -121,7 +133,7 @@ TEST_F(TestMockImageSyncSnapshotCreateRequest, Resize) {
 }
 
 TEST_F(TestMockImageSyncSnapshotCreateRequest, ResizeError) {
-  librbd::MockImageCtx mock_local_image_ctx(*m_local_image_ctx);
+  librbd::MockTestImageCtx mock_local_image_ctx(*m_local_image_ctx);
 
   InSequence seq;
   expect_set_size(mock_local_image_ctx, -EINVAL);
@@ -135,7 +147,7 @@ TEST_F(TestMockImageSyncSnapshotCreateRequest, ResizeError) {
 }
 
 TEST_F(TestMockImageSyncSnapshotCreateRequest, RemoveParent) {
-  librbd::MockImageCtx mock_local_image_ctx(*m_local_image_ctx);
+  librbd::MockTestImageCtx mock_local_image_ctx(*m_local_image_ctx);
   mock_local_image_ctx.parent_md.spec.pool_id = 213;
 
   InSequence seq;
@@ -153,7 +165,7 @@ TEST_F(TestMockImageSyncSnapshotCreateRequest, RemoveParent) {
 }
 
 TEST_F(TestMockImageSyncSnapshotCreateRequest, RemoveParentError) {
-  librbd::MockImageCtx mock_local_image_ctx(*m_local_image_ctx);
+  librbd::MockTestImageCtx mock_local_image_ctx(*m_local_image_ctx);
   mock_local_image_ctx.parent_md.spec.pool_id = 213;
 
   InSequence seq;
@@ -169,7 +181,7 @@ TEST_F(TestMockImageSyncSnapshotCreateRequest, RemoveParentError) {
 }
 
 TEST_F(TestMockImageSyncSnapshotCreateRequest, RemoveSetParent) {
-  librbd::MockImageCtx mock_local_image_ctx(*m_local_image_ctx);
+  librbd::MockTestImageCtx mock_local_image_ctx(*m_local_image_ctx);
   mock_local_image_ctx.parent_md.spec.pool_id = 213;
 
   InSequence seq;
@@ -189,7 +201,7 @@ TEST_F(TestMockImageSyncSnapshotCreateRequest, RemoveSetParent) {
 }
 
 TEST_F(TestMockImageSyncSnapshotCreateRequest, SetParentSpec) {
-  librbd::MockImageCtx mock_local_image_ctx(*m_local_image_ctx);
+  librbd::MockTestImageCtx mock_local_image_ctx(*m_local_image_ctx);
 
   InSequence seq;
   expect_set_parent(mock_local_image_ctx, 0);
@@ -207,7 +219,7 @@ TEST_F(TestMockImageSyncSnapshotCreateRequest, SetParentSpec) {
 }
 
 TEST_F(TestMockImageSyncSnapshotCreateRequest, SetParentOverlap) {
-  librbd::MockImageCtx mock_local_image_ctx(*m_local_image_ctx);
+  librbd::MockTestImageCtx mock_local_image_ctx(*m_local_image_ctx);
   mock_local_image_ctx.parent_md.spec = {123, "test", 0};
 
   InSequence seq;
@@ -226,7 +238,7 @@ TEST_F(TestMockImageSyncSnapshotCreateRequest, SetParentOverlap) {
 }
 
 TEST_F(TestMockImageSyncSnapshotCreateRequest, SetParentError) {
-  librbd::MockImageCtx mock_local_image_ctx(*m_local_image_ctx);
+  librbd::MockTestImageCtx mock_local_image_ctx(*m_local_image_ctx);
 
   InSequence seq;
   expect_set_parent(mock_local_image_ctx, -ESTALE);
@@ -242,7 +254,7 @@ TEST_F(TestMockImageSyncSnapshotCreateRequest, SetParentError) {
 }
 
 TEST_F(TestMockImageSyncSnapshotCreateRequest, SnapCreate) {
-  librbd::MockImageCtx mock_local_image_ctx(*m_local_image_ctx);
+  librbd::MockTestImageCtx mock_local_image_ctx(*m_local_image_ctx);
 
   InSequence seq;
   expect_snap_create(mock_local_image_ctx, "snap1", 10, 0);
@@ -258,7 +270,7 @@ TEST_F(TestMockImageSyncSnapshotCreateRequest, SnapCreate) {
 }
 
 TEST_F(TestMockImageSyncSnapshotCreateRequest, SnapCreateError) {
-  librbd::MockImageCtx mock_local_image_ctx(*m_local_image_ctx);
+  librbd::MockTestImageCtx mock_local_image_ctx(*m_local_image_ctx);
 
   InSequence seq;
   expect_snap_create(mock_local_image_ctx, "snap1", 10, -EINVAL);
@@ -273,7 +285,7 @@ TEST_F(TestMockImageSyncSnapshotCreateRequest, SnapCreateError) {
 }
 
 TEST_F(TestMockImageSyncSnapshotCreateRequest, ResizeObjectMap) {
-  librbd::MockImageCtx mock_local_image_ctx(*m_local_image_ctx);
+  librbd::MockTestImageCtx mock_local_image_ctx(*m_local_image_ctx);
 
   InSequence seq;
   expect_snap_create(mock_local_image_ctx, "snap1", 10, 0);
@@ -290,7 +302,7 @@ TEST_F(TestMockImageSyncSnapshotCreateRequest, ResizeObjectMap) {
 }
 
 TEST_F(TestMockImageSyncSnapshotCreateRequest, ResizeObjectMapError) {
-  librbd::MockImageCtx mock_local_image_ctx(*m_local_image_ctx);
+  librbd::MockTestImageCtx mock_local_image_ctx(*m_local_image_ctx);
 
   InSequence seq;
   expect_snap_create(mock_local_image_ctx, "snap1", 10, 0);
diff --git a/src/test/rbd_mirror/image_sync/test_mock_SyncPointCreateRequest.cc b/src/test/rbd_mirror/image_sync/test_mock_SyncPointCreateRequest.cc
index faff6a7..7fd32d4 100644
--- a/src/test/rbd_mirror/image_sync/test_mock_SyncPointCreateRequest.cc
+++ b/src/test/rbd_mirror/image_sync/test_mock_SyncPointCreateRequest.cc
@@ -11,10 +11,21 @@
 #include "tools/rbd_mirror/image_sync/SyncPointCreateRequest.h"
 
 namespace librbd {
+
+namespace {
+
+struct MockTestImageCtx : public librbd::MockImageCtx {
+  MockTestImageCtx(librbd::ImageCtx &image_ctx)
+    : librbd::MockImageCtx(image_ctx) {
+  }
+};
+
+} // anonymous namespace
+
 namespace journal {
 
 template <>
-struct TypeTraits<librbd::MockImageCtx> {
+struct TypeTraits<librbd::MockTestImageCtx> {
   typedef ::journal::MockJournaler Journaler;
 };
 
@@ -23,7 +34,7 @@ struct TypeTraits<librbd::MockImageCtx> {
 
 // template definitions
 #include "tools/rbd_mirror/image_sync/SyncPointCreateRequest.cc"
-template class rbd::mirror::image_sync::SyncPointCreateRequest<librbd::MockImageCtx>;
+template class rbd::mirror::image_sync::SyncPointCreateRequest<librbd::MockTestImageCtx>;
 
 namespace rbd {
 namespace mirror {
@@ -35,7 +46,7 @@ using ::testing::WithArg;
 
 class TestMockImageSyncSyncPointCreateRequest : public TestMockFixture {
 public:
-  typedef SyncPointCreateRequest<librbd::MockImageCtx> MockSyncPointCreateRequest;
+  typedef SyncPointCreateRequest<librbd::MockTestImageCtx> MockSyncPointCreateRequest;
 
   virtual void SetUp() {
     TestMockFixture::SetUp();
@@ -50,17 +61,17 @@ public:
       .WillOnce(WithArg<1>(CompleteContext(r)));
   }
 
-  void expect_image_refresh(librbd::MockImageCtx &mock_remote_image_ctx, int r) {
+  void expect_image_refresh(librbd::MockTestImageCtx &mock_remote_image_ctx, int r) {
     EXPECT_CALL(*mock_remote_image_ctx.state, refresh(_))
       .WillOnce(CompleteContext(r));
   }
 
-  void expect_snap_create(librbd::MockImageCtx &mock_remote_image_ctx, int r) {
+  void expect_snap_create(librbd::MockTestImageCtx &mock_remote_image_ctx, int r) {
     EXPECT_CALL(*mock_remote_image_ctx.operations, snap_create(_, _))
       .WillOnce(WithArg<1>(CompleteContext(r)));
   }
 
-  MockSyncPointCreateRequest *create_request(librbd::MockImageCtx &mock_remote_image_ctx,
+  MockSyncPointCreateRequest *create_request(librbd::MockTestImageCtx &mock_remote_image_ctx,
                                              journal::MockJournaler &mock_journaler,
                                              Context *ctx) {
     return new MockSyncPointCreateRequest(&mock_remote_image_ctx, "uuid",
@@ -72,7 +83,7 @@ public:
 };
 
 TEST_F(TestMockImageSyncSyncPointCreateRequest, Success) {
-  librbd::MockImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
+  librbd::MockTestImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
   journal::MockJournaler mock_journaler;
 
   InSequence seq;
@@ -94,7 +105,7 @@ TEST_F(TestMockImageSyncSyncPointCreateRequest, ResyncSuccess) {
   m_client_meta.sync_points.emplace_front("start snap", "", boost::none);
   auto sync_point = m_client_meta.sync_points.front();
 
-  librbd::MockImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
+  librbd::MockTestImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
   journal::MockJournaler mock_journaler;
 
   InSequence seq;
@@ -115,7 +126,7 @@ TEST_F(TestMockImageSyncSyncPointCreateRequest, ResyncSuccess) {
 }
 
 TEST_F(TestMockImageSyncSyncPointCreateRequest, SnapshotExists) {
-  librbd::MockImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
+  librbd::MockTestImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
   journal::MockJournaler mock_journaler;
 
   InSequence seq;
@@ -137,7 +148,7 @@ TEST_F(TestMockImageSyncSyncPointCreateRequest, SnapshotExists) {
 }
 
 TEST_F(TestMockImageSyncSyncPointCreateRequest, ClientUpdateError) {
-  librbd::MockImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
+  librbd::MockTestImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
   journal::MockJournaler mock_journaler;
 
   InSequence seq;
diff --git a/src/test/rbd_mirror/image_sync/test_mock_SyncPointPruneRequest.cc b/src/test/rbd_mirror/image_sync/test_mock_SyncPointPruneRequest.cc
index 7025457..7d03765 100644
--- a/src/test/rbd_mirror/image_sync/test_mock_SyncPointPruneRequest.cc
+++ b/src/test/rbd_mirror/image_sync/test_mock_SyncPointPruneRequest.cc
@@ -11,10 +11,21 @@
 #include "tools/rbd_mirror/image_sync/SyncPointPruneRequest.h"
 
 namespace librbd {
+
+namespace {
+
+struct MockTestImageCtx : public librbd::MockImageCtx {
+  MockTestImageCtx(librbd::ImageCtx &image_ctx)
+    : librbd::MockImageCtx(image_ctx) {
+  }
+};
+
+} // anonymous namespace
+
 namespace journal {
 
 template <>
-struct TypeTraits<librbd::MockImageCtx> {
+struct TypeTraits<librbd::MockTestImageCtx> {
   typedef ::journal::MockJournaler Journaler;
 };
 
@@ -23,7 +34,7 @@ struct TypeTraits<librbd::MockImageCtx> {
 
 // template definitions
 #include "tools/rbd_mirror/image_sync/SyncPointPruneRequest.cc"
-template class rbd::mirror::image_sync::SyncPointPruneRequest<librbd::MockImageCtx>;
+template class rbd::mirror::image_sync::SyncPointPruneRequest<librbd::MockTestImageCtx>;
 
 namespace rbd {
 namespace mirror {
@@ -31,12 +42,13 @@ namespace image_sync {
 
 using ::testing::_;
 using ::testing::InSequence;
+using ::testing::Return;
 using ::testing::StrEq;
 using ::testing::WithArg;
 
 class TestMockImageSyncSyncPointPruneRequest : public TestMockFixture {
 public:
-  typedef SyncPointPruneRequest<librbd::MockImageCtx> MockSyncPointPruneRequest;
+  typedef SyncPointPruneRequest<librbd::MockTestImageCtx> MockSyncPointPruneRequest;
 
   virtual void SetUp() {
     TestMockFixture::SetUp();
@@ -51,18 +63,24 @@ public:
       .WillOnce(WithArg<1>(CompleteContext(r)));
   }
 
-  void expect_image_refresh(librbd::MockImageCtx &mock_remote_image_ctx, int r) {
+  void expect_get_snap_id(librbd::MockTestImageCtx &mock_remote_image_ctx,
+                          const std::string &snap_name, uint64_t snap_id) {
+    EXPECT_CALL(mock_remote_image_ctx, get_snap_id(StrEq(snap_name)))
+      .WillOnce(Return(snap_id));
+  }
+
+  void expect_image_refresh(librbd::MockTestImageCtx &mock_remote_image_ctx, int r) {
     EXPECT_CALL(*mock_remote_image_ctx.state, refresh(_))
       .WillOnce(CompleteContext(r));
   }
 
-  void expect_snap_remove(librbd::MockImageCtx &mock_remote_image_ctx,
+  void expect_snap_remove(librbd::MockTestImageCtx &mock_remote_image_ctx,
                           const std::string &snap_name, int r) {
     EXPECT_CALL(*mock_remote_image_ctx.operations, snap_remove(StrEq(snap_name), _))
       .WillOnce(WithArg<1>(CompleteContext(r)));
   }
 
-  MockSyncPointPruneRequest *create_request(librbd::MockImageCtx &mock_remote_image_ctx,
+  MockSyncPointPruneRequest *create_request(librbd::MockTestImageCtx &mock_remote_image_ctx,
                                             journal::MockJournaler &mock_journaler,
                                             bool sync_complete, Context *ctx) {
     return new MockSyncPointPruneRequest(&mock_remote_image_ctx, sync_complete,
@@ -78,10 +96,11 @@ TEST_F(TestMockImageSyncSyncPointPruneRequest, SyncInProgressSuccess) {
   client_meta.sync_points.emplace_front("snap1", boost::none);
   m_client_meta = client_meta;
 
-  librbd::MockImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
+  librbd::MockTestImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
   journal::MockJournaler mock_journaler;
 
   InSequence seq;
+  expect_get_snap_id(mock_remote_image_ctx, "snap1", 123);
   expect_image_refresh(mock_remote_image_ctx, 0);
   expect_update_client(mock_journaler, 0);
 
@@ -99,10 +118,11 @@ TEST_F(TestMockImageSyncSyncPointPruneRequest, RestartedSyncInProgressSuccess) {
   client_meta.sync_points.emplace_front("snap1", boost::none);
   m_client_meta = client_meta;
 
-  librbd::MockImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
+  librbd::MockTestImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
   journal::MockJournaler mock_journaler;
 
   InSequence seq;
+  expect_get_snap_id(mock_remote_image_ctx, "snap1", 123);
   expect_snap_remove(mock_remote_image_ctx, "snap2", 0);
   expect_image_refresh(mock_remote_image_ctx, 0);
   expect_update_client(mock_journaler, 0);
@@ -117,13 +137,64 @@ TEST_F(TestMockImageSyncSyncPointPruneRequest, RestartedSyncInProgressSuccess) {
   ASSERT_EQ(client_meta, m_client_meta);
 }
 
+TEST_F(TestMockImageSyncSyncPointPruneRequest, SyncInProgressMissingSnapSuccess) {
+  librbd::journal::MirrorPeerClientMeta client_meta;
+  client_meta.sync_points.emplace_front("snap2", "snap1", boost::none);
+  client_meta.sync_points.emplace_front("snap1", boost::none);
+  m_client_meta = client_meta;
+
+  librbd::MockTestImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
+  journal::MockJournaler mock_journaler;
+
+  InSequence seq;
+  expect_get_snap_id(mock_remote_image_ctx, "snap1", CEPH_NOSNAP);
+  expect_snap_remove(mock_remote_image_ctx, "snap2", 0);
+  expect_snap_remove(mock_remote_image_ctx, "snap1", 0);
+  expect_image_refresh(mock_remote_image_ctx, 0);
+  expect_update_client(mock_journaler, 0);
+
+  C_SaferCond ctx;
+  MockSyncPointPruneRequest *req = create_request(mock_remote_image_ctx,
+                                                  mock_journaler, false, &ctx);
+  req->send();
+  ASSERT_EQ(0, ctx.wait());
+
+  client_meta.sync_points.clear();
+  ASSERT_EQ(client_meta, m_client_meta);
+}
+
+TEST_F(TestMockImageSyncSyncPointPruneRequest, SyncInProgressUnexpectedFromSnapSuccess) {
+  librbd::journal::MirrorPeerClientMeta client_meta;
+  client_meta.sync_points.emplace_front("snap2", "snap1", boost::none);
+  m_client_meta = client_meta;
+
+  librbd::MockTestImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
+  journal::MockJournaler mock_journaler;
+
+  InSequence seq;
+  expect_get_snap_id(mock_remote_image_ctx, "snap2", 124);
+  expect_snap_remove(mock_remote_image_ctx, "snap2", 0);
+  expect_snap_remove(mock_remote_image_ctx, "snap1", 0);
+  expect_image_refresh(mock_remote_image_ctx, 0);
+  expect_update_client(mock_journaler, 0);
+
+  C_SaferCond ctx;
+  MockSyncPointPruneRequest *req = create_request(mock_remote_image_ctx,
+                                                  mock_journaler, false, &ctx);
+  req->send();
+  ASSERT_EQ(0, ctx.wait());
+
+  client_meta.sync_points.clear();
+  ASSERT_EQ(client_meta, m_client_meta);
+}
+
 TEST_F(TestMockImageSyncSyncPointPruneRequest, SyncCompleteSuccess) {
   librbd::journal::MirrorPeerClientMeta client_meta;
   client_meta.sync_points.emplace_front("snap1", boost::none);
   m_client_meta = client_meta;
   ASSERT_EQ(librbd::journal::MIRROR_PEER_STATE_SYNCING, m_client_meta.state);
 
-  librbd::MockImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
+  librbd::MockTestImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
   journal::MockJournaler mock_journaler;
 
   InSequence seq;
@@ -146,7 +217,7 @@ TEST_F(TestMockImageSyncSyncPointPruneRequest, RestartedSyncCompleteSuccess) {
   client_meta.sync_points.emplace_front("snap1", boost::none);
   m_client_meta = client_meta;
 
-  librbd::MockImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
+  librbd::MockTestImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
   journal::MockJournaler mock_journaler;
 
   InSequence seq;
@@ -168,7 +239,7 @@ TEST_F(TestMockImageSyncSyncPointPruneRequest, RestartedCatchUpSyncCompleteSucce
   client_meta.sync_points.emplace_front("snap2", "snap1", boost::none);
   m_client_meta = client_meta;
 
-  librbd::MockImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
+  librbd::MockTestImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
   journal::MockJournaler mock_journaler;
 
   InSequence seq;
@@ -190,7 +261,7 @@ TEST_F(TestMockImageSyncSyncPointPruneRequest, SnapshotDNE) {
   client_meta.sync_points.emplace_front("snap1", boost::none);
   m_client_meta = client_meta;
 
-  librbd::MockImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
+  librbd::MockTestImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
   journal::MockJournaler mock_journaler;
 
   InSequence seq;
@@ -212,7 +283,7 @@ TEST_F(TestMockImageSyncSyncPointPruneRequest, ClientUpdateError) {
   client_meta.sync_points.emplace_front("snap1", boost::none);
   m_client_meta = client_meta;
 
-  librbd::MockImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
+  librbd::MockTestImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
   journal::MockJournaler mock_journaler;
 
   InSequence seq;
diff --git a/src/test/rbd_mirror/test_ImageDeleter.cc b/src/test/rbd_mirror/test_ImageDeleter.cc
index 1386977..d96b624 100644
--- a/src/test/rbd_mirror/test_ImageDeleter.cc
+++ b/src/test/rbd_mirror/test_ImageDeleter.cc
@@ -62,8 +62,9 @@ public:
 
     librbd::mirror_mode_set(m_local_io_ctx, RBD_MIRROR_MODE_IMAGE);
 
-    m_deleter = new rbd::mirror::ImageDeleter(_rados,
-        m_threads->timer, &m_threads->timer_lock);
+    m_deleter = new rbd::mirror::ImageDeleter(m_threads->work_queue,
+                                              m_threads->timer,
+                                              &m_threads->timer_lock);
 
     EXPECT_EQ(0, create_image(rbd, m_local_io_ctx, m_image_name, 1 << 20));
     ImageCtx *ictx = new ImageCtx(m_image_name, "", "", m_local_io_ctx,
@@ -211,7 +212,7 @@ int64_t TestImageDeleter::m_local_pool_id;
 
 
 TEST_F(TestImageDeleter, Delete_NonPrimary_Image) {
-  m_deleter->schedule_image_delete(m_local_pool_id, m_local_image_id,
+  m_deleter->schedule_image_delete(_rados, m_local_pool_id, m_local_image_id,
       m_image_name, GLOBAL_IMAGE_ID);
 
   C_SaferCond ctx;
@@ -227,7 +228,7 @@ TEST_F(TestImageDeleter, Delete_NonPrimary_Image) {
 TEST_F(TestImageDeleter, Fail_Delete_Primary_Image) {
   promote_image();
 
-  m_deleter->schedule_image_delete(m_local_pool_id, m_local_image_id,
+  m_deleter->schedule_image_delete(_rados, m_local_pool_id, m_local_image_id,
       m_image_name, GLOBAL_IMAGE_ID);
 
   C_SaferCond ctx;
@@ -242,7 +243,7 @@ TEST_F(TestImageDeleter, Fail_Delete_Diff_GlobalId) {
   // This test case represents a case that should never happen, unless
   // there is bug in the implementation
 
-  m_deleter->schedule_image_delete(m_local_pool_id, m_local_image_id,
+  m_deleter->schedule_image_delete(_rados, m_local_pool_id, m_local_image_id,
       m_image_name, "diff global id");
 
   C_SaferCond ctx;
@@ -256,7 +257,7 @@ TEST_F(TestImageDeleter, Fail_Delete_Diff_GlobalId) {
 TEST_F(TestImageDeleter, Delete_Image_With_Child) {
   create_snapshot();
 
-  m_deleter->schedule_image_delete(m_local_pool_id, m_local_image_id,
+  m_deleter->schedule_image_delete(_rados, m_local_pool_id, m_local_image_id,
       m_image_name, GLOBAL_IMAGE_ID);
 
   C_SaferCond ctx;
@@ -271,7 +272,7 @@ TEST_F(TestImageDeleter, Delete_Image_With_Children) {
   create_snapshot("snap1");
   create_snapshot("snap2");
 
-  m_deleter->schedule_image_delete(m_local_pool_id, m_local_image_id,
+  m_deleter->schedule_image_delete(_rados, m_local_pool_id, m_local_image_id,
       m_image_name, GLOBAL_IMAGE_ID);
 
   C_SaferCond ctx;
@@ -285,7 +286,7 @@ TEST_F(TestImageDeleter, Delete_Image_With_Children) {
 TEST_F(TestImageDeleter, Delete_Image_With_ProtectedChild) {
   create_snapshot("snap1", true);
 
-  m_deleter->schedule_image_delete(m_local_pool_id, m_local_image_id,
+  m_deleter->schedule_image_delete(_rados, m_local_pool_id, m_local_image_id,
       m_image_name, GLOBAL_IMAGE_ID);
 
   C_SaferCond ctx;
@@ -300,7 +301,7 @@ TEST_F(TestImageDeleter, Delete_Image_With_ProtectedChildren) {
   create_snapshot("snap1", true);
   create_snapshot("snap2", true);
 
-  m_deleter->schedule_image_delete(m_local_pool_id, m_local_image_id,
+  m_deleter->schedule_image_delete(_rados, m_local_pool_id, m_local_image_id,
       m_image_name, GLOBAL_IMAGE_ID);
 
   C_SaferCond ctx;
@@ -314,7 +315,7 @@ TEST_F(TestImageDeleter, Delete_Image_With_ProtectedChildren) {
 TEST_F(TestImageDeleter, Delete_Image_With_Clone) {
   std::string clone_id = create_clone();
 
-  m_deleter->schedule_image_delete(m_local_pool_id, m_local_image_id,
+  m_deleter->schedule_image_delete(_rados, m_local_pool_id, m_local_image_id,
       m_image_name, GLOBAL_IMAGE_ID);
 
   C_SaferCond ctx;
@@ -324,7 +325,7 @@ TEST_F(TestImageDeleter, Delete_Image_With_Clone) {
   ASSERT_EQ(1u, m_deleter->get_delete_queue_items().size());
   ASSERT_EQ(0u, m_deleter->get_failed_queue_items().size());
 
-  m_deleter->schedule_image_delete(m_local_pool_id, clone_id,
+  m_deleter->schedule_image_delete(_rados, m_local_pool_id, clone_id,
       "clone1", GLOBAL_CLONE_IMAGE_ID);
 
   C_SaferCond ctx2;
@@ -347,7 +348,7 @@ TEST_F(TestImageDeleter, Delete_NonExistent_Image) {
   EXPECT_EQ(0, cls_client::mirror_image_set(&m_local_io_ctx, m_local_image_id,
                                             mirror_image));
 
-  m_deleter->schedule_image_delete(m_local_pool_id, m_local_image_id,
+  m_deleter->schedule_image_delete(_rados, m_local_pool_id, m_local_image_id,
       m_image_name, GLOBAL_IMAGE_ID);
 
   C_SaferCond ctx;
@@ -371,7 +372,7 @@ TEST_F(TestImageDeleter, Delete_NonExistent_Image_With_MirroringState) {
   EXPECT_EQ(0, cls_client::mirror_image_set(&m_local_io_ctx, m_local_image_id,
                                             mirror_image));
 
-  m_deleter->schedule_image_delete(m_local_pool_id, m_local_image_id,
+  m_deleter->schedule_image_delete(_rados, m_local_pool_id, m_local_image_id,
       m_image_name, GLOBAL_IMAGE_ID);
 
   C_SaferCond ctx;
@@ -387,7 +388,7 @@ TEST_F(TestImageDeleter, Delete_NonExistent_Image_With_MirroringState) {
 TEST_F(TestImageDeleter, Delete_NonExistent_Image_Without_MirroringState) {
   remove_image();
 
-  m_deleter->schedule_image_delete(m_local_pool_id, m_local_image_id,
+  m_deleter->schedule_image_delete(_rados, m_local_pool_id, m_local_image_id,
       m_image_name, GLOBAL_IMAGE_ID);
 
   C_SaferCond ctx;
@@ -405,7 +406,7 @@ TEST_F(TestImageDeleter, Fail_Delete_NonPrimary_Image) {
                                 false);
   EXPECT_EQ(0, ictx->state->open());
 
-  m_deleter->schedule_image_delete(m_local_pool_id, m_local_image_id,
+  m_deleter->schedule_image_delete(_rados, m_local_pool_id, m_local_image_id,
       m_image_name, GLOBAL_IMAGE_ID);
 
   C_SaferCond ctx;
@@ -425,7 +426,7 @@ TEST_F(TestImageDeleter, Retry_Failed_Deletes) {
 
   m_deleter->set_failed_timer_interval(2);
 
-  m_deleter->schedule_image_delete(m_local_pool_id, m_local_image_id,
+  m_deleter->schedule_image_delete(_rados, m_local_pool_id, m_local_image_id,
       m_image_name, GLOBAL_IMAGE_ID);
 
   C_SaferCond ctx;
@@ -449,7 +450,7 @@ TEST_F(TestImageDeleter, Delete_Is_Idempotent) {
                                 false);
   EXPECT_EQ(0, ictx->state->open());
 
-  m_deleter->schedule_image_delete(m_local_pool_id, m_local_image_id,
+  m_deleter->schedule_image_delete(_rados, m_local_pool_id, m_local_image_id,
       m_image_name, GLOBAL_IMAGE_ID);
 
   C_SaferCond ctx;
@@ -459,7 +460,7 @@ TEST_F(TestImageDeleter, Delete_Is_Idempotent) {
   ASSERT_EQ(0u, m_deleter->get_delete_queue_items().size());
   ASSERT_EQ(1u, m_deleter->get_failed_queue_items().size());
 
-  m_deleter->schedule_image_delete(m_local_pool_id, m_local_image_id,
+  m_deleter->schedule_image_delete(_rados, m_local_pool_id, m_local_image_id,
       m_image_name, GLOBAL_IMAGE_ID);
 
   ASSERT_EQ(0u, m_deleter->get_delete_queue_items().size());
diff --git a/src/test/rbd_mirror/test_ImageReplayer.cc b/src/test/rbd_mirror/test_ImageReplayer.cc
index c2753ba..d3002b0 100644
--- a/src/test/rbd_mirror/test_ImageReplayer.cc
+++ b/src/test/rbd_mirror/test_ImageReplayer.cc
@@ -23,14 +23,18 @@
 #include "journal/Journaler.h"
 #include "librbd/AioCompletion.h"
 #include "librbd/AioImageRequestWQ.h"
+#include "librbd/ExclusiveLock.h"
 #include "librbd/ImageCtx.h"
 #include "librbd/ImageState.h"
 #include "librbd/Journal.h"
+#include "librbd/Operations.h"
 #include "librbd/Utils.h"
 #include "librbd/internal.h"
 #include "tools/rbd_mirror/types.h"
 #include "tools/rbd_mirror/ImageReplayer.h"
+#include "tools/rbd_mirror/ImageSyncThrottler.h"
 #include "tools/rbd_mirror/Threads.h"
+#include "tools/rbd_mirror/ImageDeleter.h"
 
 #include "test/librados/test.h"
 #include "gtest/gtest.h"
@@ -71,14 +75,16 @@ public:
     }
   };
 
-  TestImageReplayer() : m_watch_handle(0)
+  TestImageReplayer()
+    : m_local_cluster(new librados::Rados()), m_watch_handle(0)
   {
-    EXPECT_EQ("", connect_cluster_pp(m_local_cluster));
-    EXPECT_EQ(0, m_local_cluster.conf_set("rbd_cache", "false"));
+    EXPECT_EQ("", connect_cluster_pp(*m_local_cluster.get()));
+    EXPECT_EQ(0, m_local_cluster->conf_set("rbd_cache", "false"));
+    EXPECT_EQ(0, m_local_cluster->conf_set("rbd_mirror_journal_poll_age", "1"));
 
     m_local_pool_name = get_temp_pool_name();
-    EXPECT_EQ(0, m_local_cluster.pool_create(m_local_pool_name.c_str()));
-    EXPECT_EQ(0, m_local_cluster.ioctx_create(m_local_pool_name.c_str(),
+    EXPECT_EQ(0, m_local_cluster->pool_create(m_local_pool_name.c_str()));
+    EXPECT_EQ(0, m_local_cluster->ioctx_create(m_local_pool_name.c_str(),
 					      m_local_ioctx));
 
     EXPECT_EQ("", connect_cluster_pp(m_remote_cluster));
@@ -102,38 +108,37 @@ public:
 
     m_threads = new rbd::mirror::Threads(reinterpret_cast<CephContext*>(
       m_local_ioctx.cct()));
+
+    m_image_deleter.reset(new rbd::mirror::ImageDeleter(m_threads->work_queue,
+                                                        m_threads->timer,
+                                                        &m_threads->timer_lock));
+    m_image_sync_throttler.reset(new rbd::mirror::ImageSyncThrottler<>());
   }
 
   ~TestImageReplayer()
   {
-    if (m_watch_handle != 0) {
-      m_remote_ioctx.unwatch2(m_watch_handle);
-      delete m_watch_ctx;
-      m_watch_ctx = nullptr;
-      m_watch_handle = 0;
-    }
+    unwatch();
 
     delete m_replayer;
     delete m_threads;
 
     EXPECT_EQ(0, m_remote_cluster.pool_delete(m_remote_pool_name.c_str()));
-    EXPECT_EQ(0, m_local_cluster.pool_delete(m_local_pool_name.c_str()));
+    EXPECT_EQ(0, m_local_cluster->pool_delete(m_local_pool_name.c_str()));
   }
 
   template <typename ImageReplayerT = rbd::mirror::ImageReplayer<> >
   void create_replayer() {
-    m_replayer = new ImageReplayerT(m_threads,
+    m_replayer = new ImageReplayerT(m_threads, m_image_deleter, m_image_sync_throttler,
       rbd::mirror::RadosRef(new librados::Rados(m_local_ioctx)),
       rbd::mirror::RadosRef(new librados::Rados(m_remote_ioctx)),
       m_local_mirror_uuid, m_remote_mirror_uuid, m_local_ioctx.get_id(),
       m_remote_pool_id, m_remote_image_id, "global image id");
   }
 
-  void start(rbd::mirror::ImageReplayer<>::BootstrapParams *bootstap_params =
-	     nullptr)
+  void start()
   {
     C_SaferCond cond;
-    m_replayer->start(&cond, bootstap_params);
+    m_replayer->start(&cond);
     ASSERT_EQ(0, cond.wait());
 
     ASSERT_EQ(0U, m_watch_handle);
@@ -142,14 +147,18 @@ public:
     ASSERT_EQ(0, m_remote_ioctx.watch2(oid, &m_watch_handle, m_watch_ctx));
   }
 
-  void stop()
-  {
+  void unwatch() {
     if (m_watch_handle != 0) {
       m_remote_ioctx.unwatch2(m_watch_handle);
       delete m_watch_ctx;
       m_watch_ctx = nullptr;
       m_watch_handle = 0;
     }
+  }
+
+  void stop()
+  {
+    unwatch();
 
     C_SaferCond cond;
     m_replayer->stop(&cond);
@@ -160,9 +169,7 @@ public:
   {
     create_replayer<>();
 
-    rbd::mirror::ImageReplayer<>::BootstrapParams
-      bootstap_params(m_image_name);
-    start(&bootstap_params);
+    start();
     wait_for_replay_complete();
     stop();
   }
@@ -279,6 +286,16 @@ public:
     ASSERT_EQ(master_position, mirror_position);
   }
 
+  void wait_for_stopped() {
+    for (int i = 0; i < 100; i++) {
+      if (m_replayer->is_stopped()) {
+        break;
+      }
+      wait_for_watcher_notify(1);
+    }
+    ASSERT_TRUE(m_replayer->is_stopped());
+  }
+
   void write_test_data(librbd::ImageCtx *ictx, const char *test_data, off_t off,
                        size_t len)
   {
@@ -332,7 +349,10 @@ public:
   static int _image_number;
 
   rbd::mirror::Threads *m_threads = nullptr;
-  librados::Rados m_local_cluster, m_remote_cluster;
+  std::shared_ptr<rbd::mirror::ImageDeleter> m_image_deleter;
+  std::shared_ptr<librados::Rados> m_local_cluster;
+  librados::Rados m_remote_cluster;
+  std::shared_ptr<rbd::mirror::ImageSyncThrottler<>> m_image_sync_throttler;
   std::string m_local_mirror_uuid = "local mirror uuid";
   std::string m_remote_mirror_uuid = "remote mirror uuid";
   std::string m_local_pool_name, m_remote_pool_name;
@@ -360,10 +380,8 @@ TEST_F(TestImageReplayer, BootstrapErrorLocalImageExists)
 			      false, 0, &order, 0, 0));
 
   create_replayer<>();
-  rbd::mirror::ImageReplayer<>::BootstrapParams
-    bootstap_params(m_image_name);
   C_SaferCond cond;
-  m_replayer->start(&cond, &bootstap_params);
+  m_replayer->start(&cond);
   ASSERT_EQ(-EEXIST, cond.wait());
 }
 
@@ -378,20 +396,16 @@ TEST_F(TestImageReplayer, BootstrapErrorNoJournal)
   close_image(ictx);
 
   create_replayer<>();
-  rbd::mirror::ImageReplayer<>::BootstrapParams
-    bootstap_params(m_image_name);
   C_SaferCond cond;
-  m_replayer->start(&cond, &bootstap_params);
+  m_replayer->start(&cond);
   ASSERT_EQ(-ENOENT, cond.wait());
 }
 
 TEST_F(TestImageReplayer, StartInterrupted)
 {
   create_replayer<>();
-  rbd::mirror::ImageReplayer<>::BootstrapParams
-    bootstap_params(m_image_name);
   C_SaferCond start_cond, stop_cond;
-  m_replayer->start(&start_cond, &bootstap_params);
+  m_replayer->start(&start_cond);
   m_replayer->stop(&stop_cond);
   int r = start_cond.wait();
   printf("start returned %d\n", r);
@@ -422,10 +436,8 @@ TEST_F(TestImageReplayer, ErrorNoJournal)
   ASSERT_EQ(0, librbd::update_features(ictx, RBD_FEATURE_JOURNALING, false));
   close_image(ictx);
 
-  rbd::mirror::ImageReplayer<>::BootstrapParams
-    bootstap_params(m_image_name);
   C_SaferCond cond;
-  m_replayer->start(&cond, &bootstap_params);
+  m_replayer->start(&cond);
   ASSERT_EQ(-ENOENT, cond.wait());
 }
 
@@ -533,3 +545,280 @@ TEST_F(TestImageReplayer, NextTag)
 
   stop();
 }
+
+TEST_F(TestImageReplayer, Resync)
+{
+  bootstrap();
+
+  librbd::ImageCtx *ictx;
+
+  start();
+
+  std::string image_id = m_replayer->get_local_image_id();
+
+  generate_test_data();
+
+  open_remote_image(&ictx);
+  for (int i = 0; i < TEST_IO_COUNT; ++i) {
+    write_test_data(ictx, m_test_data, TEST_IO_SIZE * i, TEST_IO_SIZE);
+  }
+  flush(ictx);
+
+  wait_for_replay_complete();
+
+  for (int i = TEST_IO_COUNT; i < 2 * TEST_IO_COUNT; ++i) {
+    write_test_data(ictx, m_test_data, TEST_IO_SIZE * i, TEST_IO_SIZE);
+  }
+  flush(ictx);
+  close_image(ictx);
+
+  C_SaferCond ctx;
+  m_replayer->resync_image(&ctx);
+  ASSERT_EQ(0, ctx.wait());
+
+  C_SaferCond delete_ctx;
+  m_image_deleter->wait_for_scheduled_deletion(
+    m_replayer->get_local_image_name(), &delete_ctx);
+  EXPECT_EQ(0, delete_ctx.wait());
+
+  C_SaferCond cond;
+  m_replayer->start(&cond);
+  ASSERT_EQ(0, cond.wait());
+
+  ASSERT_NE(image_id, m_replayer->get_local_image_id());
+  ASSERT_TRUE(m_replayer->is_replaying());
+
+  wait_for_replay_complete();
+
+  open_local_image(&ictx);
+  for (int i = 0; i < 2 * TEST_IO_COUNT; ++i) {
+    read_test_data(ictx, m_test_data, TEST_IO_SIZE * i, TEST_IO_SIZE);
+  }
+  close_image(ictx);
+
+  stop();
+}
+
+TEST_F(TestImageReplayer, Resync_While_Stop)
+{
+
+  bootstrap();
+
+  start();
+
+  std::string image_id = m_replayer->get_local_image_id();
+
+  generate_test_data();
+
+  librbd::ImageCtx *ictx;
+  open_remote_image(&ictx);
+  for (int i = 0; i < TEST_IO_COUNT; ++i) {
+    write_test_data(ictx, m_test_data, TEST_IO_SIZE * i, TEST_IO_SIZE);
+  }
+  flush(ictx);
+
+  wait_for_replay_complete();
+
+  for (int i = TEST_IO_COUNT; i < 2 * TEST_IO_COUNT; ++i) {
+    write_test_data(ictx, m_test_data, TEST_IO_SIZE * i, TEST_IO_SIZE);
+  }
+  flush(ictx);
+  close_image(ictx);
+
+  wait_for_replay_complete();
+
+  C_SaferCond cond;
+  m_replayer->stop(&cond);
+  ASSERT_EQ(0, cond.wait());
+
+  open_local_image(&ictx);
+  librbd::Journal<>::request_resync(ictx);
+  close_image(ictx);
+
+  C_SaferCond cond2;
+  m_replayer->start(&cond2);
+  ASSERT_EQ(0, cond2.wait());
+
+  ASSERT_TRUE(m_replayer->is_stopped());
+
+  C_SaferCond delete_ctx;
+  m_image_deleter->wait_for_scheduled_deletion(
+    m_replayer->get_local_image_name(), &delete_ctx);
+  EXPECT_EQ(0, delete_ctx.wait());
+
+  C_SaferCond cond3;
+  m_replayer->start(&cond3);
+  ASSERT_EQ(0, cond3.wait());
+
+  ASSERT_NE(image_id, m_replayer->get_local_image_id());
+  ASSERT_TRUE(m_replayer->is_replaying());
+
+  wait_for_replay_complete();
+
+  open_local_image(&ictx);
+  for (int i = 0; i < 2 * TEST_IO_COUNT; ++i) {
+    read_test_data(ictx, m_test_data, TEST_IO_SIZE * i, TEST_IO_SIZE);
+  }
+  close_image(ictx);
+
+  stop();
+}
+
+TEST_F(TestImageReplayer, Resync_StartInterrupted)
+{
+
+  bootstrap();
+
+  std::string image_id = m_replayer->get_local_image_id();
+
+  librbd::ImageCtx *ictx;
+  open_local_image(&ictx);
+  librbd::Journal<>::request_resync(ictx);
+  close_image(ictx);
+
+  C_SaferCond cond;
+  m_replayer->start(&cond);
+  ASSERT_EQ(0, cond.wait());
+
+  ASSERT_TRUE(m_replayer->is_stopped());
+
+  C_SaferCond delete_ctx;
+  m_image_deleter->wait_for_scheduled_deletion(
+    m_replayer->get_local_image_name(), &delete_ctx);
+  EXPECT_EQ(0, delete_ctx.wait());
+
+  C_SaferCond cond2;
+  m_replayer->start(&cond2);
+  ASSERT_EQ(0, cond2.wait());
+
+  ASSERT_EQ(0U, m_watch_handle);
+  std::string oid = ::journal::Journaler::header_oid(m_remote_image_id);
+  m_watch_ctx = new C_WatchCtx(this, oid);
+  ASSERT_EQ(0, m_remote_ioctx.watch2(oid, &m_watch_handle, m_watch_ctx));
+
+  ASSERT_NE(image_id, m_replayer->get_local_image_id());
+
+  ASSERT_TRUE(m_replayer->is_replaying());
+
+  open_remote_image(&ictx);
+  for (int i = 0; i < TEST_IO_COUNT; ++i) {
+    write_test_data(ictx, m_test_data, TEST_IO_SIZE * i, TEST_IO_SIZE);
+  }
+  flush(ictx);
+
+  wait_for_replay_complete();
+
+  for (int i = TEST_IO_COUNT; i < 2 * TEST_IO_COUNT; ++i) {
+    write_test_data(ictx, m_test_data, TEST_IO_SIZE * i, TEST_IO_SIZE);
+  }
+  flush(ictx);
+  close_image(ictx);
+
+  wait_for_replay_complete();
+
+  open_local_image(&ictx);
+  for (int i = 0; i < 2 * TEST_IO_COUNT; ++i) {
+    read_test_data(ictx, m_test_data, TEST_IO_SIZE * i, TEST_IO_SIZE);
+  }
+  close_image(ictx);
+
+  stop();
+}
+
+TEST_F(TestImageReplayer, MultipleReplayFailures_SingleEpoch) {
+  bootstrap();
+
+  // inject a snapshot that cannot be unprotected
+  librbd::ImageCtx *ictx;
+  open_image(m_local_ioctx, m_image_name, false, &ictx);
+  ictx->features &= ~RBD_FEATURE_JOURNALING;
+  ASSERT_EQ(0, ictx->operations->snap_create("foo"));
+  ASSERT_EQ(0, ictx->operations->snap_protect("foo"));
+  ASSERT_EQ(0, librbd::cls_client::add_child(&ictx->md_ctx, RBD_CHILDREN,
+                                             {ictx->md_ctx.get_id(),
+                                              ictx->id, ictx->snap_ids["foo"]},
+                                             "dummy child id"));
+  close_image(ictx);
+
+  // race failed op shut down with new ops
+  open_remote_image(&ictx);
+  for (uint64_t i = 0; i < 10; ++i) {
+    RWLock::RLocker owner_locker(ictx->owner_lock);
+    C_SaferCond request_lock;
+    ictx->exclusive_lock->request_lock(&request_lock);
+    ASSERT_EQ(0, request_lock.wait());
+
+    C_SaferCond append_ctx;
+    ictx->journal->append_op_event(
+      i,
+      librbd::journal::EventEntry{
+        librbd::journal::SnapUnprotectEvent{i, "foo"}},
+      &append_ctx);
+    ASSERT_EQ(0, append_ctx.wait());
+
+    C_SaferCond commit_ctx;
+    ictx->journal->commit_op_event(i, 0, &commit_ctx);
+    ASSERT_EQ(0, commit_ctx.wait());
+
+    C_SaferCond release_ctx;
+    ictx->exclusive_lock->release_lock(&release_ctx);
+    ASSERT_EQ(0, release_ctx.wait());
+  }
+
+  for (uint64_t i = 0; i < 5; ++i) {
+    start();
+    wait_for_stopped();
+    unwatch();
+  }
+}
+
+TEST_F(TestImageReplayer, MultipleReplayFailures_MultiEpoch) {
+  bootstrap();
+
+  // inject a snapshot that cannot be unprotected
+  librbd::ImageCtx *ictx;
+  open_image(m_local_ioctx, m_image_name, false, &ictx);
+  ictx->features &= ~RBD_FEATURE_JOURNALING;
+  ASSERT_EQ(0, ictx->operations->snap_create("foo"));
+  ASSERT_EQ(0, ictx->operations->snap_protect("foo"));
+  ASSERT_EQ(0, librbd::cls_client::add_child(&ictx->md_ctx, RBD_CHILDREN,
+                                             {ictx->md_ctx.get_id(),
+                                              ictx->id, ictx->snap_ids["foo"]},
+                                             "dummy child id"));
+  close_image(ictx);
+
+  // race failed op shut down with new tag flush
+  open_remote_image(&ictx);
+  {
+    RWLock::RLocker owner_locker(ictx->owner_lock);
+    C_SaferCond request_lock;
+    ictx->exclusive_lock->request_lock(&request_lock);
+    ASSERT_EQ(0, request_lock.wait());
+
+    C_SaferCond append_ctx;
+    ictx->journal->append_op_event(
+      1U,
+      librbd::journal::EventEntry{
+        librbd::journal::SnapUnprotectEvent{1U, "foo"}},
+      &append_ctx);
+    ASSERT_EQ(0, append_ctx.wait());
+
+    C_SaferCond commit_ctx;
+    ictx->journal->commit_op_event(1U, 0, &commit_ctx);
+    ASSERT_EQ(0, commit_ctx.wait());
+
+    C_SaferCond release_ctx;
+    ictx->exclusive_lock->release_lock(&release_ctx);
+    ASSERT_EQ(0, release_ctx.wait());
+  }
+
+  write_test_data(ictx, m_test_data, 0, TEST_IO_SIZE);
+
+  for (uint64_t i = 0; i < 5; ++i) {
+    start();
+    wait_for_stopped();
+    unwatch();
+  }
+  close_image(ictx);
+}
+
diff --git a/src/test/rbd_mirror/test_ImageSync.cc b/src/test/rbd_mirror/test_ImageSync.cc
index 922d499..6097169 100644
--- a/src/test/rbd_mirror/test_ImageSync.cc
+++ b/src/test/rbd_mirror/test_ImageSync.cc
@@ -5,6 +5,7 @@
 #include "include/stringify.h"
 #include "include/rbd/librbd.hpp"
 #include "journal/Journaler.h"
+#include "journal/Settings.h"
 #include "librbd/AioImageRequestWQ.h"
 #include "librbd/ExclusiveLock.h"
 #include "librbd/ImageCtx.h"
@@ -54,7 +55,7 @@ public:
 
     m_remote_journaler = new ::journal::Journaler(
       m_threads->work_queue, m_threads->timer, &m_threads->timer_lock,
-      m_remote_io_ctx, m_remote_image_ctx->id, "mirror-uuid", 5);
+      m_remote_io_ctx, m_remote_image_ctx->id, "mirror-uuid", {});
 
     m_client_meta = {"image-id"};
 
diff --git a/src/test/rbd_mirror/test_mock_ImageReplayer.cc b/src/test/rbd_mirror/test_mock_ImageReplayer.cc
index 8ff318a..5f651af 100644
--- a/src/test/rbd_mirror/test_mock_ImageReplayer.cc
+++ b/src/test/rbd_mirror/test_mock_ImageReplayer.cc
@@ -6,6 +6,8 @@
 #include "tools/rbd_mirror/ImageReplayer.h"
 #include "tools/rbd_mirror/image_replayer/BootstrapRequest.h"
 #include "tools/rbd_mirror/image_replayer/CloseImageRequest.h"
+#include "tools/rbd_mirror/image_replayer/EventPreprocessor.h"
+#include "tools/rbd_mirror/ImageSyncThrottler.h"
 #include "test/journal/mock/MockJournaler.h"
 #include "test/librbd/mock/MockImageCtx.h"
 #include "test/librbd/mock/MockJournal.h"
@@ -21,8 +23,9 @@ struct MockTestImageCtx : public MockImageCtx {
 };
 
 struct MockTestJournal : public MockJournal {
-  MOCK_METHOD2(start_external_replay, void(journal::Replay<MockTestImageCtx> **,
-                                           Context *on_finish));
+  MOCK_METHOD3(start_external_replay, void(journal::Replay<MockTestImageCtx> **,
+                                           Context *on_finish,
+                                           Context *on_close_request));
   MOCK_METHOD0(stop_external_replay, void());
 };
 
@@ -32,7 +35,8 @@ namespace journal {
 
 template<>
 struct Replay<MockTestImageCtx> {
-  MOCK_METHOD3(process, void(bufferlist::iterator *, Context *, Context *));
+  MOCK_METHOD2(decode, int(bufferlist::iterator *, EventEntry *));
+  MOCK_METHOD3(process, void(const EventEntry &, Context *, Context *));
   MOCK_METHOD1(flush, void(Context*));
   MOCK_METHOD2(shut_down, void(bool, Context*));
 };
@@ -58,19 +62,20 @@ struct BootstrapRequest<librbd::MockTestImageCtx> {
   Context *on_finish = nullptr;
 
   static BootstrapRequest* create(librados::IoCtx &local_io_ctx,
-                                  librados::IoCtx &remote_io_ctx,
-                                  librbd::MockTestImageCtx **local_image_ctx,
-                                  const std::string &local_image_name,
-                                  const std::string &remote_image_id,
-                                  const std::string &global_image_id,
-                                  ContextWQ *work_queue, SafeTimer *timer,
-                                  Mutex *timer_lock,
-                                  const std::string &local_mirror_uuid,
-                                  const std::string &remote_mirror_uuid,
-                                  ::journal::MockJournalerProxy *journaler,
-                                  librbd::journal::MirrorPeerClientMeta *client_meta,
-                                  Context *on_finish,
-                                  rbd::mirror::ProgressContext *progress_ctx = nullptr) {
+        librados::IoCtx &remote_io_ctx,
+        rbd::mirror::ImageSyncThrottlerRef<librbd::MockTestImageCtx> image_sync_throttler,
+        librbd::MockTestImageCtx **local_image_ctx,
+        const std::string &local_image_name,
+        const std::string &remote_image_id,
+        const std::string &global_image_id,
+        ContextWQ *work_queue, SafeTimer *timer,
+        Mutex *timer_lock,
+        const std::string &local_mirror_uuid,
+        const std::string &remote_mirror_uuid,
+        ::journal::MockJournalerProxy *journaler,
+        librbd::journal::MirrorPeerClientMeta *client_meta,
+        Context *on_finish,
+        rbd::mirror::ProgressContext *progress_ctx = nullptr) {
     assert(s_instance != nullptr);
     s_instance->on_finish = on_finish;
     return s_instance;
@@ -113,6 +118,28 @@ struct CloseImageRequest<librbd::MockTestImageCtx> {
 };
 
 template<>
+struct EventPreprocessor<librbd::MockTestImageCtx> {
+  static EventPreprocessor *s_instance;
+
+  static EventPreprocessor *create(librbd::MockTestImageCtx &local_image_ctx,
+                                   ::journal::MockJournalerProxy &remote_journaler,
+                                   const std::string &local_mirror_uuid,
+                                   librbd::journal::MirrorPeerClientMeta *client_meta,
+                                   ContextWQ *work_queue) {
+    assert(s_instance != nullptr);
+    return s_instance;
+  }
+
+  EventPreprocessor() {
+    assert(s_instance == nullptr);
+    s_instance = this;
+  }
+
+  MOCK_METHOD1(is_required, bool(const librbd::journal::EventEntry &));
+  MOCK_METHOD2(preprocess, void(librbd::journal::EventEntry *, Context *));
+};
+
+template<>
 struct ReplayStatusFormatter<librbd::MockTestImageCtx> {
   static ReplayStatusFormatter* s_instance;
 
@@ -132,6 +159,7 @@ struct ReplayStatusFormatter<librbd::MockTestImageCtx> {
 
 BootstrapRequest<librbd::MockTestImageCtx>* BootstrapRequest<librbd::MockTestImageCtx>::s_instance = nullptr;
 CloseImageRequest<librbd::MockTestImageCtx>* CloseImageRequest<librbd::MockTestImageCtx>::s_instance = nullptr;
+EventPreprocessor<librbd::MockTestImageCtx>* EventPreprocessor<librbd::MockTestImageCtx>::s_instance = nullptr;
 ReplayStatusFormatter<librbd::MockTestImageCtx>* ReplayStatusFormatter<librbd::MockTestImageCtx>::s_instance = nullptr;
 
 } // namespace image_replayer
diff --git a/src/test/rbd_mirror/test_mock_ImageSync.cc b/src/test/rbd_mirror/test_mock_ImageSync.cc
index 868a030..3d3983a 100644
--- a/src/test/rbd_mirror/test_mock_ImageSync.cc
+++ b/src/test/rbd_mirror/test_mock_ImageSync.cc
@@ -17,10 +17,21 @@
 #include "tools/rbd_mirror/image_sync/SyncPointPruneRequest.h"
 
 namespace librbd {
+
+namespace {
+
+struct MockTestImageCtx : public librbd::MockImageCtx {
+  MockTestImageCtx(librbd::ImageCtx &image_ctx)
+    : librbd::MockImageCtx(image_ctx) {
+  }
+};
+
+} // anonymous namespace
+
 namespace journal {
 
 template <>
-struct TypeTraits<librbd::MockImageCtx> {
+struct TypeTraits<librbd::MockTestImageCtx> {
   typedef ::journal::MockJournaler Journaler;
 };
 
@@ -29,7 +40,7 @@ struct TypeTraits<librbd::MockImageCtx> {
 
 // template definitions
 #include "tools/rbd_mirror/ImageSync.cc"
-template class rbd::mirror::ImageSync<librbd::MockImageCtx>;
+template class rbd::mirror::ImageSync<librbd::MockTestImageCtx>;
 
 namespace rbd {
 namespace mirror {
@@ -37,13 +48,13 @@ namespace mirror {
 namespace image_sync {
 
 template <>
-class ImageCopyRequest<librbd::MockImageCtx> {
+class ImageCopyRequest<librbd::MockTestImageCtx> {
 public:
   static ImageCopyRequest* s_instance;
   Context *on_finish;
 
-  static ImageCopyRequest* create(librbd::MockImageCtx *local_image_ctx,
-                                  librbd::MockImageCtx *remote_image_ctx,
+  static ImageCopyRequest* create(librbd::MockTestImageCtx *local_image_ctx,
+                                  librbd::MockTestImageCtx *remote_image_ctx,
                                   SafeTimer *timer, Mutex *timer_lock,
                                   journal::MockJournaler *journaler,
                                   librbd::journal::MirrorPeerClientMeta *client_meta,
@@ -70,13 +81,13 @@ public:
 };
 
 template <>
-class SnapshotCopyRequest<librbd::MockImageCtx> {
+class SnapshotCopyRequest<librbd::MockTestImageCtx> {
 public:
   static SnapshotCopyRequest* s_instance;
   Context *on_finish;
 
-  static SnapshotCopyRequest* create(librbd::MockImageCtx *local_image_ctx,
-                                     librbd::MockImageCtx *remote_image_ctx,
+  static SnapshotCopyRequest* create(librbd::MockTestImageCtx *local_image_ctx,
+                                     librbd::MockTestImageCtx *remote_image_ctx,
                                      SnapshotCopyRequest<librbd::ImageCtx>::SnapMap *snap_map,
                                      journal::MockJournaler *journaler,
                                      librbd::journal::MirrorPeerClientMeta *client_meta,
@@ -102,12 +113,12 @@ public:
 };
 
 template <>
-class SyncPointCreateRequest<librbd::MockImageCtx> {
+class SyncPointCreateRequest<librbd::MockTestImageCtx> {
 public:
   static SyncPointCreateRequest *s_instance;
   Context *on_finish;
 
-  static SyncPointCreateRequest* create(librbd::MockImageCtx *remote_image_ctx,
+  static SyncPointCreateRequest* create(librbd::MockTestImageCtx *remote_image_ctx,
                                         const std::string &mirror_uuid,
                                         journal::MockJournaler *journaler,
                                         librbd::journal::MirrorPeerClientMeta *client_meta,
@@ -124,13 +135,13 @@ public:
 };
 
 template <>
-class SyncPointPruneRequest<librbd::MockImageCtx> {
+class SyncPointPruneRequest<librbd::MockTestImageCtx> {
 public:
   static SyncPointPruneRequest *s_instance;
   Context *on_finish;
   bool sync_complete;
 
-  static SyncPointPruneRequest* create(librbd::MockImageCtx *remote_image_ctx,
+  static SyncPointPruneRequest* create(librbd::MockTestImageCtx *remote_image_ctx,
                                        bool sync_complete,
                                        journal::MockJournaler *journaler,
                                        librbd::journal::MirrorPeerClientMeta *client_meta,
@@ -147,10 +158,10 @@ public:
   MOCK_METHOD0(send, void());
 };
 
-ImageCopyRequest<librbd::MockImageCtx>* ImageCopyRequest<librbd::MockImageCtx>::s_instance = nullptr;
-SnapshotCopyRequest<librbd::MockImageCtx>* SnapshotCopyRequest<librbd::MockImageCtx>::s_instance = nullptr;
-SyncPointCreateRequest<librbd::MockImageCtx>* SyncPointCreateRequest<librbd::MockImageCtx>::s_instance = nullptr;
-SyncPointPruneRequest<librbd::MockImageCtx>* SyncPointPruneRequest<librbd::MockImageCtx>::s_instance = nullptr;
+ImageCopyRequest<librbd::MockTestImageCtx>* ImageCopyRequest<librbd::MockTestImageCtx>::s_instance = nullptr;
+SnapshotCopyRequest<librbd::MockTestImageCtx>* SnapshotCopyRequest<librbd::MockTestImageCtx>::s_instance = nullptr;
+SyncPointCreateRequest<librbd::MockTestImageCtx>* SyncPointCreateRequest<librbd::MockTestImageCtx>::s_instance = nullptr;
+SyncPointPruneRequest<librbd::MockTestImageCtx>* SyncPointPruneRequest<librbd::MockTestImageCtx>::s_instance = nullptr;
 
 } // namespace image_sync
 
@@ -163,11 +174,11 @@ using ::testing::InvokeWithoutArgs;
 
 class TestMockImageSync : public TestMockFixture {
 public:
-  typedef ImageSync<librbd::MockImageCtx> MockImageSync;
-  typedef image_sync::ImageCopyRequest<librbd::MockImageCtx> MockImageCopyRequest;
-  typedef image_sync::SnapshotCopyRequest<librbd::MockImageCtx> MockSnapshotCopyRequest;
-  typedef image_sync::SyncPointCreateRequest<librbd::MockImageCtx> MockSyncPointCreateRequest;
-  typedef image_sync::SyncPointPruneRequest<librbd::MockImageCtx> MockSyncPointPruneRequest;
+  typedef ImageSync<librbd::MockTestImageCtx> MockImageSync;
+  typedef image_sync::ImageCopyRequest<librbd::MockTestImageCtx> MockImageCopyRequest;
+  typedef image_sync::SnapshotCopyRequest<librbd::MockTestImageCtx> MockSnapshotCopyRequest;
+  typedef image_sync::SyncPointCreateRequest<librbd::MockTestImageCtx> MockSyncPointCreateRequest;
+  typedef image_sync::SyncPointPruneRequest<librbd::MockTestImageCtx> MockSyncPointPruneRequest;
 
   virtual void SetUp() {
     TestMockFixture::SetUp();
@@ -180,7 +191,7 @@ public:
     ASSERT_EQ(0, open_image(m_local_io_ctx, m_image_name, &m_local_image_ctx));
   }
 
-  void expect_create_sync_point(librbd::MockImageCtx &mock_local_image_ctx,
+  void expect_create_sync_point(librbd::MockTestImageCtx &mock_local_image_ctx,
                                 MockSyncPointCreateRequest &mock_sync_point_create_request,
                                 int r) {
     EXPECT_CALL(mock_sync_point_create_request, send())
@@ -216,13 +227,13 @@ public:
     }
   }
 
-  void expect_create_object_map(librbd::MockImageCtx &mock_image_ctx,
+  void expect_create_object_map(librbd::MockTestImageCtx &mock_image_ctx,
                                 librbd::MockObjectMap *mock_object_map) {
     EXPECT_CALL(mock_image_ctx, create_object_map(CEPH_NOSNAP))
       .WillOnce(Return(mock_object_map));
   }
 
-  void expect_open_object_map(librbd::MockImageCtx &mock_image_ctx,
+  void expect_open_object_map(librbd::MockTestImageCtx &mock_image_ctx,
                               librbd::MockObjectMap &mock_object_map) {
     EXPECT_CALL(mock_object_map, open(_))
       .WillOnce(Invoke([this](Context *ctx) {
@@ -248,8 +259,8 @@ public:
         }));
   }
 
-  MockImageSync *create_request(librbd::MockImageCtx &mock_remote_image_ctx,
-                                librbd::MockImageCtx &mock_local_image_ctx,
+  MockImageSync *create_request(librbd::MockTestImageCtx &mock_remote_image_ctx,
+                                librbd::MockTestImageCtx &mock_local_image_ctx,
                                 journal::MockJournaler &mock_journaler,
                                 Context *ctx) {
     return new MockImageSync(&mock_local_image_ctx, &mock_remote_image_ctx,
@@ -264,8 +275,8 @@ public:
 };
 
 TEST_F(TestMockImageSync, SimpleSync) {
-  librbd::MockImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
-  librbd::MockImageCtx mock_local_image_ctx(*m_local_image_ctx);
+  librbd::MockTestImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
+  librbd::MockTestImageCtx mock_local_image_ctx(*m_local_image_ctx);
   journal::MockJournaler mock_journaler;
   MockImageCopyRequest mock_image_copy_request;
   MockSnapshotCopyRequest mock_snapshot_copy_request;
@@ -294,8 +305,8 @@ TEST_F(TestMockImageSync, SimpleSync) {
 }
 
 TEST_F(TestMockImageSync, RestartSync) {
-  librbd::MockImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
-  librbd::MockImageCtx mock_local_image_ctx(*m_local_image_ctx);
+  librbd::MockTestImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
+  librbd::MockTestImageCtx mock_local_image_ctx(*m_local_image_ctx);
   journal::MockJournaler mock_journaler;
   MockImageCopyRequest mock_image_copy_request;
   MockSnapshotCopyRequest mock_snapshot_copy_request;
@@ -329,8 +340,8 @@ TEST_F(TestMockImageSync, RestartSync) {
 }
 
 TEST_F(TestMockImageSync, CancelImageCopy) {
-  librbd::MockImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
-  librbd::MockImageCtx mock_local_image_ctx(*m_local_image_ctx);
+  librbd::MockTestImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
+  librbd::MockTestImageCtx mock_local_image_ctx(*m_local_image_ctx);
   journal::MockJournaler mock_journaler;
   MockImageCopyRequest mock_image_copy_request;
   MockSnapshotCopyRequest mock_snapshot_copy_request;
@@ -340,6 +351,7 @@ TEST_F(TestMockImageSync, CancelImageCopy) {
   m_client_meta.sync_points = {{"snap1", boost::none}};
 
   InSequence seq;
+  expect_prune_sync_point(mock_sync_point_prune_request, false, 0);
   expect_copy_snapshots(mock_snapshot_copy_request, 0);
 
   C_SaferCond image_copy_ctx;
@@ -366,8 +378,8 @@ TEST_F(TestMockImageSync, CancelImageCopy) {
 }
 
 TEST_F(TestMockImageSync, CancelAfterCopySnapshots) {
-  librbd::MockImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
-  librbd::MockImageCtx mock_local_image_ctx(*m_local_image_ctx);
+  librbd::MockTestImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
+  librbd::MockTestImageCtx mock_local_image_ctx(*m_local_image_ctx);
   journal::MockJournaler mock_journaler;
   MockSnapshotCopyRequest mock_snapshot_copy_request;
   MockSyncPointCreateRequest mock_sync_point_create_request;
@@ -396,8 +408,8 @@ TEST_F(TestMockImageSync, CancelAfterCopySnapshots) {
 }
 
 TEST_F(TestMockImageSync, CancelAfterCopyImage) {
-  librbd::MockImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
-  librbd::MockImageCtx mock_local_image_ctx(*m_local_image_ctx);
+  librbd::MockTestImageCtx mock_remote_image_ctx(*m_remote_image_ctx);
+  librbd::MockTestImageCtx mock_local_image_ctx(*m_local_image_ctx);
   journal::MockJournaler mock_journaler;
   MockImageCopyRequest mock_image_copy_request;
   MockSnapshotCopyRequest mock_snapshot_copy_request;
diff --git a/src/test/rbd_mirror/test_mock_ImageSyncThrottler.cc b/src/test/rbd_mirror/test_mock_ImageSyncThrottler.cc
new file mode 100644
index 0000000..e624ed9
--- /dev/null
+++ b/src/test/rbd_mirror/test_mock_ImageSyncThrottler.cc
@@ -0,0 +1,409 @@
+// -*- mode:C; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2016 SUSE LINUX GmbH
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation.  See file COPYING.
+ *
+ */
+
+#include "test/rbd_mirror/test_mock_fixture.h"
+#include "librbd/journal/TypeTraits.h"
+#include "test/journal/mock/MockJournaler.h"
+#include "test/librbd/mock/MockImageCtx.h"
+#include "librbd/ImageState.h"
+#include "tools/rbd_mirror/Threads.h"
+#include "tools/rbd_mirror/ImageSync.h"
+
+namespace librbd {
+
+namespace {
+
+struct MockTestImageCtx : public librbd::MockImageCtx {
+  MockTestImageCtx(librbd::ImageCtx &image_ctx)
+    : librbd::MockImageCtx(image_ctx) {
+  }
+};
+
+} // anonymous namespace
+
+namespace journal {
+
+template <>
+struct TypeTraits<librbd::MockTestImageCtx> {
+  typedef ::journal::MockJournaler Journaler;
+};
+
+} // namespace journal
+} // namespace librbd
+
+namespace rbd {
+namespace mirror {
+
+using ::testing::Invoke;
+
+typedef ImageSync<librbd::MockTestImageCtx> MockImageSync;
+
+template<>
+class ImageSync<librbd::MockTestImageCtx> {
+public:
+  static std::vector<MockImageSync *> instances;
+
+  Context *on_finish;
+  bool syncing = false;
+
+  static ImageSync* create(librbd::MockTestImageCtx *local_image_ctx,
+                           librbd::MockTestImageCtx *remote_image_ctx,
+                           SafeTimer *timer, Mutex *timer_lock,
+                           const std::string &mirror_uuid,
+                           journal::MockJournaler *journaler,
+                           librbd::journal::MirrorPeerClientMeta *client_meta,
+                           ContextWQ *work_queue, Context *on_finish,
+                           ProgressContext *progress_ctx = nullptr) {
+    ImageSync *sync = new ImageSync();
+    sync->on_finish = on_finish;
+
+    EXPECT_CALL(*sync, send())
+      .WillRepeatedly(Invoke([sync]() {
+            sync->syncing = true;
+          }));
+
+    return sync;
+  }
+
+  void finish(int r) {
+    on_finish->complete(r);
+    put();
+  }
+
+  void get() {
+    instances.push_back(this);
+  }
+
+  void put() { delete this; }
+
+  MOCK_METHOD0(cancel, void());
+  MOCK_METHOD0(send, void());
+
+};
+
+
+std::vector<MockImageSync *> MockImageSync::instances;
+
+} // namespace mirror
+} // namespace rbd
+
+
+// template definitions
+#include "tools/rbd_mirror/ImageSyncThrottler.cc"
+template class rbd::mirror::ImageSyncThrottler<librbd::MockTestImageCtx>;
+
+namespace rbd {
+namespace mirror {
+
+class TestMockImageSyncThrottler : public TestMockFixture {
+public:
+  typedef ImageSyncThrottler<librbd::MockTestImageCtx> MockImageSyncThrottler;
+
+  virtual void SetUp() {
+    TestMockFixture::SetUp();
+
+    librbd::RBD rbd;
+    ASSERT_EQ(0, create_image(rbd, m_remote_io_ctx, m_image_name, m_image_size));
+    ASSERT_EQ(0, open_image(m_remote_io_ctx, m_image_name, &m_remote_image_ctx));
+
+    ASSERT_EQ(0, create_image(rbd, m_local_io_ctx, m_image_name, m_image_size));
+    ASSERT_EQ(0, open_image(m_local_io_ctx, m_image_name, &m_local_image_ctx));
+
+    mock_sync_throttler = new MockImageSyncThrottler();
+
+    m_mock_local_image_ctx = new librbd::MockTestImageCtx(*m_local_image_ctx);
+    m_mock_remote_image_ctx = new librbd::MockTestImageCtx(*m_remote_image_ctx);
+    m_mock_journaler = new journal::MockJournaler();
+  }
+
+  virtual void TearDown() {
+    MockImageSync::instances.clear();
+    delete mock_sync_throttler;
+    delete m_mock_local_image_ctx;
+    delete m_mock_remote_image_ctx;
+    delete m_mock_journaler;
+    TestMockFixture::TearDown();
+  }
+
+  void start_sync(const std::string& image_id, Context *ctx) {
+    m_mock_local_image_ctx->id = image_id;
+    mock_sync_throttler->start_sync(m_mock_local_image_ctx,
+                                    m_mock_remote_image_ctx,
+                                    m_threads->timer,
+                                    &m_threads->timer_lock,
+                                    "mirror_uuid",
+                                    m_mock_journaler,
+                                    &m_client_meta,
+                                    m_threads->work_queue,
+                                    ctx);
+  }
+
+  void cancel(const std::string& mirror_uuid, MockImageSync *sync,
+              bool running=true) {
+    if (running) {
+      EXPECT_CALL(*sync, cancel())
+        .WillOnce(Invoke([sync]() {
+              sync->finish(-ECANCELED);
+            }));
+    } else {
+      EXPECT_CALL(*sync, cancel()).Times(0);
+    }
+    mock_sync_throttler->cancel_sync(m_local_io_ctx, mirror_uuid);
+  }
+
+  librbd::ImageCtx *m_remote_image_ctx;
+  librbd::ImageCtx *m_local_image_ctx;
+  librbd::MockTestImageCtx *m_mock_local_image_ctx;
+  librbd::MockTestImageCtx *m_mock_remote_image_ctx;
+  journal::MockJournaler *m_mock_journaler;
+  librbd::journal::MirrorPeerClientMeta m_client_meta;
+  MockImageSyncThrottler *mock_sync_throttler;
+};
+
+TEST_F(TestMockImageSyncThrottler, Single_Sync) {
+  C_SaferCond ctx;
+  start_sync("image_id", &ctx);
+
+  ASSERT_EQ(1u, MockImageSync::instances.size());
+  MockImageSync *sync = MockImageSync::instances[0];
+  ASSERT_EQ(true, sync->syncing);
+  sync->finish(0);
+  ASSERT_EQ(0, ctx.wait());
+}
+
+TEST_F(TestMockImageSyncThrottler, Multiple_Syncs) {
+  mock_sync_throttler->set_max_concurrent_syncs(2);
+
+  C_SaferCond ctx1;
+  start_sync("image_id_1", &ctx1);
+  C_SaferCond ctx2;
+  start_sync("image_id_2", &ctx2);
+  C_SaferCond ctx3;
+  start_sync("image_id_3", &ctx3);
+  C_SaferCond ctx4;
+  start_sync("image_id_4", &ctx4);
+
+  ASSERT_EQ(4u, MockImageSync::instances.size());
+
+  MockImageSync *sync1 = MockImageSync::instances[0];
+  ASSERT_TRUE(sync1->syncing);
+
+  MockImageSync *sync2 = MockImageSync::instances[1];
+  ASSERT_TRUE(sync2->syncing);
+
+  MockImageSync *sync3 = MockImageSync::instances[2];
+  ASSERT_FALSE(sync3->syncing);
+
+  MockImageSync *sync4 = MockImageSync::instances[3];
+  ASSERT_FALSE(sync4->syncing);
+
+  sync1->finish(0);
+  ASSERT_EQ(0, ctx1.wait());
+
+  ASSERT_TRUE(sync3->syncing);
+  sync3->finish(-EINVAL);
+  ASSERT_EQ(-EINVAL, ctx3.wait());
+
+  ASSERT_TRUE(sync4->syncing);
+
+  sync2->finish(0);
+  ASSERT_EQ(0, ctx2.wait());
+
+  sync4->finish(0);
+  ASSERT_EQ(0, ctx4.wait());
+}
+
+TEST_F(TestMockImageSyncThrottler, Cancel_Running_Sync) {
+  C_SaferCond ctx1;
+  start_sync("image_id_1", &ctx1);
+  C_SaferCond ctx2;
+  start_sync("image_id_2", &ctx2);
+
+  ASSERT_EQ(2u, MockImageSync::instances.size());
+
+  MockImageSync *sync1 = MockImageSync::instances[0];
+  ASSERT_TRUE(sync1->syncing);
+
+  MockImageSync *sync2 = MockImageSync::instances[1];
+  ASSERT_TRUE(sync2->syncing);
+
+  cancel("image_id_2", sync2);
+  ASSERT_EQ(-ECANCELED, ctx2.wait());
+
+  sync1->finish(0);
+  ASSERT_EQ(0, ctx1.wait());
+}
+
+TEST_F(TestMockImageSyncThrottler, Cancel_Waiting_Sync) {
+  mock_sync_throttler->set_max_concurrent_syncs(1);
+
+  C_SaferCond ctx1;
+  start_sync("image_id_1", &ctx1);
+  C_SaferCond ctx2;
+  start_sync("image_id_2", &ctx2);
+
+  ASSERT_EQ(2u, MockImageSync::instances.size());
+
+  MockImageSync *sync1 = MockImageSync::instances[0];
+  ASSERT_TRUE(sync1->syncing);
+
+  MockImageSync *sync2 = MockImageSync::instances[1];
+  ASSERT_FALSE(sync2->syncing);
+
+  cancel("image_id_2", sync2, false);
+  ASSERT_EQ(-ECANCELED, ctx2.wait());
+
+  sync1->finish(0);
+  ASSERT_EQ(0, ctx1.wait());
+}
+
+TEST_F(TestMockImageSyncThrottler, Cancel_Running_Sync_Start_Waiting) {
+  mock_sync_throttler->set_max_concurrent_syncs(1);
+
+  C_SaferCond ctx1;
+  start_sync("image_id_1", &ctx1);
+  C_SaferCond ctx2;
+  start_sync("image_id_2", &ctx2);
+
+  ASSERT_EQ(2u, MockImageSync::instances.size());
+
+  MockImageSync *sync1 = MockImageSync::instances[0];
+  ASSERT_TRUE(sync1->syncing);
+
+  MockImageSync *sync2 = MockImageSync::instances[1];
+  ASSERT_FALSE(sync2->syncing);
+
+  cancel("image_id_1", sync1);
+  ASSERT_EQ(-ECANCELED, ctx1.wait());
+
+  ASSERT_TRUE(sync2->syncing);
+  sync2->finish(0);
+  ASSERT_EQ(0, ctx2.wait());
+}
+
+TEST_F(TestMockImageSyncThrottler, Increase_Max_Concurrent_Syncs) {
+  mock_sync_throttler->set_max_concurrent_syncs(2);
+
+  C_SaferCond ctx1;
+  start_sync("image_id_1", &ctx1);
+  C_SaferCond ctx2;
+  start_sync("image_id_2", &ctx2);
+  C_SaferCond ctx3;
+  start_sync("image_id_3", &ctx3);
+  C_SaferCond ctx4;
+  start_sync("image_id_4", &ctx4);
+  C_SaferCond ctx5;
+  start_sync("image_id_5", &ctx5);
+
+  ASSERT_EQ(5u, MockImageSync::instances.size());
+
+  MockImageSync *sync1 = MockImageSync::instances[0];
+  ASSERT_TRUE(sync1->syncing);
+
+  MockImageSync *sync2 = MockImageSync::instances[1];
+  ASSERT_TRUE(sync2->syncing);
+
+  MockImageSync *sync3 = MockImageSync::instances[2];
+  ASSERT_FALSE(sync3->syncing);
+
+  MockImageSync *sync4 = MockImageSync::instances[3];
+  ASSERT_FALSE(sync4->syncing);
+
+  MockImageSync *sync5 = MockImageSync::instances[4];
+  ASSERT_FALSE(sync5->syncing);
+
+  mock_sync_throttler->set_max_concurrent_syncs(4);
+
+  ASSERT_TRUE(sync3->syncing);
+  ASSERT_TRUE(sync4->syncing);
+  ASSERT_FALSE(sync5->syncing);
+
+  sync1->finish(0);
+  ASSERT_EQ(0, ctx1.wait());
+
+  ASSERT_TRUE(sync5->syncing);
+  sync5->finish(-EINVAL);
+  ASSERT_EQ(-EINVAL, ctx5.wait());
+
+  sync2->finish(0);
+  ASSERT_EQ(0, ctx2.wait());
+
+  sync3->finish(0);
+  ASSERT_EQ(0, ctx3.wait());
+
+  sync4->finish(0);
+  ASSERT_EQ(0, ctx4.wait());
+}
+
+TEST_F(TestMockImageSyncThrottler, Decrease_Max_Concurrent_Syncs) {
+  mock_sync_throttler->set_max_concurrent_syncs(4);
+
+  C_SaferCond ctx1;
+  start_sync("image_id_1", &ctx1);
+  C_SaferCond ctx2;
+  start_sync("image_id_2", &ctx2);
+  C_SaferCond ctx3;
+  start_sync("image_id_3", &ctx3);
+  C_SaferCond ctx4;
+  start_sync("image_id_4", &ctx4);
+  C_SaferCond ctx5;
+  start_sync("image_id_5", &ctx5);
+
+  ASSERT_EQ(5u, MockImageSync::instances.size());
+
+  MockImageSync *sync1 = MockImageSync::instances[0];
+  ASSERT_TRUE(sync1->syncing);
+
+  MockImageSync *sync2 = MockImageSync::instances[1];
+  ASSERT_TRUE(sync2->syncing);
+
+  MockImageSync *sync3 = MockImageSync::instances[2];
+  ASSERT_TRUE(sync3->syncing);
+
+  MockImageSync *sync4 = MockImageSync::instances[3];
+  ASSERT_TRUE(sync4->syncing);
+
+  MockImageSync *sync5 = MockImageSync::instances[4];
+  ASSERT_FALSE(sync5->syncing);
+
+  mock_sync_throttler->set_max_concurrent_syncs(2);
+
+  ASSERT_FALSE(sync5->syncing);
+
+  sync1->finish(0);
+  ASSERT_EQ(0, ctx1.wait());
+
+  ASSERT_FALSE(sync5->syncing);
+
+  sync2->finish(0);
+  ASSERT_EQ(0, ctx2.wait());
+
+  ASSERT_FALSE(sync5->syncing);
+
+  sync3->finish(0);
+  ASSERT_EQ(0, ctx3.wait());
+
+  ASSERT_TRUE(sync5->syncing);
+
+  sync4->finish(0);
+  ASSERT_EQ(0, ctx4.wait());
+
+  sync5->finish(0);
+  ASSERT_EQ(0, ctx5.wait());
+}
+
+
+} // namespace mirror
+} // namespace rbd
+
diff --git a/src/test/strtol.cc b/src/test/strtol.cc
index 646c055..3946736 100644
--- a/src/test/strtol.cc
+++ b/src/test/strtol.cc
@@ -234,6 +234,21 @@ TEST(StrictSICast, Error) {
     (void)strict_si_cast<int>("1T", &err);
     ASSERT_NE(err, "");
   }
+  {
+    std::string err;
+    (void)strict_si_cast<int64_t>("2E", &err);
+    ASSERT_EQ(err, "");
+  }
+  {
+    std::string err;
+    (void)strict_si_cast<int64_t>("-2E", &err);
+    ASSERT_EQ(err, "");
+  }
+  {
+    std::string err;
+    (void)strict_si_cast<int64_t>("1T", &err);
+    ASSERT_EQ(err, "");
+  }
 }
 
 /*
diff --git a/src/test/test_subman.sh b/src/test/test_subman.sh
new file mode 100755
index 0000000..d2cdd52
--- /dev/null
+++ b/src/test/test_subman.sh
@@ -0,0 +1,28 @@
+#!/bin/bash -e
+
+source $(dirname $0)/detect-build-env-vars.sh
+
+TMP=$(mktemp --tmpdir -d)
+trap "rm -fr $TMP" EXIT
+
+export PATH=$TMP:$PATH
+
+cat > $TMP/ceph-disk <<EOF
+echo '[{"partition":[{"type":"data","path":"/dev/foo/bar"}]}]'
+EOF
+chmod +x $TMP/ceph-disk
+
+cat > $TMP/df <<EOF
+echo Used
+echo $((2 * 1024 * 1024 * 1024))
+EOF
+chmod +x $TMP/df
+
+cat > $TMP/expected <<EOF
+{
+"band.storage.usage": 2
+}
+EOF
+export CEPH_FACTS_FILE=$TMP/facts
+$CEPH_ROOT/src/script/subman
+diff -u $CEPH_FACTS_FILE $TMP/expected
diff --git a/src/test/ubuntu-12.04/install-deps.sh b/src/test/ubuntu-12.04/install-deps.sh
index 03ca760..129178f 100755
--- a/src/test/ubuntu-12.04/install-deps.sh
+++ b/src/test/ubuntu-12.04/install-deps.sh
@@ -24,7 +24,7 @@ if test -f /etc/redhat-release ; then
 fi
 
 if type apt-get > /dev/null 2>&1 ; then
-    $SUDO apt-get install -y lsb-release
+    $SUDO apt-get install -y lsb-release devscripts equivs
 fi
 
 if type zypper > /dev/null 2>&1 ; then
@@ -39,20 +39,23 @@ Ubuntu|Debian|Devuan)
             exit 1
         fi
         touch $DIR/status
-        packages=$(dpkg-checkbuilddeps --admindir=$DIR debian/control 2>&1 | \
-            perl -p -e 's/.*Unmet build dependencies: *//;' \
-            -e 's/build-essential:native/build-essential/;' \
-            -e 's/\s*\|\s*/\|/g;' \
-            -e 's/\(.*?\)//g;' \
-            -e 's/ +/\n/g;' | sort)
+
+	backports=""
+	control="debian/control"
         case $(lsb_release -sc) in
             squeeze|wheezy)
-                packages=$(echo $packages | perl -pe 's/[-\w]*babeltrace[-\w]*//g')
+		control="/tmp/control.$$"
+		grep -v babeltrace debian/control > $control
                 backports="-t $(lsb_release -sc)-backports"
                 ;;
         esac
-        packages=$(echo $packages) # change newlines into spaces
-        $SUDO env DEBIAN_FRONTEND=noninteractive apt-get install $backports -y $packages || exit 1
+
+	# make a metapackage that expresses the build dependencies,
+	# install it, rm the .deb; then uninstall the package as its
+	# work is done
+	$SUDO env DEBIAN_FRONTEND=noninteractive mk-build-deps --install --remove --tool="apt-get -y --no-install-recommends $backports" $control || exit 1
+	$SUDO env DEBIAN_FRONTEND=noninteractive apt-get -y remove ceph-build-deps
+	if [ -n "$backports" ] ; then rm $control; fi
         ;;
 CentOS|Fedora|RedHatEnterpriseServer)
         case $(lsb_release -si) in
@@ -106,7 +109,14 @@ function activate_virtualenv() {
     local env_dir=$top_srcdir/install-deps-$interpreter
 
     if ! test -d $env_dir ; then
-        virtualenv --python $interpreter $env_dir
+        # Make a temporary virtualenv to get a fresh version of virtualenv
+        # because CentOS 7 has a buggy old version (v1.10.1)
+        # https://github.com/pypa/virtualenv/issues/463
+        virtualenv ${env_dir}_tmp
+        ${env_dir}_tmp/bin/pip install --upgrade virtualenv
+        ${env_dir}_tmp/bin/virtualenv --python $interpreter $env_dir
+        rm -rf ${env_dir}_tmp
+
         . $env_dir/bin/activate
         if ! populate_wheelhouse install ; then
             rm -rf $env_dir
diff --git a/src/test/ubuntu-14.04/install-deps.sh b/src/test/ubuntu-14.04/install-deps.sh
index 03ca760..129178f 100755
--- a/src/test/ubuntu-14.04/install-deps.sh
+++ b/src/test/ubuntu-14.04/install-deps.sh
@@ -24,7 +24,7 @@ if test -f /etc/redhat-release ; then
 fi
 
 if type apt-get > /dev/null 2>&1 ; then
-    $SUDO apt-get install -y lsb-release
+    $SUDO apt-get install -y lsb-release devscripts equivs
 fi
 
 if type zypper > /dev/null 2>&1 ; then
@@ -39,20 +39,23 @@ Ubuntu|Debian|Devuan)
             exit 1
         fi
         touch $DIR/status
-        packages=$(dpkg-checkbuilddeps --admindir=$DIR debian/control 2>&1 | \
-            perl -p -e 's/.*Unmet build dependencies: *//;' \
-            -e 's/build-essential:native/build-essential/;' \
-            -e 's/\s*\|\s*/\|/g;' \
-            -e 's/\(.*?\)//g;' \
-            -e 's/ +/\n/g;' | sort)
+
+	backports=""
+	control="debian/control"
         case $(lsb_release -sc) in
             squeeze|wheezy)
-                packages=$(echo $packages | perl -pe 's/[-\w]*babeltrace[-\w]*//g')
+		control="/tmp/control.$$"
+		grep -v babeltrace debian/control > $control
                 backports="-t $(lsb_release -sc)-backports"
                 ;;
         esac
-        packages=$(echo $packages) # change newlines into spaces
-        $SUDO env DEBIAN_FRONTEND=noninteractive apt-get install $backports -y $packages || exit 1
+
+	# make a metapackage that expresses the build dependencies,
+	# install it, rm the .deb; then uninstall the package as its
+	# work is done
+	$SUDO env DEBIAN_FRONTEND=noninteractive mk-build-deps --install --remove --tool="apt-get -y --no-install-recommends $backports" $control || exit 1
+	$SUDO env DEBIAN_FRONTEND=noninteractive apt-get -y remove ceph-build-deps
+	if [ -n "$backports" ] ; then rm $control; fi
         ;;
 CentOS|Fedora|RedHatEnterpriseServer)
         case $(lsb_release -si) in
@@ -106,7 +109,14 @@ function activate_virtualenv() {
     local env_dir=$top_srcdir/install-deps-$interpreter
 
     if ! test -d $env_dir ; then
-        virtualenv --python $interpreter $env_dir
+        # Make a temporary virtualenv to get a fresh version of virtualenv
+        # because CentOS 7 has a buggy old version (v1.10.1)
+        # https://github.com/pypa/virtualenv/issues/463
+        virtualenv ${env_dir}_tmp
+        ${env_dir}_tmp/bin/pip install --upgrade virtualenv
+        ${env_dir}_tmp/bin/virtualenv --python $interpreter $env_dir
+        rm -rf ${env_dir}_tmp
+
         . $env_dir/bin/activate
         if ! populate_wheelhouse install ; then
             rm -rf $env_dir
diff --git a/src/tools/Makefile-client.am b/src/tools/Makefile-client.am
index 7762c8b..e0488fc 100644
--- a/src/tools/Makefile-client.am
+++ b/src/tools/Makefile-client.am
@@ -92,6 +92,7 @@ librbd_mirror_internal_la_SOURCES = \
 	tools/rbd_mirror/ClusterWatcher.cc \
 	tools/rbd_mirror/ImageReplayer.cc \
 	tools/rbd_mirror/ImageSync.cc \
+        tools/rbd_mirror/ImageSyncThrottler.cc \
 	tools/rbd_mirror/Mirror.cc \
 	tools/rbd_mirror/PoolWatcher.cc \
 	tools/rbd_mirror/Replayer.cc \
@@ -101,6 +102,7 @@ librbd_mirror_internal_la_SOURCES = \
 	tools/rbd_mirror/image_replayer/BootstrapRequest.cc \
 	tools/rbd_mirror/image_replayer/CloseImageRequest.cc \
 	tools/rbd_mirror/image_replayer/CreateImageRequest.cc \
+	tools/rbd_mirror/image_replayer/EventPreprocessor.cc \
 	tools/rbd_mirror/image_replayer/OpenImageRequest.cc \
 	tools/rbd_mirror/image_replayer/OpenLocalImageRequest.cc \
 	tools/rbd_mirror/image_replayer/ReplayStatusFormatter.cc \
@@ -116,6 +118,7 @@ noinst_HEADERS += \
 	tools/rbd_mirror/ClusterWatcher.h \
 	tools/rbd_mirror/ImageReplayer.h \
 	tools/rbd_mirror/ImageSync.h \
+	tools/rbd_mirror/ImageSyncThrottler.h \
 	tools/rbd_mirror/Mirror.h \
 	tools/rbd_mirror/PoolWatcher.h \
 	tools/rbd_mirror/ProgressContext.h \
@@ -126,6 +129,7 @@ noinst_HEADERS += \
 	tools/rbd_mirror/image_replayer/BootstrapRequest.h \
 	tools/rbd_mirror/image_replayer/CloseImageRequest.h \
 	tools/rbd_mirror/image_replayer/CreateImageRequest.h \
+	tools/rbd_mirror/image_replayer/EventPreprocessor.h \
 	tools/rbd_mirror/image_replayer/OpenImageRequest.h \
 	tools/rbd_mirror/image_replayer/OpenLocalImageRequest.h \
 	tools/rbd_mirror/image_replayer/ReplayStatusFormatter.h \
diff --git a/src/tools/rados/rados.cc b/src/tools/rados/rados.cc
index d3b37ab..09f95dd 100644
--- a/src/tools/rados/rados.cc
+++ b/src/tools/rados/rados.cc
@@ -1246,19 +1246,14 @@ static void dump_shard(const shard_info_t& shard,
 		       const inconsistent_obj_t& inc,
 		       Formatter &f)
 {
-  f.dump_bool("missing", shard.has_shard_missing());
+  // A missing shard just has that error and nothing else
   if (shard.has_shard_missing()) {
+    f.open_array_section("errors");
+    f.dump_string("error", "missing");
+    f.close_section();
     return;
   }
-  f.dump_bool("read_error", shard.has_read_error());
-  f.dump_bool("data_digest_mismatch", shard.has_data_digest_mismatch());
-  f.dump_bool("omap_digest_mismatch", shard.has_omap_digest_mismatch());
-  f.dump_bool("size_mismatch", shard.has_size_mismatch());
-  if (!shard.has_read_error()) {
-    f.dump_bool("data_digest_mismatch_oi", shard.has_data_digest_mismatch_oi());
-    f.dump_bool("omap_digest_mismatch_oi", shard.has_omap_digest_mismatch_oi());
-    f.dump_bool("size_mismatch_oi", shard.has_size_mismatch_oi());
-  }
+
   f.dump_unsigned("size", shard.size);
   if (shard.omap_digest_present) {
     f.dump_format("omap_digest", "0x%08x", shard.omap_digest);
@@ -1266,6 +1261,30 @@ static void dump_shard(const shard_info_t& shard,
   if (shard.data_digest_present) {
     f.dump_format("data_digest", "0x%08x", shard.data_digest);
   }
+
+  f.open_array_section("errors");
+  if (shard.has_read_error())
+    f.dump_string("error", "read_error");
+  if (shard.has_data_digest_mismatch())
+    f.dump_string("error", "data_digest_mismatch");
+  if (shard.has_omap_digest_mismatch())
+    f.dump_string("error", "omap_digest_mismatch");
+  if (shard.has_size_mismatch())
+    f.dump_string("error", "size_mismatch");
+  if (!shard.has_read_error()) {
+    if (shard.has_data_digest_mismatch_oi())
+      f.dump_string("error", "data_digest_mismatch_oi");
+    if (shard.has_omap_digest_mismatch_oi())
+      f.dump_string("error", "omap_digest_mismatch_oi");
+    if (shard.has_size_mismatch_oi())
+      f.dump_string("error", "size_mismatch_oi");
+  }
+  if (shard.has_attr_missing())
+    f.dump_string("error", "attr_missing");
+  if (shard.has_attr_unexpected())
+    f.dump_string("error", "attr_unexpected");
+  f.close_section();
+
   if (inc.has_attr_mismatch()) {
     f.open_object_section("attrs");
     for (auto kv : shard.attrs) {
@@ -1295,7 +1314,7 @@ static void dump_object_id(const object_id_t& object,
     f.dump_string("snap", "snapdir");
     break;
   default:
-    f.dump_format("snap", "0x%08x", object.snap);
+    f.dump_unsigned("snap", object.snap);
     break;
   }
 }
@@ -1306,13 +1325,26 @@ static void dump_inconsistent(const inconsistent_obj_t& inc,
   f.open_object_section("object");
   dump_object_id(inc.object, f);
   f.close_section();
-  f.dump_bool("missing", inc.has_shard_missing());
-  f.dump_bool("stat_err", inc.has_stat_error());
-  f.dump_bool("read_err", inc.has_read_error());
-  f.dump_bool("data_digest_mismatch", inc.has_data_digest_mismatch());
-  f.dump_bool("omap_digest_mismatch", inc.has_omap_digest_mismatch());
-  f.dump_bool("size_mismatch", inc.has_size_mismatch());
-  f.dump_bool("attr_mismatch", inc.has_attr_mismatch());
+
+  f.open_array_section("errors");
+  if (inc.has_attr_unexpected())
+    f.dump_string("error", "attr_unexpected");
+  if (inc.has_shard_missing())
+    f.dump_string("error", "missing");
+  if (inc.has_stat_error())
+    f.dump_string("error", "stat_error");
+  if (inc.has_read_error())
+    f.dump_string("error", "read_error");
+  if (inc.has_data_digest_mismatch())
+    f.dump_string("error", "data_digest_mismatch");
+  if (inc.has_omap_digest_mismatch())
+    f.dump_string("error", "omap_digest_mismatch");
+  if (inc.has_size_mismatch())
+    f.dump_string("error", "size_mismatch");
+  if (inc.has_attr_mismatch())
+    f.dump_string("error", "attr_mismatch");
+  f.close_section();
+
   f.open_array_section("shards");
   for (auto osd_shard : inc.shards) {
     f.open_object_section("shard");
@@ -1321,35 +1353,51 @@ static void dump_inconsistent(const inconsistent_obj_t& inc,
     f.close_section();
   }
   f.close_section();
-  f.close_section();
 }
 
 static void dump_inconsistent(const inconsistent_snapset_t& inc,
 			      Formatter &f)
 {
   dump_object_id(inc.object, f);
-  f.dump_bool("ss_attr_missing", inc.ss_attr_missing());
-  f.dump_bool("ss_attr_corrupted", inc.ss_attr_corrupted());
-  f.dump_bool("clone_missing", inc.clone_missing());
-  f.dump_bool("snapset_mismatch", inc.snapset_mismatch());
-  f.dump_bool("head_mismatch", inc.head_mismatch());
-  f.dump_bool("headless", inc.headless());
-  f.dump_bool("size_mismatch", inc.size_mismatch());
 
-  if (inc.clone_missing()) {
-    f.open_array_section("clones");
+  f.open_array_section("errors");
+  if (inc.ss_attr_missing())
+    f.dump_string("error", "ss_attr_missing");
+  if (inc.ss_attr_corrupted())
+    f.dump_string("error", "ss_attr_corrupted");
+  if (inc.oi_attr_missing())
+    f.dump_string("error", "oi_attr_missing");
+  if (inc.oi_attr_corrupted())
+    f.dump_string("error", "oi_attr_corrupted");
+  if (inc.snapset_mismatch())
+    f.dump_string("error", "snapset_mismatch");
+  if (inc.head_mismatch())
+    f.dump_string("error", "head_mismatch");
+  if (inc.headless())
+    f.dump_string("error", "headless");
+  if (inc.size_mismatch())
+    f.dump_string("error", "size_mismatch");
+  if (inc.extra_clones())
+    f.dump_string("error", "extra_clones");
+  if (inc.clone_missing())
+    f.dump_string("error", "clone_missing");
+  f.close_section();
+
+  if (inc.extra_clones()) {
+    f.open_array_section("extra clones");
     for (auto snap : inc.clones) {
       f.dump_unsigned("snap", snap);
     }
     f.close_section();
+  }
 
+  if (inc.clone_missing()) {
     f.open_array_section("missing");
     for (auto snap : inc.missing) {
       f.dump_unsigned("snap", snap);
     }
     f.close_section();
   }
-  f.close_section();
 }
 
 // dispatch the call by type
@@ -1392,9 +1440,9 @@ static int do_get_inconsistent_cmd(const std::vector<const char*> &nargs,
     cerr << "bad pg: " << nargs[1] << std::endl;
     return ret;
   }
-
-  uint32_t interval = 0;
+  uint32_t interval = 0, first_interval = 0;
   const unsigned max_item_num = 32;
+  bool opened = false;
   for (librados::object_id_t start;;) {
     std::vector<T> items;
     auto completion = librados::Rados::aio_create_completion();
@@ -1403,16 +1451,29 @@ static int do_get_inconsistent_cmd(const std::vector<const char*> &nargs,
     completion->wait_for_safe();
     ret = completion->get_return_value();
     completion->release();
-    if (ret == -EAGAIN) {
-      cerr << "interval#" << interval << " expired." << std::endl;
+    if (ret < 0) {
+      if (ret == -EAGAIN)
+        cerr << "interval#" << interval << " expired." << std::endl;
+      else if (ret == -ENOENT)
+        cerr << "No scrub information available for pg " << pg << std::endl;
+      else
+        cerr << "Unknown error " << cpp_strerror(ret) << std::endl;
       break;
     }
+    // It must be the same interval every time.  EAGAIN would
+    // occur if interval changes.
+    assert(start.name.empty() || first_interval == interval);
     if (start.name.empty()) {
+      first_interval = interval;
+      formatter.open_object_section("info");
+      formatter.dump_int("epoch", interval);
       formatter.open_array_section("inconsistents");
+      opened = true;
     }
     for (auto& inc : items) {
       formatter.open_object_section("inconsistent");
       dump_inconsistent(inc, formatter);
+      formatter.close_section();
     }
     if (items.size() < max_item_num) {
       formatter.close_section();
@@ -1423,7 +1484,10 @@ static int do_get_inconsistent_cmd(const std::vector<const char*> &nargs,
     }
     items.clear();
   }
-  formatter.flush(cout);
+  if (opened) {
+    formatter.close_section();
+    formatter.flush(cout);
+  }
   return ret;
 }
 
diff --git a/src/tools/rbd/action/Journal.cc b/src/tools/rbd/action/Journal.cc
index 0c85c26..ca2620a 100644
--- a/src/tools/rbd/action/Journal.cc
+++ b/src/tools/rbd/action/Journal.cc
@@ -20,6 +20,7 @@
 #include "journal/Journaler.h"
 #include "journal/ReplayEntry.h"
 #include "journal/ReplayHandler.h"
+#include "journal/Settings.h"
 #include "librbd/journal/Types.h"
 
 namespace rbd {
@@ -171,7 +172,7 @@ class Journaler : public ::journal::Journaler {
 public:
   Journaler(librados::IoCtx& io_ctx, const std::string& journal_id,
 	    const std::string &client_id) :
-    ::journal::Journaler(io_ctx, journal_id, client_id, 5) {
+    ::journal::Journaler(io_ctx, journal_id, client_id, {}) {
   }
 
   int init() {
diff --git a/src/tools/rbd_mirror/ImageDeleter.cc b/src/tools/rbd_mirror/ImageDeleter.cc
index 528c985..234c240 100644
--- a/src/tools/rbd_mirror/ImageDeleter.cc
+++ b/src/tools/rbd_mirror/ImageDeleter.cc
@@ -21,6 +21,8 @@
 #include "common/admin_socket.h"
 #include "common/debug.h"
 #include "common/errno.h"
+#include "common/WorkQueue.h"
+#include "global/global_context.h"
 #include "librbd/internal.h"
 #include "librbd/ImageCtx.h"
 #include "librbd/ImageState.h"
@@ -72,11 +74,11 @@ private:
 };
 
 struct DeleteJournalPolicy : public librbd::journal::Policy {
-  virtual void allocate_tag_on_lock(Context *on_finish) {
-    on_finish->complete(0);
+  virtual bool append_disabled() const {
+    return true;
   }
 
-  virtual void cancel_external_replay(Context *on_finish) {
+  virtual void allocate_tag_on_lock(Context *on_finish) {
     on_finish->complete(0);
   }
 };
@@ -126,16 +128,15 @@ private:
   Commands commands;
 };
 
-ImageDeleter::ImageDeleter(RadosRef local_cluster, SafeTimer *timer,
+ImageDeleter::ImageDeleter(ContextWQ *work_queue, SafeTimer *timer,
                            Mutex *timer_lock)
-  : m_local(local_cluster),
-    m_running(1),
+  : m_running(1),
+    m_work_queue(work_queue),
     m_delete_lock("rbd::mirror::ImageDeleter::Delete"),
     m_image_deleter_thread(this),
     m_failed_timer(timer),
     m_failed_timer_lock(timer_lock),
-    m_asok_hook(new ImageDeleterAdminSocketHook((CephContext *)local_cluster->cct(),
-                this))
+    m_asok_hook(new ImageDeleterAdminSocketHook(g_ceph_context, this))
 {
   m_image_deleter_thread.create("image_deleter");
 }
@@ -171,7 +172,7 @@ void ImageDeleter::run() {
       }
     }
 
-    curr_deletion = std::move(m_delete_queue.back());
+    m_active_delete = std::move(m_delete_queue.back());
     m_delete_queue.pop_back();
     m_delete_lock.Unlock();
 
@@ -190,7 +191,8 @@ void ImageDeleter::run() {
   }
 }
 
-void ImageDeleter::schedule_image_delete(uint64_t local_pool_id,
+void ImageDeleter::schedule_image_delete(RadosRef local_rados,
+                                         uint64_t local_pool_id,
                                          const std::string& local_image_id,
                                          const std::string& local_image_name,
                                          const std::string& global_image_id) {
@@ -206,33 +208,51 @@ void ImageDeleter::schedule_image_delete(uint64_t local_pool_id,
   }
 
   m_delete_queue.push_front(unique_ptr<DeleteInfo>(
-        new DeleteInfo(local_pool_id, local_image_id, local_image_name,
-                       global_image_id)));
+        new DeleteInfo(local_rados, local_pool_id, local_image_id,
+                       local_image_name, global_image_id)));
   m_delete_queue_cond.Signal();
 }
 
 void ImageDeleter::wait_for_scheduled_deletion(const std::string& image_name,
                                                Context *ctx,
                                                bool notify_on_failed_retry) {
-  {
-    Mutex::Locker l(m_delete_lock);
 
-    auto del_info = find_delete_info(image_name);
-    if (del_info) {
-      (*del_info)->on_delete = ctx;
-      (*del_info)->notify_on_failed_retry = notify_on_failed_retry;
-      return;
-    }
+  ctx = new FunctionContext([this, ctx](int r) {
+      m_work_queue->queue(ctx, r);
+    });
+
+  Mutex::Locker l(m_delete_lock);
+  auto del_info = find_delete_info(image_name);
+  if (!del_info) {
+    // image not scheduled for deletion
+    ctx->complete(0);
+    return;
   }
 
-  // image not scheduled for deletion
-  ctx->complete(0);
+  if ((*del_info)->on_delete != nullptr) {
+    (*del_info)->on_delete->complete(-ESTALE);
+  }
+  (*del_info)->on_delete = ctx;
+  (*del_info)->notify_on_failed_retry = notify_on_failed_retry;
+}
+
+void ImageDeleter::cancel_waiter(const std::string& image_name) {
+  Mutex::Locker locker(m_delete_lock);
+  auto del_info = find_delete_info(image_name);
+  if (!del_info) {
+    return;
+  }
+
+  if ((*del_info)->on_delete != nullptr) {
+    (*del_info)->on_delete->complete(-ECANCELED);
+    (*del_info)->on_delete = nullptr;
+  }
 }
 
 bool ImageDeleter::process_image_delete() {
 
   stringstream ss;
-  curr_deletion->to_string(ss);
+  m_active_delete->to_string(ss);
   std::string del_info_str = ss.str();
   dout(10) << "start processing delete request: " << del_info_str << dendl;
   int r;
@@ -240,7 +260,8 @@ bool ImageDeleter::process_image_delete() {
 
   // remote image was disabled, now we need to delete local image
   IoCtx ioctx;
-  r = m_local->ioctx_create2(curr_deletion->local_pool_id, ioctx);
+  r = m_active_delete->local_rados->ioctx_create2(
+    m_active_delete->local_pool_id, ioctx);
   if (r < 0) {
     derr << "error accessing local pool: " << cpp_strerror(r) << dendl;
     enqueue_failed_delete(r);
@@ -250,7 +271,8 @@ bool ImageDeleter::process_image_delete() {
   dout(20) << "connected to local pool: " << ioctx.get_pool_name() << dendl;
 
   bool is_primary = false;
-  r = Journal<>::is_tag_owner(ioctx, curr_deletion->local_image_id, &is_primary);
+  r = Journal<>::is_tag_owner(ioctx, m_active_delete->local_image_id,
+                              &is_primary);
   if (r < 0 && r != -ENOENT) {
     derr << "error retrieving image primary info: " << cpp_strerror(r)
          << dendl;
@@ -260,46 +282,38 @@ bool ImageDeleter::process_image_delete() {
   if (is_primary) {
     dout(10) << "local image is the primary image, aborting deletion..."
              << dendl;
-    m_delete_lock.Lock();
-    DeleteInfo *del_info = curr_deletion.release();
-    m_delete_lock.Unlock();
-    del_info->notify(-EISPRM);
+    complete_active_delete(-EISPRM);
     return true;
   }
 
   dout(20) << "local image is not the primary" << dendl;
 
   bool has_snapshots;
-  r = image_has_snapshots_and_children(&ioctx, curr_deletion->local_image_id,
+  r = image_has_snapshots_and_children(&ioctx, m_active_delete->local_image_id,
                                        &has_snapshots);
   if (r < 0) {
     enqueue_failed_delete(r);
     return true;
   }
 
-  mirror_image.global_image_id = curr_deletion->global_image_id;
+  mirror_image.global_image_id = m_active_delete->global_image_id;
   mirror_image.state = cls::rbd::MIRROR_IMAGE_STATE_DISABLING;
-  r = cls_client::mirror_image_set(&ioctx, curr_deletion->local_image_id,
-                                           mirror_image);
+  r = cls_client::mirror_image_set(&ioctx, m_active_delete->local_image_id,
+                                   mirror_image);
   if (r == -ENOENT) {
     dout(10) << "local image is not mirrored, aborting deletion..." << dendl;
-    m_delete_lock.Lock();
-    DeleteInfo *del_info = curr_deletion.release();
-    m_delete_lock.Unlock();
-    del_info->notify(r);
+    complete_active_delete(r);
     return true;
   } else if (r == -EEXIST || r == -EINVAL) {
-    derr << "cannot disable mirroring for image id" << curr_deletion->local_image_id
+    derr << "cannot disable mirroring for image id "
+         << m_active_delete->local_image_id
          << ": global_image_id has changed/reused, aborting deletion: "
          << cpp_strerror(r) << dendl;
-    m_delete_lock.Lock();
-    DeleteInfo *del_info = curr_deletion.release();
-    m_delete_lock.Unlock();
-    del_info->notify(r);
+    complete_active_delete(r);
     return true;
   } else if (r < 0) {
     derr << "cannot disable mirroring for image id "
-         << curr_deletion->local_image_id << ": " << cpp_strerror(r) << dendl;
+         << m_active_delete->local_image_id << ": " << cpp_strerror(r) << dendl;
     enqueue_failed_delete(r);
     return true;
   }
@@ -309,12 +323,12 @@ bool ImageDeleter::process_image_delete() {
   if (has_snapshots) {
     dout(20) << "local image has snapshots" << dendl;
 
-    ImageCtx *imgctx = new ImageCtx("", curr_deletion->local_image_id, nullptr,
-                                    ioctx, false);
+    ImageCtx *imgctx = new ImageCtx("", m_active_delete->local_image_id,
+                                    nullptr, ioctx, false);
     r = imgctx->state->open();
     if (r < 0) {
-      derr << "error opening image id " << curr_deletion->local_image_id
-           << cpp_strerror(r) << dendl;
+      derr << "error opening image id " << m_active_delete->local_image_id
+           << ": " << cpp_strerror(r) << dendl;
       enqueue_failed_delete(r);
       delete imgctx;
       return true;
@@ -357,13 +371,13 @@ bool ImageDeleter::process_image_delete() {
         if (r == -EBUSY) {
           // there are still clones of snapshots of this image, therefore send
           // the delete request to the end of the queue
-          dout(10) << "local image id " << curr_deletion->local_image_id << " has "
+          dout(10) << "local image id " << m_active_delete->local_image_id << " has "
                    << "snapshots with cloned children, postponing deletion..."
                    << dendl;
           imgctx->state->close();
           Mutex::Locker l(m_delete_lock);
-          curr_deletion->notify(r);
-          m_delete_queue.push_front(std::move(curr_deletion));
+          m_active_delete->notify(r);
+          m_delete_queue.push_front(std::move(m_active_delete));
           return false;
         } else if (r < 0) {
           derr << "error unprotecting snapshot " << imgctx->name << "@"
@@ -391,9 +405,10 @@ bool ImageDeleter::process_image_delete() {
   }
 
   librbd::NoOpProgressContext ctx;
-  r = librbd::remove(ioctx, curr_deletion->local_image_name.c_str(), ctx, true);
+  r = librbd::remove(ioctx, m_active_delete->local_image_name.c_str(), ctx,
+                     true);
   if (r < 0 && r != -ENOENT) {
-    derr << "error removing image " << curr_deletion->local_image_name
+    derr << "error removing image " << m_active_delete->local_image_name
          << " from local pool: " << cpp_strerror(r) << dendl;
     enqueue_failed_delete(r);
     return true;
@@ -406,7 +421,7 @@ bool ImageDeleter::process_image_delete() {
              << dendl;
   }
 
-  r = cls_client::mirror_image_remove(&ioctx, curr_deletion->local_image_id);
+  r = cls_client::mirror_image_remove(&ioctx, m_active_delete->local_image_id);
   if (r < 0 && r != -ENOENT) {
     derr << "error removing image from mirroring directory: "
          << cpp_strerror(r) << dendl;
@@ -414,14 +429,10 @@ bool ImageDeleter::process_image_delete() {
     return true;
   }
 
-  dout(10) << "Successfully deleted image: " << curr_deletion->local_image_name
-           << dendl;
-
-  m_delete_lock.Lock();
-  DeleteInfo *del_info = curr_deletion.release();
-  m_delete_lock.Unlock();
-  del_info->notify(0);
+  dout(10) << "Successfully deleted image: "
+           << m_active_delete->local_image_name << dendl;
 
+  complete_active_delete(0);
   return true;
 }
 
@@ -443,16 +454,32 @@ int ImageDeleter::image_has_snapshots_and_children(IoCtx *ioctx,
   return 0;
 }
 
+void ImageDeleter::complete_active_delete(int r) {
+  dout(20) << dendl;
+
+  m_delete_lock.Lock();
+  DeleteInfo *del_info = m_active_delete.release();
+  assert(del_info != nullptr);
+  m_delete_lock.Unlock();
+  del_info->notify(r);
+}
+
 void ImageDeleter::enqueue_failed_delete(int error_code) {
   dout(20) << "enter" << dendl;
 
+  if (error_code == -EBLACKLISTED) {
+    derr << "blacklisted while deleting local image" << dendl;
+    complete_active_delete(error_code);
+    return;
+  }
+
   m_delete_lock.Lock();
-  if (curr_deletion->notify_on_failed_retry) {
-    curr_deletion->notify(error_code);
+  if (m_active_delete->notify_on_failed_retry) {
+    m_active_delete->notify(error_code);
   }
-  curr_deletion->error_code = error_code;
+  m_active_delete->error_code = error_code;
   bool was_empty = m_failed_queue.empty();
-  m_failed_queue.push_front(std::move(curr_deletion));
+  m_failed_queue.push_front(std::move(m_active_delete));
   m_delete_lock.Unlock();
   if (was_empty) {
     FunctionContext *ctx = new FunctionContext(
@@ -478,6 +505,29 @@ void ImageDeleter::retry_failed_deletions() {
   }
 }
 
+unique_ptr<ImageDeleter::DeleteInfo> const* ImageDeleter::find_delete_info(
+    const std::string& image_name) {
+  assert(m_delete_lock.is_locked());
+
+  if (m_active_delete && m_active_delete->match(image_name)) {
+    return &m_active_delete;
+  }
+
+  for (const auto& del_info : m_delete_queue) {
+    if (del_info->match(image_name)) {
+      return &del_info;
+    }
+  }
+
+  for (const auto& del_info : m_failed_queue) {
+    if (del_info->match(image_name)) {
+      return &del_info;
+    }
+  }
+
+  return nullptr;
+}
+
 void ImageDeleter::print_status(Formatter *f, stringstream *ss) {
   dout(20) << "enter" << dendl;
 
@@ -510,8 +560,10 @@ void ImageDeleter::print_status(Formatter *f, stringstream *ss) {
 void ImageDeleter::DeleteInfo::notify(int r) {
   if (on_delete) {
     dout(20) << "executing image deletion handler r=" << r << dendl;
-    on_delete->complete(r);
+
+    Context *ctx = on_delete;
     on_delete = nullptr;
+    ctx->complete(r);
   }
 }
 
diff --git a/src/tools/rbd_mirror/ImageDeleter.h b/src/tools/rbd_mirror/ImageDeleter.h
index 591f71a..3d994b1 100644
--- a/src/tools/rbd_mirror/ImageDeleter.h
+++ b/src/tools/rbd_mirror/ImageDeleter.h
@@ -24,6 +24,8 @@
 #include "common/Timer.h"
 #include "types.h"
 
+class ContextWQ;
+
 namespace rbd {
 namespace mirror {
 
@@ -36,18 +38,20 @@ class ImageDeleter {
 public:
   static const int EISPRM = 1000;
 
-  ImageDeleter(RadosRef local_cluster, SafeTimer *timer, Mutex *timer_lock);
+  ImageDeleter(ContextWQ *work_queue, SafeTimer *timer, Mutex *timer_lock);
   ~ImageDeleter();
   ImageDeleter(const ImageDeleter&) = delete;
   ImageDeleter& operator=(const ImageDeleter&) = delete;
 
-  void schedule_image_delete(uint64_t local_pool_id,
+  void schedule_image_delete(RadosRef local_rados,
+                             uint64_t local_pool_id,
                              const std::string& local_image_id,
                              const std::string& local_image_name,
                              const std::string& global_image_id);
   void wait_for_scheduled_deletion(const std::string& image_name,
                                    Context *ctx,
                                    bool notify_on_failed_retry=true);
+  void cancel_waiter(const std::string& image_name);
 
   void print_status(Formatter *f, std::stringstream *ss);
 
@@ -70,6 +74,7 @@ private:
   };
 
   struct DeleteInfo {
+    RadosRef local_rados;
     uint64_t local_pool_id;
     std::string local_image_id;
     std::string local_image_name;
@@ -79,13 +84,14 @@ private:
     bool notify_on_failed_retry;
     Context *on_delete;
 
-    DeleteInfo(uint64_t local_pool_id, const std::string& local_image_id,
+    DeleteInfo(RadosRef local_rados, uint64_t local_pool_id,
+               const std::string& local_image_id,
                const std::string& local_image_name,
                const std::string& global_image_id) :
-      local_pool_id(local_pool_id), local_image_id(local_image_id),
-      local_image_name(local_image_name), global_image_id(global_image_id),
-      error_code(0), retries(0), notify_on_failed_retry(true),
-      on_delete(nullptr) {
+      local_rados(local_rados), local_pool_id(local_pool_id),
+      local_image_id(local_image_id), local_image_name(local_image_name),
+      global_image_id(global_image_id), error_code(0), retries(0),
+      notify_on_failed_retry(true), on_delete(nullptr) {
     }
 
     bool match(const std::string& image_name) {
@@ -97,14 +103,15 @@ private:
                       bool print_failure_info=false);
   };
 
-  RadosRef m_local;
   atomic_t m_running;
 
+  ContextWQ *m_work_queue;
+
   std::deque<std::unique_ptr<DeleteInfo> > m_delete_queue;
   Mutex m_delete_lock;
   Cond m_delete_queue_cond;
 
-  unique_ptr<DeleteInfo> curr_deletion;
+  unique_ptr<DeleteInfo> m_active_delete;
 
   ImageDeleterThread m_image_deleter_thread;
 
@@ -121,31 +128,12 @@ private:
   int image_has_snapshots_and_children(librados::IoCtx *ioctx,
                                        std::string& image_id,
                                        bool *has_snapshots);
+
+  void complete_active_delete(int r);
   void enqueue_failed_delete(int error_code);
   void retry_failed_deletions();
 
-  unique_ptr<DeleteInfo> const* find_delete_info(
-                                             const std::string& image_name) {
-    assert(m_delete_lock.is_locked());
-
-    if (curr_deletion && curr_deletion->match(image_name)) {
-      return &curr_deletion;
-    }
-
-    for (const auto& del_info : m_delete_queue) {
-      if (del_info->match(image_name)) {
-        return &del_info;
-      }
-    }
-
-    for (const auto& del_info : m_failed_queue) {
-      if (del_info->match(image_name)) {
-        return &del_info;
-      }
-    }
-
-    return nullptr;
-  }
+  unique_ptr<DeleteInfo> const* find_delete_info(const std::string& image_name);
 };
 
 } // namespace mirror
diff --git a/src/tools/rbd_mirror/ImageReplayer.cc b/src/tools/rbd_mirror/ImageReplayer.cc
index 10ad65e..86da4bc 100644
--- a/src/tools/rbd_mirror/ImageReplayer.cc
+++ b/src/tools/rbd_mirror/ImageReplayer.cc
@@ -8,8 +8,10 @@
 #include "cls/rbd/cls_rbd_client.h"
 #include "common/Timer.h"
 #include "common/WorkQueue.h"
+#include "global/global_context.h"
 #include "journal/Journaler.h"
 #include "journal/ReplayHandler.h"
+#include "journal/Settings.h"
 #include "librbd/ExclusiveLock.h"
 #include "librbd/ImageCtx.h"
 #include "librbd/ImageState.h"
@@ -22,6 +24,7 @@
 #include "Threads.h"
 #include "tools/rbd_mirror/image_replayer/BootstrapRequest.h"
 #include "tools/rbd_mirror/image_replayer/CloseImageRequest.h"
+#include "tools/rbd_mirror/image_replayer/EventPreprocessor.h"
 #include "tools/rbd_mirror/image_replayer/ReplayStatusFormatter.h"
 
 #define dout_subsys ceph_subsys_rbd_mirror
@@ -32,6 +35,7 @@
 using std::map;
 using std::string;
 using std::unique_ptr;
+using std::shared_ptr;
 using std::vector;
 
 namespace rbd {
@@ -92,7 +96,7 @@ public:
   explicit StartCommand(ImageReplayer<I> *replayer) : replayer(replayer) {}
 
   bool call(Formatter *f, stringstream *ss) {
-    replayer->start(nullptr, nullptr, true);
+    replayer->start(nullptr, true);
     return true;
   }
 
@@ -220,6 +224,19 @@ private:
   Commands commands;
 };
 
+template <typename I>
+struct ResyncListener : public librbd::journal::ResyncListener {
+  ImageReplayer<I> *img_replayer;
+
+  ResyncListener(ImageReplayer<I> *img_replayer)
+    : img_replayer(img_replayer) {
+  }
+
+  virtual void handle_resync() {
+    img_replayer->resync_image();
+  }
+};
+
 } // anonymous namespace
 
 template <typename I>
@@ -234,7 +251,10 @@ void ImageReplayer<I>::BootstrapProgressContext::update_progress(
 }
 
 template <typename I>
-ImageReplayer<I>::ImageReplayer(Threads *threads, RadosRef local, RadosRef remote,
+ImageReplayer<I>::ImageReplayer(Threads *threads,
+                             shared_ptr<ImageDeleter> image_deleter,
+                             ImageSyncThrottlerRef<I> image_sync_throttler,
+                             RadosRef local, RadosRef remote,
 			     const std::string &local_mirror_uuid,
 			     const std::string &remote_mirror_uuid,
 			     int64_t local_pool_id,
@@ -242,6 +262,8 @@ ImageReplayer<I>::ImageReplayer(Threads *threads, RadosRef local, RadosRef remot
 			     const std::string &remote_image_id,
                              const std::string &global_image_id) :
   m_threads(threads),
+  m_image_deleter(image_deleter),
+  m_image_sync_throttler(image_sync_throttler),
   m_local(local),
   m_remote(remote),
   m_local_mirror_uuid(local_mirror_uuid),
@@ -253,7 +275,8 @@ ImageReplayer<I>::ImageReplayer(Threads *threads, RadosRef local, RadosRef remot
   m_name(stringify(remote_pool_id) + "/" + remote_image_id),
   m_lock("rbd::mirror::ImageReplayer " + stringify(remote_pool_id) + " " +
 	 remote_image_id),
-  m_progress_cxt(this)
+  m_progress_cxt(this),
+  m_resync_listener(new ResyncListener<I>(this))
 {
   // Register asok commands using a temporary "remote_pool_name/global_image_id"
   // name.  When the image name becomes known on start the asok commands will be
@@ -268,13 +291,14 @@ ImageReplayer<I>::ImageReplayer(Threads *threads, RadosRef local, RadosRef remot
   }
   m_name = pool_name + "/" + m_global_image_id;
 
-  CephContext *cct = static_cast<CephContext *>(m_local->cct());
-  m_asok_hook = new ImageReplayerAdminSocketHook<I>(cct, m_name, this);
+  m_asok_hook = new ImageReplayerAdminSocketHook<I>(g_ceph_context, m_name,
+                                                    this);
 }
 
 template <typename I>
 ImageReplayer<I>::~ImageReplayer()
 {
+  assert(m_event_preprocessor == nullptr);
   assert(m_replay_status_formatter == nullptr);
   assert(m_local_image_ctx == nullptr);
   assert(m_local_replay == nullptr);
@@ -284,6 +308,8 @@ ImageReplayer<I>::~ImageReplayer()
   assert(m_on_stop_finish == nullptr);
   assert(m_bootstrap_request == nullptr);
   assert(m_in_flight_status_updates == 0);
+
+  delete m_resync_listener;
   delete m_asok_hook;
 }
 
@@ -297,18 +323,13 @@ void ImageReplayer<I>::set_state_description(int r, const std::string &desc) {
 }
 
 template <typename I>
-void ImageReplayer<I>::start(Context *on_finish,
-			     const BootstrapParams *bootstrap_params,
-			     bool manual)
+void ImageReplayer<I>::start(Context *on_finish, bool manual)
 {
-  assert(m_on_start_finish == nullptr);
-  assert(m_on_stop_finish == nullptr);
   dout(20) << "on_finish=" << on_finish << dendl;
 
   int r = 0;
   {
     Mutex::Locker locker(m_lock);
-
     if (!is_stopped_()) {
       derr << "already running" << dendl;
       r = -EINVAL;
@@ -320,8 +341,13 @@ void ImageReplayer<I>::start(Context *on_finish,
       m_state = STATE_STARTING;
       m_last_r = 0;
       m_state_desc.clear();
-      m_on_start_finish = on_finish;
       m_manual_stop = false;
+
+      if (on_finish != nullptr) {
+        assert(m_on_start_finish == nullptr);
+        m_on_start_finish = on_finish;
+      }
+      assert(m_on_stop_finish == nullptr);
     }
   }
 
@@ -340,10 +366,6 @@ void ImageReplayer<I>::start(Context *on_finish,
     return;
   }
 
-  if (bootstrap_params != nullptr && !bootstrap_params->empty()) {
-    m_local_image_name = bootstrap_params->local_image_name;
-  }
-
   r = m_local->ioctx_create2(m_local_pool_id, m_local_ioctx);
   if (r < 0) {
     derr << "error opening ioctx for local pool " << m_local_pool_id
@@ -353,12 +375,15 @@ void ImageReplayer<I>::start(Context *on_finish,
   }
 
   CephContext *cct = static_cast<CephContext *>(m_local->cct());
-  double commit_interval = cct->_conf->rbd_journal_commit_age;
+  journal::Settings settings;
+  settings.commit_interval = cct->_conf->rbd_mirror_journal_commit_age;
+  settings.max_fetch_bytes = cct->_conf->rbd_mirror_journal_max_fetch_bytes;
+
   m_remote_journaler = new Journaler(m_threads->work_queue,
                                      m_threads->timer,
 				     &m_threads->timer_lock, m_remote_ioctx,
 				     m_remote_image_id, m_local_mirror_uuid,
-                                     commit_interval);
+                                     settings);
   bootstrap();
 }
 
@@ -371,11 +396,11 @@ void ImageReplayer<I>::bootstrap() {
     ImageReplayer, &ImageReplayer<I>::handle_bootstrap>(this);
 
   BootstrapRequest<I> *request = BootstrapRequest<I>::create(
-    m_local_ioctx, m_remote_ioctx, &m_local_image_ctx,
-    m_local_image_name, m_remote_image_id, m_global_image_id,
-    m_threads->work_queue, m_threads->timer, &m_threads->timer_lock,
-    m_local_mirror_uuid, m_remote_mirror_uuid, m_remote_journaler,
-    &m_client_meta, ctx, &m_progress_cxt);
+    m_local_ioctx, m_remote_ioctx, m_image_sync_throttler,
+    &m_local_image_ctx, m_local_image_name, m_remote_image_id,
+    m_global_image_id, m_threads->work_queue, m_threads->timer,
+    &m_threads->timer_lock, m_local_mirror_uuid, m_remote_mirror_uuid,
+    m_remote_journaler, &m_client_meta, ctx, &m_progress_cxt);
 
   {
     Mutex::Locker locker(m_lock);
@@ -417,6 +442,24 @@ void ImageReplayer<I>::handle_bootstrap(int r) {
   {
     Mutex::Locker locker(m_lock);
 
+    m_local_image_ctx->journal->add_listener(
+                                    librbd::journal::ListenerType::RESYNC,
+                                    m_resync_listener);
+
+    bool do_resync = false;
+    r = m_local_image_ctx->journal->check_resync_requested(&do_resync);
+    if (r < 0) {
+      derr << "failed to check if a resync was requested" << dendl;
+    }
+
+    if (do_resync) {
+      Context *on_finish = m_on_start_finish;
+      FunctionContext *ctx = new FunctionContext([this, on_finish](int r) {
+          resync_image(on_finish);
+        });
+      m_on_start_finish = ctx;
+    }
+
     std::string name = m_local_ioctx.get_pool_name() + "/" +
       m_local_image_ctx->name;
     if (m_name != name) {
@@ -428,8 +471,8 @@ void ImageReplayer<I>::handle_bootstrap(int r) {
       }
     }
     if (!m_asok_hook) {
-      CephContext *cct = static_cast<CephContext *>(m_local->cct());
-      m_asok_hook = new ImageReplayerAdminSocketHook<I>(cct, m_name, this);
+      m_asok_hook = new ImageReplayerAdminSocketHook<I>(g_ceph_context, m_name,
+                                                        this);
     }
   }
 
@@ -465,9 +508,23 @@ template <typename I>
 void ImageReplayer<I>::start_replay() {
   dout(20) << dendl;
 
-  Context *ctx = create_context_callback<
-    ImageReplayer, &ImageReplayer<I>::handle_start_replay>(this);
-  m_local_image_ctx->journal->start_external_replay(&m_local_replay, ctx);
+  assert(m_local_journal == nullptr);
+  {
+    RWLock::RLocker snap_locker(m_local_image_ctx->snap_lock);
+    if (m_local_image_ctx->journal != nullptr) {
+      m_local_journal = m_local_image_ctx->journal;
+
+      Context *start_ctx = create_context_callback<
+        ImageReplayer, &ImageReplayer<I>::handle_start_replay>(this);
+      Context *stop_ctx = create_context_callback<
+        ImageReplayer, &ImageReplayer<I>::handle_stop_replay_request>(this);
+      m_local_journal->start_external_replay(&m_local_replay, start_ctx,
+                                             stop_ctx);
+      return;
+    }
+  }
+
+  on_start_fail(-EINVAL, "error starting journal replay");
 }
 
 template <typename I>
@@ -475,6 +532,7 @@ void ImageReplayer<I>::handle_start_replay(int r) {
   dout(20) << "r=" << r << dendl;
 
   if (r < 0) {
+    m_local_journal = nullptr;
     derr << "error starting external replay on local image "
 	 <<  m_local_image_id << ": " << cpp_strerror(r) << dendl;
     on_start_fail(r, "error starting replay on local image");
@@ -490,6 +548,9 @@ void ImageReplayer<I>::handle_start_replay(int r) {
     std::swap(m_on_start_finish, on_finish);
   }
 
+  m_event_preprocessor = EventPreprocessor<I>::create(
+    *m_local_image_ctx, *m_remote_journaler, m_local_mirror_uuid,
+    &m_client_meta, m_threads->work_queue);
   m_replay_status_formatter =
     ReplayStatusFormatter<I>::create(m_remote_journaler, m_local_mirror_uuid);
 
@@ -502,16 +563,34 @@ void ImageReplayer<I>::handle_start_replay(int r) {
     on_finish->complete(r);
   }
 
+  if (on_replay_interrupted()) {
+    return;
+  }
+
   {
+    CephContext *cct = static_cast<CephContext *>(m_local->cct());
+    double poll_seconds = cct->_conf->rbd_mirror_journal_poll_age;
+
     Mutex::Locker locker(m_lock);
     m_replay_handler = new ReplayHandler<I>(this);
-    m_remote_journaler->start_live_replay(m_replay_handler,
-                                          1 /* TODO: configurable */);
+    m_remote_journaler->start_live_replay(m_replay_handler, poll_seconds);
 
     dout(20) << "m_remote_journaler=" << *m_remote_journaler << dendl;
   }
 
-  on_replay_interrupted();
+}
+
+template <typename I>
+void ImageReplayer<I>::handle_stop_replay_request(int r) {
+  if (r < 0) {
+    // error starting or we requested the stop -- ignore
+    return;
+  }
+
+  // journal close has been requested, stop replay so the journal
+  // can be closed (since it will wait on replay to finish)
+  dout(20) << dendl;
+  on_stop_journal_replay();
 }
 
 template <typename I>
@@ -519,7 +598,6 @@ void ImageReplayer<I>::on_start_fail(int r, const std::string &desc)
 {
   dout(20) << "r=" << r << dendl;
   Context *ctx = new FunctionContext([this, r, desc](int _r) {
-      Context *on_start_finish(nullptr);
       {
         Mutex::Locker locker(m_lock);
         m_state = STATE_STOPPING;
@@ -528,12 +606,12 @@ void ImageReplayer<I>::on_start_fail(int r, const std::string &desc)
         } else {
           dout(20) << "start canceled" << dendl;
         }
-        std::swap(m_on_start_finish, on_start_finish);
       }
 
       set_state_description(r, desc);
       update_mirror_image_status(false, boost::none);
-      shut_down(r, on_start_finish);
+      reschedule_update_status_task(-1);
+      shut_down(r);
     });
   m_threads->work_queue->queue(ctx, 0);
 }
@@ -556,6 +634,7 @@ void ImageReplayer<I>::stop(Context *on_finish, bool manual)
 {
   dout(20) << "on_finish=" << on_finish << dendl;
 
+  image_replayer::BootstrapRequest<I> *bootstrap_request = nullptr;
   bool shut_down_replay = false;
   bool running = true;
   {
@@ -567,23 +646,30 @@ void ImageReplayer<I>::stop(Context *on_finish, bool manual)
 	if (m_state == STATE_STARTING) {
 	  dout(20) << "canceling start" << dendl;
 	  if (m_bootstrap_request) {
-	    m_bootstrap_request->cancel();
+            bootstrap_request = m_bootstrap_request;
+            bootstrap_request->get();
 	  }
 	} else {
 	  dout(20) << "interrupting replay" << dendl;
 	  shut_down_replay = true;
 	}
 
-	assert(m_on_stop_finish == nullptr);
-	std::swap(m_on_stop_finish, on_finish);
-	m_stop_requested = true;
-	m_manual_stop = manual;
+        assert(m_on_stop_finish == nullptr);
+        std::swap(m_on_stop_finish, on_finish);
+        m_stop_requested = true;
+        m_manual_stop = manual;
       }
     }
   }
 
+  // avoid holding lock since bootstrap request will update status
+  if (bootstrap_request != nullptr) {
+    bootstrap_request->cancel();
+    bootstrap_request->put();
+  }
+
   if (!running) {
-    derr << "not running" << dendl;
+    dout(20) << "not running" << dendl;
     if (on_finish) {
       on_finish->complete(-EINVAL);
     }
@@ -608,12 +694,14 @@ void ImageReplayer<I>::on_stop_journal_replay()
       // might be invoked multiple times while stopping
       return;
     }
+    m_stop_requested = true;
     m_state = STATE_STOPPING;
   }
 
   set_state_description(0, "");
   update_mirror_image_status(false, boost::none);
-  shut_down(0, nullptr);
+  reschedule_update_status_task(-1);
+  shut_down(0);
 }
 
 template <typename I>
@@ -635,7 +723,7 @@ void ImageReplayer<I>::handle_replay_ready()
   }
 
   if (m_replay_tag_valid && m_replay_tag.tid == m_replay_tag_tid) {
-    process_entry();
+    preprocess_entry();
     return;
   }
 
@@ -650,7 +738,7 @@ void ImageReplayer<I>::restart(Context *on_finish)
       if (r < 0) {
 	// Try start anyway.
       }
-      start(on_finish, nullptr, true);
+      start(on_finish, true);
     });
   stop(ctx);
 }
@@ -662,7 +750,7 @@ void ImageReplayer<I>::flush(Context *on_finish)
 
   {
     Mutex::Locker locker(m_lock);
-    if (m_state == STATE_REPLAYING || m_state == STATE_REPLAYING) {
+    if (m_state == STATE_REPLAYING) {
       Context *ctx = new FunctionContext(
         [on_finish](int r) {
           if (on_finish != nullptr) {
@@ -786,19 +874,51 @@ template <typename I>
 void ImageReplayer<I>::replay_flush() {
   dout(20) << dendl;
 
+  {
+    Mutex::Locker locker(m_lock);
+    if (m_state != STATE_REPLAYING) {
+      dout(20) << "replay interrupted" << dendl;
+      return;
+    }
+    m_state = STATE_REPLAY_FLUSHING;
+  }
+
+  // shut down the replay to flush all IO and ops and create a new
+  // replayer to handle the new tag epoch
   Context *ctx = create_context_callback<
     ImageReplayer<I>, &ImageReplayer<I>::handle_replay_flush>(this);
-  flush(ctx);
+  ctx = new FunctionContext([this, ctx](int r) {
+      m_local_image_ctx->journal->stop_external_replay();
+      m_local_replay = nullptr;
+
+      if (r < 0) {
+        ctx->complete(r);
+        return;
+      }
+
+      Context *stop_ctx = create_context_callback<
+        ImageReplayer, &ImageReplayer<I>::handle_stop_replay_request>(this);
+      m_local_journal->start_external_replay(&m_local_replay, ctx, stop_ctx);
+    });
+  m_local_replay->shut_down(false, ctx);
 }
 
 template <typename I>
 void ImageReplayer<I>::handle_replay_flush(int r) {
   dout(20) << "r=" << r << dendl;
 
+  {
+    Mutex::Locker locker(m_lock);
+    assert(m_state == STATE_REPLAY_FLUSHING);
+    m_state = STATE_REPLAYING;
+  }
+
   if (r < 0) {
     derr << "replay flush encountered an error: " << cpp_strerror(r) << dendl;
     handle_replay_complete(r, "replay flush encountered an error");
     return;
+  } else if (on_replay_interrupted()) {
+    return;
   }
 
   get_remote_tag();
@@ -864,7 +984,7 @@ void ImageReplayer<I>::allocate_local_tag() {
 
   Context *ctx = create_context_callback<
     ImageReplayer, &ImageReplayer<I>::handle_allocate_local_tag>(this);
-  m_local_image_ctx->journal->allocate_tag(
+  m_local_journal->allocate_tag(
     mirror_uuid, predecessor_mirror_uuid,
     m_replay_tag_data.predecessor_commit_valid,
     m_replay_tag_data.predecessor_tag_tid,
@@ -882,21 +1002,56 @@ void ImageReplayer<I>::handle_allocate_local_tag(int r) {
     return;
   }
 
-  process_entry();
+  preprocess_entry();
 }
 
 template <typename I>
-void ImageReplayer<I>::process_entry() {
-  dout(20) << "processing entry tid=" << m_replay_entry.get_commit_tid()
+void ImageReplayer<I>::preprocess_entry() {
+  dout(20) << "preprocessing entry tid=" << m_replay_entry.get_commit_tid()
            << dendl;
 
   bufferlist data = m_replay_entry.get_data();
   bufferlist::iterator it = data.begin();
+  int r = m_local_replay->decode(&it, &m_event_entry);
+  if (r < 0) {
+    derr << "failed to decode journal event" << dendl;
+    handle_replay_complete(r, "failed to decode journal event");
+    return;
+  }
+
+  if (!m_event_preprocessor->is_required(m_event_entry)) {
+    process_entry();
+    return;
+  }
+
+  Context *ctx = create_context_callback<
+    ImageReplayer, &ImageReplayer<I>::handle_preprocess_entry>(this);
+  m_event_preprocessor->preprocess(&m_event_entry, ctx);
+}
+
+template <typename I>
+void ImageReplayer<I>::handle_preprocess_entry(int r) {
+  dout(20) << "r=" << r << dendl;
+
+  if (r < 0) {
+    derr << "failed to preprocess journal event" << dendl;
+    handle_replay_complete(r, "failed to preprocess journal event");
+    return;
+  }
+
+  process_entry();
+}
+
+template <typename I>
+void ImageReplayer<I>::process_entry() {
+  dout(20) << "processing entry tid=" << m_replay_entry.get_commit_tid()
+           << dendl;
 
   Context *on_ready = create_context_callback<
     ImageReplayer, &ImageReplayer<I>::handle_process_entry_ready>(this);
   Context *on_commit = new C_ReplayCommitted(this, std::move(m_replay_entry));
-  m_local_replay->process(&it, on_ready, on_commit);
+  m_local_replay->process(m_event_entry, on_ready, on_commit);
+  m_event_entry = {};
 }
 
 template <typename I>
@@ -921,7 +1076,9 @@ void ImageReplayer<I>::handle_process_entry_safe(const ReplayEntry& replay_entry
     return;
   }
 
-  m_remote_journaler->committed(replay_entry);
+  if (m_remote_journaler) {
+    m_remote_journaler->committed(replay_entry);
+  }
 }
 
 template <typename I>
@@ -1024,13 +1181,17 @@ void ImageReplayer<I>::send_mirror_status_update(const OptionalState &opt_state)
     }
     break;
   case STATE_REPLAYING:
+  case STATE_REPLAY_FLUSHING:
     status.state = cls::rbd::MIRROR_IMAGE_STATUS_STATE_REPLAYING;
     {
       Context *on_req_finish = new FunctionContext(
         [this](int r) {
+          dout(20) << "replay status ready: r=" << r << dendl;
           if (r >= 0) {
-            dout(20) << "replay status ready" << dendl;
             send_mirror_status_update(boost::none);
+          } else if (r == -EAGAIN) {
+            // decrement in-flight status update counter
+            handle_mirror_status_update(r);
           }
         });
 
@@ -1143,18 +1304,31 @@ void ImageReplayer<I>::reschedule_update_status_task(int new_interval) {
 }
 
 template <typename I>
-void ImageReplayer<I>::shut_down(int r, Context *on_start) {
+void ImageReplayer<I>::shut_down(int r) {
   dout(20) << "r=" << r << dendl;
   {
     Mutex::Locker locker(m_lock);
     assert(m_state == STATE_STOPPING);
+
+    // if status updates are in-flight, wait for them to complete
+    // before proceeding
+    if (m_in_flight_status_updates > 0) {
+      if (m_on_update_status_finish == nullptr) {
+        dout(20) << "waiting for in-flight status update" << dendl;
+        m_on_update_status_finish = new FunctionContext(
+          [this, r](int _r) {
+            shut_down(r);
+          });
+      }
+      return;
+    }
   }
 
   // chain the shut down sequence (reverse order)
   Context *ctx = new FunctionContext(
-    [this, r, on_start](int _r) {
+    [this, r](int _r) {
       update_mirror_image_status(true, STATE_STOPPED);
-      handle_shut_down(r, on_start);
+      handle_shut_down(r);
     });
   if (m_local_image_ctx) {
     ctx = new FunctionContext([this, ctx](int r) {
@@ -1172,17 +1346,29 @@ void ImageReplayer<I>::shut_down(int r, Context *on_start) {
     ctx = new FunctionContext([this, ctx](int r) {
         m_remote_journaler->shut_down(ctx);
       });
+    if (m_stopping_for_resync) {
+      ctx = new FunctionContext([this, ctx](int r) {
+          m_remote_journaler->unregister_client(ctx);
+        });
+    }
   }
   if (m_local_replay != nullptr) {
     ctx = new FunctionContext([this, ctx](int r) {
         if (r < 0) {
           derr << "error flushing journal replay: " << cpp_strerror(r) << dendl;
         }
-        m_local_image_ctx->journal->stop_external_replay();
+        m_local_journal->stop_external_replay();
+        m_local_journal = nullptr;
         m_local_replay = nullptr;
+
+        delete m_event_preprocessor;
+        m_event_preprocessor = nullptr;
+
         ctx->complete(0);
       });
     ctx = new FunctionContext([this, ctx](int r) {
+        m_local_journal->remove_listener(
+            librbd::journal::ListenerType::RESYNC, m_resync_listener);
         m_local_replay->shut_down(true, ctx);
       });
   }
@@ -1200,40 +1386,53 @@ void ImageReplayer<I>::shut_down(int r, Context *on_start) {
 }
 
 template <typename I>
-void ImageReplayer<I>::handle_shut_down(int r, Context *on_start) {
+void ImageReplayer<I>::handle_shut_down(int r) {
   reschedule_update_status_task(-1);
 
-  Context *on_stop = nullptr;
   {
     Mutex::Locker locker(m_lock);
 
     // if status updates are in-flight, wait for them to complete
     // before proceeding
     if (m_in_flight_status_updates > 0) {
-      dout(20) << "waiting for in-flight status update" << dendl;
-      assert(m_on_update_status_finish == nullptr);
-      m_on_update_status_finish = new FunctionContext(
-        [this, r, on_start](int r) {
-          handle_shut_down(r, on_start);
-        });
+      if (m_on_update_status_finish == nullptr) {
+        dout(20) << "waiting for in-flight status update" << dendl;
+        m_on_update_status_finish = new FunctionContext(
+          [this, r](int _r) {
+            handle_shut_down(r);
+          });
+      }
       return;
     }
 
-    std::swap(on_stop, m_on_stop_finish);
-    m_stop_requested = false;
-    assert(m_state == STATE_STOPPING);
-    m_state = STATE_STOPPED;
-    m_state_desc.clear();
-    m_last_r = 0;
+    if (m_stopping_for_resync) {
+      m_image_deleter->schedule_image_delete(m_local,
+                                             m_local_pool_id,
+                                             m_local_image_id,
+                                             m_local_image_name,
+                                             m_global_image_id);
+      m_stopping_for_resync = false;
+    }
   }
-  dout(20) << "stop complete" << dendl;
 
+  dout(20) << "stop complete" << dendl;
   m_local_ioctx.close();
   m_remote_ioctx.close();
 
   delete m_replay_status_formatter;
   m_replay_status_formatter = nullptr;
 
+  Context *on_start = nullptr;
+  Context *on_stop = nullptr;
+  {
+    Mutex::Locker locker(m_lock);
+    std::swap(on_start, m_on_start_finish);
+    std::swap(on_stop, m_on_stop_finish);
+    m_stop_requested = false;
+    assert(m_state == STATE_STOPPING);
+    m_state = STATE_STOPPED;
+  }
+
   if (on_start != nullptr) {
     dout(20) << "on start finish complete, r=" << r << dendl;
     on_start->complete(r);
@@ -1252,6 +1451,8 @@ std::string ImageReplayer<I>::to_string(const State state) {
     return "Starting";
   case ImageReplayer<I>::STATE_REPLAYING:
     return "Replaying";
+  case ImageReplayer<I>::STATE_REPLAY_FLUSHING:
+    return "ReplayFlushing";
   case ImageReplayer<I>::STATE_STOPPING:
     return "Stopping";
   case ImageReplayer<I>::STATE_STOPPED:
@@ -1263,6 +1464,18 @@ std::string ImageReplayer<I>::to_string(const State state) {
 }
 
 template <typename I>
+void ImageReplayer<I>::resync_image(Context *on_finish) {
+  dout(20) << dendl;
+
+  {
+    Mutex::Locker l(m_lock);
+    m_stopping_for_resync = true;
+  }
+
+  stop(on_finish);
+}
+
+template <typename I>
 std::ostream &operator<<(std::ostream &os, const ImageReplayer<I> &replayer)
 {
   os << "ImageReplayer: " << &replayer << " [" << replayer.get_local_pool_id()
diff --git a/src/tools/rbd_mirror/ImageReplayer.h b/src/tools/rbd_mirror/ImageReplayer.h
index 6b0f993..1b1a742 100644
--- a/src/tools/rbd_mirror/ImageReplayer.h
+++ b/src/tools/rbd_mirror/ImageReplayer.h
@@ -15,8 +15,10 @@
 #include "cls/journal/cls_journal_types.h"
 #include "cls/rbd/cls_rbd_types.h"
 #include "journal/ReplayEntry.h"
+#include "librbd/ImageCtx.h"
 #include "librbd/journal/Types.h"
 #include "librbd/journal/TypeTraits.h"
+#include "ImageDeleter.h"
 #include "ProgressContext.h"
 #include "types.h"
 #include <boost/optional.hpp>
@@ -43,6 +45,7 @@ namespace mirror {
 struct Threads;
 
 namespace image_replayer { template <typename> class BootstrapRequest; }
+namespace image_replayer { template <typename> class EventPreprocessor; }
 namespace image_replayer { template <typename> class ReplayStatusFormatter; }
 
 /**
@@ -57,24 +60,15 @@ public:
     STATE_UNKNOWN,
     STATE_STARTING,
     STATE_REPLAYING,
+    STATE_REPLAY_FLUSHING,
     STATE_STOPPING,
     STATE_STOPPED,
   };
 
-  struct BootstrapParams {
-    std::string local_image_name;
-
-    BootstrapParams() {}
-    BootstrapParams(const std::string local_image_name) :
-      local_image_name(local_image_name) {}
-
-    bool empty() const {
-      return local_image_name.empty();
-    }
-  };
-
-  ImageReplayer(Threads *threads, RadosRef local, RadosRef remote,
-		const std::string &local_mirror_uuid,
+  ImageReplayer(Threads *threads, std::shared_ptr<ImageDeleter> image_deleter,
+                ImageSyncThrottlerRef<ImageCtxT> image_sync_throttler,
+                RadosRef local, RadosRef remote,
+                const std::string &local_mirror_uuid,
                 const std::string &remote_mirror_uuid, int64_t local_pool_id,
 		int64_t remote_pool_id, const std::string &remote_image_id,
                 const std::string &global_image_id);
@@ -85,10 +79,16 @@ public:
   State get_state() { Mutex::Locker l(m_lock); return get_state_(); }
   bool is_stopped() { Mutex::Locker l(m_lock); return is_stopped_(); }
   bool is_running() { Mutex::Locker l(m_lock); return is_running_(); }
+  bool is_replaying() { Mutex::Locker l(m_lock); return is_replaying_(); }
 
   std::string get_name() { Mutex::Locker l(m_lock); return m_name; };
   void set_state_description(int r, const std::string &desc);
 
+  inline bool is_blacklisted() const {
+    Mutex::Locker locker(m_lock);
+    return (m_last_r == -EBLACKLISTED);
+  }
+
   inline int64_t get_local_pool_id() const {
     return m_local_pool_id;
   }
@@ -110,13 +110,13 @@ public:
     return m_local_image_name;
   }
 
-  void start(Context *on_finish = nullptr,
-	     const BootstrapParams *bootstrap_params = nullptr,
-	     bool manual = false);
+  void start(Context *on_finish = nullptr, bool manual = false);
   void stop(Context *on_finish = nullptr, bool manual = false);
   void restart(Context *on_finish = nullptr);
   void flush(Context *on_finish = nullptr);
 
+  void resync_image(Context *on_finish=nullptr);
+
   void print_status(Formatter *f, stringstream *ss);
 
   virtual void handle_replay_ready();
@@ -166,6 +166,9 @@ protected:
    *    |                     ALLOCATE_LOCAL_TAG  * * * * * *
    *    |                         |                     |   *
    *    |                         v                 (error) *
+   *    |                     PREPROCESS_ENTRY  * * * * * * *
+   *    |                         |                     |   *
+   *    |                         v                 (error) *
    *    |                     PROCESS_ENTRY * * * * * * * * *
    *    |                         |                     |   *
    *    |                         \---------------------/   *
@@ -215,6 +218,8 @@ private:
   };
 
   Threads *m_threads;
+  std::shared_ptr<ImageDeleter> m_image_deleter;
+  ImageSyncThrottlerRef<ImageCtxT> m_image_sync_throttler;
   RadosRef m_local, m_remote;
   std::string m_local_mirror_uuid;
   std::string m_remote_mirror_uuid;
@@ -222,18 +227,23 @@ private:
   std::string m_remote_image_id, m_local_image_id, m_global_image_id;
   std::string m_local_image_name;
   std::string m_name;
-  Mutex m_lock;
+  mutable Mutex m_lock;
   State m_state = STATE_STOPPED;
   int m_last_r = 0;
   std::string m_state_desc;
   BootstrapProgressContext m_progress_cxt;
+  image_replayer::EventPreprocessor<ImageCtxT> *m_event_preprocessor = nullptr;
   image_replayer::ReplayStatusFormatter<ImageCtxT> *m_replay_status_formatter =
     nullptr;
   librados::IoCtx m_local_ioctx, m_remote_ioctx;
   ImageCtxT *m_local_image_ctx = nullptr;
+
+  decltype(ImageCtxT::journal) m_local_journal = nullptr;
   librbd::journal::Replay<ImageCtxT> *m_local_replay = nullptr;
   Journaler* m_remote_journaler = nullptr;
   ::journal::ReplayHandler *m_replay_handler = nullptr;
+  librbd::journal::ResyncListener *m_resync_listener;
+  bool m_stopping_for_resync = false;
 
   Context *m_on_start_finish = nullptr;
   Context *m_on_stop_finish = nullptr;
@@ -258,6 +268,7 @@ private:
   uint64_t m_replay_tag_tid = 0;
   cls::journal::Tag m_replay_tag;
   librbd::journal::TagData m_replay_tag_data;
+  librbd::journal::EventEntry m_event_entry;
 
   struct C_ReplayCommitted : public Context {
     ImageReplayer *replayer;
@@ -283,6 +294,10 @@ private:
   bool is_running_() const {
     return !is_stopped_() && m_state != STATE_STOPPING && !m_stop_requested;
   }
+  bool is_replaying_() const {
+    return (m_state == STATE_REPLAYING ||
+            m_state == STATE_REPLAY_FLUSHING);
+  }
 
   bool update_mirror_image_status(bool force, const OptionalState &state);
   bool start_mirror_image_status_update(bool force, bool restarting);
@@ -292,8 +307,8 @@ private:
   void handle_mirror_status_update(int r);
   void reschedule_update_status_task(int new_interval = 0);
 
-  void shut_down(int r, Context *on_start);
-  void handle_shut_down(int r, Context *on_start);
+  void shut_down(int r);
+  void handle_shut_down(int r);
 
   void bootstrap();
   void handle_bootstrap(int r);
@@ -303,6 +318,7 @@ private:
 
   void start_replay();
   void handle_start_replay(int r);
+  void handle_stop_replay_request(int r);
 
   void replay_flush();
   void handle_replay_flush(int r);
@@ -313,6 +329,9 @@ private:
   void allocate_local_tag();
   void handle_allocate_local_tag(int r);
 
+  void preprocess_entry();
+  void handle_preprocess_entry(int r);
+
   void process_entry();
   void handle_process_entry_ready(int r);
   void handle_process_entry_safe(const ReplayEntry& replay_entry, int r);
diff --git a/src/tools/rbd_mirror/ImageSync.cc b/src/tools/rbd_mirror/ImageSync.cc
index 2f28fd4..1f55a03 100644
--- a/src/tools/rbd_mirror/ImageSync.cc
+++ b/src/tools/rbd_mirror/ImageSync.cc
@@ -73,13 +73,16 @@ template <typename I>
 void ImageSync<I>::send_prune_catch_up_sync_point() {
   update_progress("PRUNE_CATCH_UP_SYNC_POINT");
 
-  if (m_client_meta->sync_points.size() <= 1) {
+  if (m_client_meta->sync_points.empty()) {
     send_create_sync_point();
     return;
   }
 
   dout(20) << dendl;
 
+  // prune will remove sync points with missing snapshots and
+  // ensure we have a maximum of one sync point (in case we
+  // restarted)
   Context *ctx = create_context_callback<
     ImageSync<I>, &ImageSync<I>::handle_prune_catch_up_sync_point>(this);
   SyncPointPruneRequest<I> *request = SyncPointPruneRequest<I>::create(
diff --git a/src/tools/rbd_mirror/ImageSyncThrottler.cc b/src/tools/rbd_mirror/ImageSyncThrottler.cc
new file mode 100644
index 0000000..2a22b35
--- /dev/null
+++ b/src/tools/rbd_mirror/ImageSyncThrottler.cc
@@ -0,0 +1,253 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2016 SUSE LINUX GmbH
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation.  See file COPYING.
+ *
+ */
+
+#include "ImageSyncThrottler.h"
+#include "ImageSync.h"
+#include "common/ceph_context.h"
+
+#define dout_subsys ceph_subsys_rbd_mirror
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::ImageSyncThrottler:: " << this \
+                           << " " << __func__ << ": "
+using std::unique_ptr;
+using std::string;
+using std::set;
+
+namespace rbd {
+namespace mirror {
+
+template <typename I>
+ImageSyncThrottler<I>::ImageSyncThrottler()
+  : m_max_concurrent_syncs(g_ceph_context->_conf->rbd_mirror_concurrent_image_syncs),
+    m_lock("rbd::mirror::ImageSyncThrottler")
+{
+  dout(20) << "Initialized max_concurrent_syncs=" << m_max_concurrent_syncs
+           << dendl;
+  g_ceph_context->_conf->add_observer(this);
+}
+
+template <typename I>
+ImageSyncThrottler<I>::~ImageSyncThrottler() {
+  {
+    Mutex::Locker l(m_lock);
+    assert(m_sync_queue.empty());
+    assert(m_inflight_syncs.empty());
+  }
+
+  g_ceph_context->_conf->remove_observer(this);
+}
+
+template <typename I>
+void ImageSyncThrottler<I>::start_sync(I *local_image_ctx, I *remote_image_ctx,
+                                       SafeTimer *timer, Mutex *timer_lock,
+                                       const std::string &mirror_uuid,
+                                       Journaler *journaler,
+                                       MirrorPeerClientMeta *client_meta,
+                                       ContextWQ *work_queue,
+                                       Context *on_finish,
+                                       ProgressContext *progress_ctx) {
+  dout(20) << dendl;
+
+  PoolImageId pool_image_id(local_image_ctx->md_ctx.get_id(),
+                            local_image_ctx->id);
+  C_SyncHolder *sync_holder_ctx = new C_SyncHolder(this, pool_image_id,
+                                                   on_finish);
+  sync_holder_ctx->m_sync = ImageSync<I>::create(local_image_ctx,
+                                                 remote_image_ctx, timer,
+                                                 timer_lock, mirror_uuid,
+                                                 journaler, client_meta,
+                                                 work_queue, sync_holder_ctx,
+                                                 progress_ctx);
+  sync_holder_ctx->m_sync->get();
+
+  bool start = false;
+  {
+    Mutex::Locker l(m_lock);
+
+    if (m_inflight_syncs.size() < m_max_concurrent_syncs) {
+      assert(m_inflight_syncs.count(pool_image_id) == 0);
+      m_inflight_syncs[pool_image_id] = sync_holder_ctx;
+      start = true;
+      dout(10) << "ready to start image sync for local_image_id "
+               << local_image_ctx->id << " [" << m_inflight_syncs.size() << "/"
+               << m_max_concurrent_syncs << "]" << dendl;
+    } else {
+      m_sync_queue.push_front(sync_holder_ctx);
+      dout(10) << "image sync for local_image_id " << local_image_ctx->id
+               << " has been queued" << dendl;
+    }
+  }
+
+  if (start) {
+    sync_holder_ctx->m_sync->send();
+  }
+}
+
+template <typename I>
+void ImageSyncThrottler<I>::cancel_sync(librados::IoCtx &local_io_ctx,
+                                        const std::string local_image_id) {
+  dout(20) << dendl;
+
+  C_SyncHolder *sync_holder = nullptr;
+  bool running_sync = true;
+
+  {
+    Mutex::Locker l(m_lock);
+    if (m_inflight_syncs.empty()) {
+      // no image sync currently running and neither waiting
+      return;
+    }
+
+    PoolImageId local_pool_image_id(local_io_ctx.get_id(),
+                                    local_image_id);
+    auto it = m_inflight_syncs.find(local_pool_image_id);
+    if (it != m_inflight_syncs.end()) {
+      sync_holder = it->second;
+    }
+
+    if (!sync_holder) {
+      for (auto it = m_sync_queue.begin(); it != m_sync_queue.end(); ++it) {
+        if ((*it)->m_local_pool_image_id == local_pool_image_id) {
+          sync_holder = (*it);
+          m_sync_queue.erase(it);
+          running_sync = false;
+          break;
+        }
+      }
+    }
+  }
+
+  if (sync_holder) {
+    if (running_sync) {
+      dout(10) << "canceled running image sync for local_image_id "
+               << sync_holder->m_local_pool_image_id.second << dendl;
+      sync_holder->m_sync->cancel();
+    } else {
+      dout(10) << "canceled waiting image sync for local_image_id "
+               << sync_holder->m_local_pool_image_id.second << dendl;
+      sync_holder->m_on_finish->complete(-ECANCELED);
+      sync_holder->m_sync->put();
+      delete sync_holder;
+    }
+  }
+}
+
+template <typename I>
+void ImageSyncThrottler<I>::handle_sync_finished(C_SyncHolder *sync_holder) {
+  dout(20) << dendl;
+
+  C_SyncHolder *next_sync_holder = nullptr;
+
+  {
+    Mutex::Locker l(m_lock);
+    m_inflight_syncs.erase(sync_holder->m_local_pool_image_id);
+
+    if (m_inflight_syncs.size() < m_max_concurrent_syncs &&
+        !m_sync_queue.empty()) {
+      next_sync_holder = m_sync_queue.back();
+      m_sync_queue.pop_back();
+
+      assert(
+        m_inflight_syncs.count(next_sync_holder->m_local_pool_image_id) == 0);
+      m_inflight_syncs[next_sync_holder->m_local_pool_image_id] =
+        next_sync_holder;
+      dout(10) << "ready to start image sync for local_image_id "
+               << next_sync_holder->m_local_pool_image_id.second
+               << " [" << m_inflight_syncs.size() << "/"
+               << m_max_concurrent_syncs << "]" << dendl;
+    }
+
+    dout(10) << "currently running image syncs [" << m_inflight_syncs.size()
+             << "/" << m_max_concurrent_syncs << "]" << dendl;
+  }
+
+  if (next_sync_holder) {
+    next_sync_holder->m_sync->send();
+  }
+}
+
+template <typename I>
+void ImageSyncThrottler<I>::set_max_concurrent_syncs(uint32_t max) {
+  dout(20) << " max=" << max << dendl;
+
+  assert(max > 0);
+
+  std::list<C_SyncHolder *> next_sync_holders;
+  {
+    Mutex::Locker l(m_lock);
+    this->m_max_concurrent_syncs = max;
+
+    // Start waiting syncs in the case of available free slots
+    while(m_inflight_syncs.size() < m_max_concurrent_syncs
+          && !m_sync_queue.empty()) {
+        C_SyncHolder *next_sync_holder = m_sync_queue.back();
+        next_sync_holders.push_back(next_sync_holder);
+        m_sync_queue.pop_back();
+
+        assert(
+          m_inflight_syncs.count(next_sync_holder->m_local_pool_image_id) == 0);
+        m_inflight_syncs[next_sync_holder->m_local_pool_image_id] =
+          next_sync_holder;
+
+        dout(10) << "ready to start image sync for local_image_id "
+                 << next_sync_holder->m_local_pool_image_id.second
+                 << " [" << m_inflight_syncs.size() << "/"
+                 << m_max_concurrent_syncs << "]" << dendl;
+    }
+  }
+
+  for (const auto& sync_holder : next_sync_holders) {
+    sync_holder->m_sync->send();
+  }
+}
+
+template <typename I>
+void ImageSyncThrottler<I>::print_status(Formatter *f, stringstream *ss) {
+  Mutex::Locker l(m_lock);
+
+  if (f) {
+    f->dump_int("max_parallel_syncs", m_max_concurrent_syncs);
+    f->dump_int("running_syncs", m_inflight_syncs.size());
+    f->dump_int("waiting_syncs", m_sync_queue.size());
+    f->flush(*ss);
+  } else {
+    *ss << "[ ";
+    *ss << "max_parallel_syncs=" << m_max_concurrent_syncs << ", ";
+    *ss << "running_syncs=" << m_inflight_syncs.size() << ", ";
+    *ss << "waiting_syncs=" << m_sync_queue.size() << " ]";
+  }
+}
+
+template <typename I>
+const char** ImageSyncThrottler<I>::get_tracked_conf_keys() const {
+  static const char* KEYS[] = {
+    "rbd_mirror_concurrent_image_syncs",
+    NULL
+  };
+  return KEYS;
+}
+
+template <typename I>
+void ImageSyncThrottler<I>::handle_conf_change(
+                                              const struct md_config_t *conf,
+                                              const set<string> &changed) {
+  if (changed.count("rbd_mirror_concurrent_image_syncs")) {
+    set_max_concurrent_syncs(conf->rbd_mirror_concurrent_image_syncs);
+  }
+}
+
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::ImageSyncThrottler<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/ImageSyncThrottler.h b/src/tools/rbd_mirror/ImageSyncThrottler.h
new file mode 100644
index 0000000..6c3edf1
--- /dev/null
+++ b/src/tools/rbd_mirror/ImageSyncThrottler.h
@@ -0,0 +1,107 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2016 SUSE LINUX GmbH
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation.  See file COPYING.
+ *
+ */
+
+#ifndef CEPH_RBD_MIRROR_IMAGE_SYNC_THROTTLER_H
+#define CEPH_RBD_MIRROR_IMAGE_SYNC_THROTTLER_H
+
+#include <list>
+#include <map>
+#include <utility>
+#include "common/Mutex.h"
+#include "librbd/ImageCtx.h"
+#include "include/Context.h"
+#include "librbd/journal/TypeTraits.h"
+
+class CephContext;
+class Context;
+class ContextWQ;
+class SafeTimer;
+namespace journal { class Journaler; }
+namespace librbd { namespace journal { struct MirrorPeerClientMeta; } }
+
+namespace rbd {
+namespace mirror {
+
+template <typename> class ImageSync;
+
+class ProgressContext;
+
+/**
+ * Manage concurrent image-syncs
+ */
+template <typename ImageCtxT = librbd::ImageCtx>
+class ImageSyncThrottler : public md_config_obs_t {
+public:
+
+  typedef librbd::journal::TypeTraits<ImageCtxT> TypeTraits;
+  typedef typename TypeTraits::Journaler Journaler;
+  typedef librbd::journal::MirrorPeerClientMeta MirrorPeerClientMeta;
+
+  ImageSyncThrottler();
+  ~ImageSyncThrottler();
+  ImageSyncThrottler(const ImageSyncThrottler&) = delete;
+  ImageSyncThrottler& operator=(const ImageSyncThrottler&) = delete;
+
+  void start_sync(ImageCtxT *local_image_ctx,
+                  ImageCtxT *remote_image_ctx, SafeTimer *timer,
+                  Mutex *timer_lock, const std::string &mirror_uuid,
+                  Journaler *journaler, MirrorPeerClientMeta *client_meta,
+                  ContextWQ *work_queue, Context *on_finish,
+                  ProgressContext *progress_ctx = nullptr);
+
+  void cancel_sync(librados::IoCtx &local_io_ctx,
+                   const std::string local_image_id);
+
+  void set_max_concurrent_syncs(uint32_t max);
+
+  void print_status(Formatter *f, std::stringstream *ss);
+
+private:
+  typedef std::pair<int64_t, std::string> PoolImageId;
+
+  struct C_SyncHolder : public Context {
+    ImageSyncThrottler<ImageCtxT> *m_sync_throttler;
+    PoolImageId m_local_pool_image_id;
+    ImageSync<ImageCtxT> *m_sync = nullptr;
+    Context *m_on_finish;
+
+    C_SyncHolder(ImageSyncThrottler<ImageCtxT> *sync_throttler,
+                 const PoolImageId &local_pool_image_id, Context *on_finish)
+      : m_sync_throttler(sync_throttler),
+        m_local_pool_image_id(local_pool_image_id), m_on_finish(on_finish) {
+    }
+
+    virtual void finish(int r) {
+      m_sync_throttler->handle_sync_finished(this);
+      m_on_finish->complete(r);
+    }
+  };
+
+  void handle_sync_finished(C_SyncHolder *sync_holder);
+
+  const char **get_tracked_conf_keys() const;
+  void handle_conf_change(const struct md_config_t *conf,
+                          const std::set<std::string> &changed);
+
+  uint32_t m_max_concurrent_syncs;
+  Mutex m_lock;
+  std::list<C_SyncHolder *> m_sync_queue;
+  std::map<PoolImageId, C_SyncHolder *> m_inflight_syncs;
+
+};
+
+} // namespace mirror
+} // namespace rbd
+
+#endif // CEPH_RBD_MIRROR_IMAGE_SYNC_THROTTLER_H
diff --git a/src/tools/rbd_mirror/Mirror.cc b/src/tools/rbd_mirror/Mirror.cc
index 71e64ed..de8e378 100644
--- a/src/tools/rbd_mirror/Mirror.cc
+++ b/src/tools/rbd_mirror/Mirror.cc
@@ -9,6 +9,7 @@
 #include "common/errno.h"
 #include "Mirror.h"
 #include "Threads.h"
+#include "ImageSync.h"
 
 #define dout_subsys ceph_subsys_rbd_mirror
 #undef dout_prefix
@@ -217,9 +218,12 @@ int Mirror::init()
   // TODO: make interval configurable
   m_local_cluster_watcher.reset(new ClusterWatcher(m_local, m_lock));
 
-  m_image_deleter.reset(new ImageDeleter(m_local, m_threads->timer,
+  m_image_deleter.reset(new ImageDeleter(m_threads->work_queue,
+                                         m_threads->timer,
                                          &m_threads->timer_lock));
 
+  m_image_sync_throttler.reset(new ImageSyncThrottler<>());
+
   return r;
 }
 
@@ -235,6 +239,13 @@ void Mirror::run()
     // TODO: make interval configurable
     m_cond.WaitInterval(g_ceph_context, m_lock, seconds(30));
   }
+
+  // stop all replayers in parallel
+  Mutex::Locker locker(m_lock);
+  for (auto it = m_replayers.begin(); it != m_replayers.end(); it++) {
+    auto &replayer = it->second;
+    replayer->stop(false);
+  }
   dout(20) << "return" << dendl;
 }
 
@@ -267,6 +278,13 @@ void Mirror::print_status(Formatter *f, stringstream *ss)
 
   if (f) {
     f->close_section();
+    f->open_object_section("sync_throttler");
+  }
+
+  m_image_sync_throttler->print_status(f, ss);
+
+  if (f) {
+    f->close_section();
     f->close_section();
     f->flush(*ss);
   }
@@ -302,7 +320,7 @@ void Mirror::stop()
 
   for (auto it = m_replayers.begin(); it != m_replayers.end(); it++) {
     auto &replayer = it->second;
-    replayer->stop();
+    replayer->stop(true);
   }
 }
 
@@ -347,8 +365,12 @@ void Mirror::update_replayers(const PoolPeers &pool_peers)
   for (auto it = m_replayers.begin(); it != m_replayers.end();) {
     auto &peer = it->first.second;
     auto pool_peer_it = pool_peers.find(it->first.first);
-    if (pool_peer_it == pool_peers.end() ||
-        pool_peer_it->second.find(peer) == pool_peer_it->second.end()) {
+    if (it->second->is_blacklisted()) {
+      derr << "removing blacklisted replayer for " << peer << dendl;
+      // TODO: make async
+      it = m_replayers.erase(it);
+    } else if (pool_peer_it == pool_peers.end() ||
+               pool_peer_it->second.find(peer) == pool_peer_it->second.end()) {
       dout(20) << "removing replayer for " << peer << dendl;
       // TODO: make async
       it = m_replayers.erase(it);
@@ -363,8 +385,8 @@ void Mirror::update_replayers(const PoolPeers &pool_peers)
       if (m_replayers.find(pool_peer) == m_replayers.end()) {
         dout(20) << "starting replayer for " << peer << dendl;
         unique_ptr<Replayer> replayer(new Replayer(m_threads, m_image_deleter,
-                                                   m_local, kv.first, peer,
-                                                   m_args));
+                                                   m_image_sync_throttler,
+                                                   kv.first, peer, m_args));
         // TODO: make async, and retry connecting within replayer
         int r = replayer->init();
         if (r < 0) {
diff --git a/src/tools/rbd_mirror/Mirror.h b/src/tools/rbd_mirror/Mirror.h
index 88f0669..f7a4d02 100644
--- a/src/tools/rbd_mirror/Mirror.h
+++ b/src/tools/rbd_mirror/Mirror.h
@@ -62,6 +62,7 @@ private:
   // monitor local cluster for config changes in peers
   std::unique_ptr<ClusterWatcher> m_local_cluster_watcher;
   std::shared_ptr<ImageDeleter> m_image_deleter;
+  ImageSyncThrottlerRef<> m_image_sync_throttler;
   std::map<PoolPeer, std::unique_ptr<Replayer> > m_replayers;
   atomic_t m_stopping;
   bool m_manual_stop = false;
diff --git a/src/tools/rbd_mirror/PoolWatcher.cc b/src/tools/rbd_mirror/PoolWatcher.cc
index 21a2633..3e431f2 100644
--- a/src/tools/rbd_mirror/PoolWatcher.cc
+++ b/src/tools/rbd_mirror/PoolWatcher.cc
@@ -48,6 +48,11 @@ PoolWatcher::~PoolWatcher()
   m_timer.shutdown();
 }
 
+bool PoolWatcher::is_blacklisted() const {
+  assert(m_lock.is_locked());
+  return m_blacklisted;
+}
+
 const PoolWatcher::ImageIds& PoolWatcher::get_images() const
 {
   assert(m_lock.is_locked());
@@ -62,6 +67,9 @@ void PoolWatcher::refresh_images(bool reschedule)
   Mutex::Locker l(m_lock);
   if (r >= 0) {
     m_images = std::move(image_ids);
+  } else if (r == -EBLACKLISTED) {
+    derr << "blacklisted during image refresh" << dendl;
+    m_blacklisted = true;
   }
 
   if (!m_stopping && reschedule) {
diff --git a/src/tools/rbd_mirror/PoolWatcher.h b/src/tools/rbd_mirror/PoolWatcher.h
index d29a630..4aeca3d 100644
--- a/src/tools/rbd_mirror/PoolWatcher.h
+++ b/src/tools/rbd_mirror/PoolWatcher.h
@@ -50,6 +50,8 @@ public:
   PoolWatcher(const PoolWatcher&) = delete;
   PoolWatcher& operator=(const PoolWatcher&) = delete;
 
+  bool is_blacklisted() const;
+
   const ImageIds& get_images() const;
   void refresh_images(bool reschedule=true);
 
@@ -59,6 +61,7 @@ private:
   Cond &m_refresh_cond;
 
   bool m_stopping = false;
+  bool m_blacklisted = false;
   SafeTimer m_timer;
   double m_interval;
 
diff --git a/src/tools/rbd_mirror/Replayer.cc b/src/tools/rbd_mirror/Replayer.cc
index 07b7991..fd13a8d 100644
--- a/src/tools/rbd_mirror/Replayer.cc
+++ b/src/tools/rbd_mirror/Replayer.cc
@@ -12,6 +12,7 @@
 #include "common/errno.h"
 #include "include/stringify.h"
 #include "cls/rbd/cls_rbd_client.h"
+#include "global/global_context.h"
 #include "librbd/ObjectWatcher.h"
 #include "librbd/internal.h"
 #include "Replayer.h"
@@ -72,7 +73,7 @@ public:
   explicit StopCommand(Replayer *replayer) : replayer(replayer) {}
 
   bool call(Formatter *f, stringstream *ss) {
-    replayer->stop();
+    replayer->stop(true);
     return true;
   }
 
@@ -229,15 +230,15 @@ private:
 };
 
 Replayer::Replayer(Threads *threads, std::shared_ptr<ImageDeleter> image_deleter,
-                   RadosRef local_cluster, int64_t local_pool_id,
-                   const peer_t &peer, const std::vector<const char*> &args) :
+                   ImageSyncThrottlerRef<> image_sync_throttler,
+                   int64_t local_pool_id, const peer_t &peer,
+                   const std::vector<const char*> &args) :
   m_threads(threads),
   m_image_deleter(image_deleter),
+  m_image_sync_throttler(image_sync_throttler),
   m_lock(stringify("rbd::mirror::Replayer ") + stringify(peer)),
   m_peer(peer),
   m_args(args),
-  m_local(local_cluster),
-  m_remote(new librados::Rados),
   m_local_pool_id(local_pool_id),
   m_asok_hook(nullptr),
   m_replayer_thread(this)
@@ -258,36 +259,82 @@ Replayer::~Replayer()
   }
 }
 
+bool Replayer::is_blacklisted() const {
+  Mutex::Locker locker(m_lock);
+  return m_blacklisted;
+}
+
 int Replayer::init()
 {
   dout(20) << "replaying for " << m_peer << dendl;
 
-  int r = m_local->ioctx_create2(m_local_pool_id, m_local_io_ctx);
+  int r = init_rados(g_ceph_context->_conf->cluster,
+                     g_ceph_context->_conf->name.to_str(),
+                     "local cluster", &m_local_rados);
+  if (r < 0) {
+    return r;
+  }
+
+  r = init_rados(m_peer.cluster_name, m_peer.client_name,
+                 std::string("remote peer ") + stringify(m_peer),
+                 &m_remote_rados);
+  if (r < 0) {
+    return r;
+  }
+
+  r = m_local_rados->ioctx_create2(m_local_pool_id, m_local_io_ctx);
   if (r < 0) {
     derr << "error accessing local pool " << m_local_pool_id << ": "
          << cpp_strerror(r) << dendl;
     return r;
   }
 
+  r = m_remote_rados->ioctx_create(m_local_io_ctx.get_pool_name().c_str(),
+                                   m_remote_io_ctx);
+  if (r < 0) {
+    derr << "error accessing remote pool " << m_local_io_ctx.get_pool_name()
+         << ": " << cpp_strerror(r) << dendl;
+    return r;
+  }
+  m_remote_pool_id = m_remote_io_ctx.get_id();
+
+  dout(20) << "connected to " << m_peer << dendl;
+
+  // Bootstrap existing mirroring images
+  init_local_mirroring_images();
+
+  // TODO: make interval configurable
+  m_pool_watcher.reset(new PoolWatcher(m_remote_io_ctx, 30, m_lock, m_cond));
+  m_pool_watcher->refresh_images();
+
+  m_replayer_thread.create("replayer");
+
+  return 0;
+}
+
+int Replayer::init_rados(const std::string &cluster_name,
+                         const std::string &client_name,
+                         const std::string &description, RadosRef *rados_ref) {
+  rados_ref->reset(new librados::Rados());
+
   // NOTE: manually bootstrap a CephContext here instead of via
   // the librados API to avoid mixing global singletons between
   // the librados shared library and the daemon
   // TODO: eliminate intermingling of global singletons within Ceph APIs
   CephInitParameters iparams(CEPH_ENTITY_TYPE_CLIENT);
-  if (m_peer.client_name.empty() ||
-      !iparams.name.from_str(m_peer.client_name)) {
-    derr << "error initializing remote cluster handle for " << m_peer << dendl;
+  if (client_name.empty() || !iparams.name.from_str(client_name)) {
+    derr << "error initializing cluster handle for " << description << dendl;
     return -EINVAL;
   }
 
   CephContext *cct = common_preinit(iparams, CODE_ENVIRONMENT_LIBRARY,
                                     CINIT_FLAG_UNPRIVILEGED_DAEMON_DEFAULTS);
-  cct->_conf->cluster = m_peer.cluster_name;
+  cct->_conf->cluster = cluster_name;
 
   // librados::Rados::conf_read_file
-  r = cct->_conf->parse_config_files(nullptr, nullptr, 0);
+  int r = cct->_conf->parse_config_files(nullptr, nullptr, 0);
   if (r < 0) {
-    derr << "could not read ceph conf for " << m_peer << ": "
+    derr << "could not read ceph conf for " << description << ": "
 	 << cpp_strerror(r) << dendl;
     cct->put();
     return r;
@@ -299,7 +346,7 @@ int Replayer::init()
   env_to_vec(args, nullptr);
   r = cct->_conf->parse_argv(args);
   if (r < 0) {
-    derr << "could not parse environment for " << m_peer << ":"
+    derr << "could not parse environment for " << description << ":"
          << cpp_strerror(r) << dendl;
     cct->put();
     return r;
@@ -307,9 +354,10 @@ int Replayer::init()
 
   if (!m_args.empty()) {
     // librados::Rados::conf_parse_argv
-    r = cct->_conf->parse_argv(m_args);
+    args = m_args;
+    r = cct->_conf->parse_argv(args);
     if (r < 0) {
-      derr << "could not parse command line args for " << m_peer << ": "
+      derr << "could not parse command line args for " << description << ": "
 	   << cpp_strerror(r) << dendl;
       cct->put();
       return r;
@@ -321,36 +369,16 @@ int Replayer::init()
   cct->_conf->apply_changes(nullptr);
   cct->_conf->complain_about_parse_errors(cct);
 
-  r = m_remote->init_with_context(cct);
+  r = (*rados_ref)->init_with_context(cct);
   assert(r == 0);
   cct->put();
 
-  r = m_remote->connect();
+  r = (*rados_ref)->connect();
   if (r < 0) {
-    derr << "error connecting to remote cluster " << m_peer
-	 << " : " << cpp_strerror(r) << dendl;
-    return r;
-  }
-
-  r = m_remote->ioctx_create(m_local_io_ctx.get_pool_name().c_str(),
-                             m_remote_io_ctx);
-  if (r < 0) {
-    derr << "error accessing remote pool " << m_local_io_ctx.get_pool_name()
-         << ": " << cpp_strerror(r) << dendl;
+    derr << "error connecting to " << description << ": "
+	 << cpp_strerror(r) << dendl;
     return r;
   }
-  m_remote_pool_id = m_remote_io_ctx.get_id();
-
-  dout(20) << "connected to " << m_peer << dendl;
-
-  // Bootstrap existing mirroring images
-  init_local_mirroring_images();
-
-  // TODO: make interval configurable
-  m_pool_watcher.reset(new PoolWatcher(m_remote_io_ctx, 30, m_lock, m_cond));
-  m_pool_watcher->refresh_images();
-
-  m_replayer_thread.create("replayer");
 
   return 0;
 }
@@ -413,19 +441,24 @@ void Replayer::run()
       m_asok_hook_name = asok_hook_name;
       delete m_asok_hook;
 
-      CephContext *cct = static_cast<CephContext *>(m_local->cct());
-      m_asok_hook = new ReplayerAdminSocketHook(cct, m_asok_hook_name, this);
+      m_asok_hook = new ReplayerAdminSocketHook(g_ceph_context,
+                                                m_asok_hook_name, this);
     }
 
-    Mutex::Locker l(m_lock);
-    if (!m_manual_stop) {
+    Mutex::Locker locker(m_lock);
+    if (m_pool_watcher->is_blacklisted()) {
+      m_blacklisted = true;
+      m_stopping.set(1);
+    } else if (!m_manual_stop) {
       set_sources(m_pool_watcher->get_images());
     }
+
+    if (m_blacklisted) {
+      break;
+    }
     m_cond.WaitInterval(g_ceph_context, m_lock, seconds(30));
   }
 
-  m_image_deleter.reset();
-
   ImageIds empty_sources;
   while (true) {
     Mutex::Locker l(m_lock);
@@ -476,22 +509,24 @@ void Replayer::start()
 
   for (auto &kv : m_image_replayers) {
     auto &image_replayer = kv.second;
-    image_replayer->start(nullptr, nullptr, true);
+    image_replayer->start(nullptr, true);
   }
 }
 
-void Replayer::stop()
+void Replayer::stop(bool manual)
 {
-  dout(20) << "enter" << dendl;
+  dout(20) << "enter: manual=" << manual << dendl;
 
   Mutex::Locker l(m_lock);
-
-  if (m_stopping.read()) {
+  if (!manual) {
+    m_stopping.set(1);
+    m_cond.Signal();
+    return;
+  } else if (m_stopping.read()) {
     return;
   }
 
   m_manual_stop = true;
-
   for (auto &kv : m_image_replayers) {
     auto &image_replayer = kv.second;
     image_replayer->stop(nullptr, true);
@@ -551,8 +586,9 @@ void Replayer::set_sources(const ImageIds &image_ids)
     for (auto &image : m_init_images) {
       dout(20) << "scheduling the deletion of init image: "
                << image.name << dendl;
-      m_image_deleter->schedule_image_delete(m_local_pool_id, image.id,
-                                             image.name, image.global_id);
+      m_image_deleter->schedule_image_delete(m_local_rados, m_local_pool_id,
+                                             image.id, image.name,
+                                             image.global_id);
     }
     m_init_images.clear();
   }
@@ -611,8 +647,9 @@ void Replayer::set_sources(const ImageIds &image_ids)
     auto it = m_image_replayers.find(image_id.id);
     if (it == m_image_replayers.end()) {
       unique_ptr<ImageReplayer<> > image_replayer(new ImageReplayer<>(
-        m_threads, m_local, m_remote, local_mirror_uuid, remote_mirror_uuid,
-        m_local_pool_id, m_remote_pool_id, image_id.id, image_id.global_id));
+        m_threads, m_image_deleter, m_image_sync_throttler, m_local_rados,
+        m_remote_rados, local_mirror_uuid, remote_mirror_uuid, m_local_pool_id,
+        m_remote_pool_id, image_id.id, image_id.global_id));
       it = m_image_replayers.insert(
         std::make_pair(image_id.id, std::move(image_replayer))).first;
     }
@@ -620,7 +657,7 @@ void Replayer::set_sources(const ImageIds &image_ids)
       dout(20) << "starting image replayer for "
                << it->second->get_global_image_id() << dendl;
     }
-    start_image_replayer(it->second, image_id.name);
+    start_image_replayer(it->second, image_id.id, image_id.name);
   }
 }
 
@@ -666,22 +703,40 @@ void Replayer::mirror_image_status_shut_down() {
 }
 
 void Replayer::start_image_replayer(unique_ptr<ImageReplayer<> > &image_replayer,
+                                    const std::string &image_id,
                                     const boost::optional<std::string>& image_name)
 {
+  assert(m_lock.is_locked());
   dout(20) << "global_image_id=" << image_replayer->get_global_image_id()
            << dendl;
 
   if (!image_replayer->is_stopped()) {
     return;
+  } else if (image_replayer->is_blacklisted()) {
+    derr << "blacklisted detected during image replay" << dendl;
+    m_blacklisted = true;
+    m_stopping.set(1);
+    return;
   }
 
   if (image_name) {
     FunctionContext *ctx = new FunctionContext(
-        [&] (int r) {
+        [this, image_id, image_name] (int r) {
+          if (r == -ESTALE || r == -ECANCELED) {
+            return;
+          }
+
+          Mutex::Locker locker(m_lock);
+          auto it = m_image_replayers.find(image_id);
+          if (it == m_image_replayers.end()) {
+            return;
+          }
+
+          auto &image_replayer = it->second;
           if (r >= 0) {
             image_replayer->start();
           } else {
-            start_image_replayer(image_replayer, image_name);
+            start_image_replayer(image_replayer, image_id, image_name);
           }
        }
     );
@@ -691,39 +746,40 @@ void Replayer::start_image_replayer(unique_ptr<ImageReplayer<> > &image_replayer
 
 bool Replayer::stop_image_replayer(unique_ptr<ImageReplayer<> > &image_replayer)
 {
+  assert(m_lock.is_locked());
   dout(20) << "global_image_id=" << image_replayer->get_global_image_id()
            << dendl;
 
+  // TODO: check how long it is stopping and alert if it is too long.
   if (image_replayer->is_stopped()) {
-    if (m_image_deleter) {
+    m_image_deleter->cancel_waiter(image_replayer->get_local_image_name());
+    if (!m_stopping.read()) {
       dout(20) << "scheduling delete" << dendl;
       m_image_deleter->schedule_image_delete(
+        m_local_rados,
         image_replayer->get_local_pool_id(),
         image_replayer->get_local_image_id(),
         image_replayer->get_local_image_name(),
         image_replayer->get_global_image_id());
     }
     return true;
-  }
-
-  if (image_replayer->is_running()) {
-    if (m_image_deleter) {
+  } else {
+    if (!m_stopping.read()) {
       dout(20) << "scheduling delete after image replayer stopped" << dendl;
     }
     FunctionContext *ctx = new FunctionContext(
         [&image_replayer, this] (int r) {
-          if (m_image_deleter) {
+          if (!m_stopping.read() && r >= 0) {
             m_image_deleter->schedule_image_delete(
-                          image_replayer->get_local_pool_id(),
-                          image_replayer->get_local_image_id(),
-                          image_replayer->get_local_image_name(),
-                          image_replayer->get_global_image_id());
+              m_local_rados,
+              image_replayer->get_local_pool_id(),
+              image_replayer->get_local_image_id(),
+              image_replayer->get_local_image_name(),
+              image_replayer->get_global_image_id());
           }
         }
     );
     image_replayer->stop(ctx);
-  } else {
-    // TODO: checkhow long it is stopping and alert if it is too long.
   }
 
   return false;
diff --git a/src/tools/rbd_mirror/Replayer.h b/src/tools/rbd_mirror/Replayer.h
index cd8efa8..81db162 100644
--- a/src/tools/rbd_mirror/Replayer.h
+++ b/src/tools/rbd_mirror/Replayer.h
@@ -34,18 +34,21 @@ class MirrorStatusWatchCtx;
 class Replayer {
 public:
   Replayer(Threads *threads, std::shared_ptr<ImageDeleter> image_deleter,
-           RadosRef local_cluster, int64_t local_pool_id, const peer_t &peer,
+           ImageSyncThrottlerRef<> image_sync_throttler,
+           int64_t local_pool_id, const peer_t &peer,
            const std::vector<const char*> &args);
   ~Replayer();
   Replayer(const Replayer&) = delete;
   Replayer& operator=(const Replayer&) = delete;
 
+  bool is_blacklisted() const;
+
   int init();
   void run();
 
   void print_status(Formatter *f, stringstream *ss);
   void start();
-  void stop();
+  void stop(bool manual);
   void restart();
   void flush();
 
@@ -57,23 +60,29 @@ private:
   void set_sources(const ImageIds &image_ids);
 
   void start_image_replayer(unique_ptr<ImageReplayer<> > &image_replayer,
+                            const std::string &image_id,
                             const boost::optional<std::string>& image_name);
   bool stop_image_replayer(unique_ptr<ImageReplayer<> > &image_replayer);
 
   int mirror_image_status_init();
   void mirror_image_status_shut_down();
 
+  int init_rados(const std::string &cluser_name, const std::string &client_name,
+                 const std::string &description, RadosRef *rados_ref);
+
   Threads *m_threads;
   std::shared_ptr<ImageDeleter> m_image_deleter;
-  Mutex m_lock;
+  ImageSyncThrottlerRef<> m_image_sync_throttler;
+  mutable Mutex m_lock;
   Cond m_cond;
   atomic_t m_stopping;
   bool m_manual_stop = false;
+  bool m_blacklisted = false;
 
   peer_t m_peer;
   std::vector<const char*> m_args;
-  RadosRef m_local;
-  RadosRef m_remote;
+  RadosRef m_local_rados;
+  RadosRef m_remote_rados;
 
   librados::IoCtx m_local_io_ctx;
   librados::IoCtx m_remote_io_ctx;
diff --git a/src/tools/rbd_mirror/image_replayer/BootstrapRequest.cc b/src/tools/rbd_mirror/image_replayer/BootstrapRequest.cc
index 57f0705..574023a 100644
--- a/src/tools/rbd_mirror/image_replayer/BootstrapRequest.cc
+++ b/src/tools/rbd_mirror/image_replayer/BootstrapRequest.cc
@@ -20,6 +20,7 @@
 #include "librbd/journal/Types.h"
 #include "tools/rbd_mirror/ImageSync.h"
 #include "tools/rbd_mirror/ProgressContext.h"
+#include "tools/rbd_mirror/ImageSyncThrottler.h"
 
 #define dout_subsys ceph_subsys_rbd_mirror
 #undef dout_prefix
@@ -35,23 +36,26 @@ using librbd::util::create_rados_ack_callback;
 using librbd::util::unique_lock_name;
 
 template <typename I>
-BootstrapRequest<I>::BootstrapRequest(librados::IoCtx &local_io_ctx,
-                                      librados::IoCtx &remote_io_ctx,
-                                      I **local_image_ctx,
-                                      const std::string &local_image_name,
-                                      const std::string &remote_image_id,
-                                      const std::string &global_image_id,
-                                      ContextWQ *work_queue, SafeTimer *timer,
-                                      Mutex *timer_lock,
-                                      const std::string &local_mirror_uuid,
-                                      const std::string &remote_mirror_uuid,
-                                      Journaler *journaler,
-                                      MirrorPeerClientMeta *client_meta,
-                                      Context *on_finish,
-				      rbd::mirror::ProgressContext *progress_ctx)
+BootstrapRequest<I>::BootstrapRequest(
+        librados::IoCtx &local_io_ctx,
+        librados::IoCtx &remote_io_ctx,
+        std::shared_ptr<ImageSyncThrottler<I>> image_sync_throttler,
+        I **local_image_ctx,
+        const std::string &local_image_name,
+        const std::string &remote_image_id,
+        const std::string &global_image_id,
+        ContextWQ *work_queue, SafeTimer *timer,
+        Mutex *timer_lock,
+        const std::string &local_mirror_uuid,
+        const std::string &remote_mirror_uuid,
+        Journaler *journaler,
+        MirrorPeerClientMeta *client_meta,
+        Context *on_finish,
+        rbd::mirror::ProgressContext *progress_ctx)
   : BaseRequest("rbd::mirror::image_replayer::BootstrapRequest",
 		reinterpret_cast<CephContext*>(local_io_ctx.cct()), on_finish),
     m_local_io_ctx(local_io_ctx), m_remote_io_ctx(remote_io_ctx),
+    m_image_sync_throttler(image_sync_throttler),
     m_local_image_ctx(local_image_ctx), m_local_image_name(local_image_name),
     m_remote_image_id(remote_image_id), m_global_image_id(global_image_id),
     m_work_queue(work_queue), m_timer(timer), m_timer_lock(timer_lock),
@@ -63,7 +67,6 @@ BootstrapRequest<I>::BootstrapRequest(librados::IoCtx &local_io_ctx,
 
 template <typename I>
 BootstrapRequest<I>::~BootstrapRequest() {
-  assert(m_image_sync_request == nullptr);
   assert(m_remote_image_ctx == nullptr);
 }
 
@@ -79,9 +82,7 @@ void BootstrapRequest<I>::cancel() {
   Mutex::Locker locker(m_lock);
   m_canceled = true;
 
-  if (m_image_sync_request) {
-    m_image_sync_request->cancel();
-  }
+  m_image_sync_throttler->cancel_sync(m_local_io_ctx, m_local_image_id);
 }
 
 template <typename I>
@@ -547,31 +548,29 @@ void BootstrapRequest<I>::image_sync() {
   Context *ctx = create_context_callback<
     BootstrapRequest<I>, &BootstrapRequest<I>::handle_image_sync>(
       this);
-  ImageSync<I> *request = ImageSync<I>::create(*m_local_image_ctx,
-                                               m_remote_image_ctx, m_timer,
-                                               m_timer_lock,
-                                               m_local_mirror_uuid, m_journaler,
-                                               m_client_meta, m_work_queue, ctx,
-					       m_progress_ctx);
+
   {
     Mutex::Locker locker(m_lock);
-    request->get();
-    m_image_sync_request = request;
+    if (!m_canceled) {
+      m_image_sync_throttler->start_sync(*m_local_image_ctx,
+                                         m_remote_image_ctx, m_timer,
+                                         m_timer_lock,
+                                         m_local_mirror_uuid, m_journaler,
+                                         m_client_meta, m_work_queue, ctx,
+                                         m_progress_ctx);
+      return;
+    }
   }
 
-  request->send();
+  dout(10) << ": request canceled" << dendl;
+  m_ret_val = -ECANCELED;
+  close_remote_image();
 }
 
 template <typename I>
 void BootstrapRequest<I>::handle_image_sync(int r) {
   dout(20) << ": r=" << r << dendl;
 
-  {
-    Mutex::Locker locker(m_lock);
-    m_image_sync_request->put();
-    m_image_sync_request = nullptr;
-  }
-
   if (m_canceled) {
     dout(10) << ": request canceled" << dendl;
     m_ret_val = -ECANCELED;
diff --git a/src/tools/rbd_mirror/image_replayer/BootstrapRequest.h b/src/tools/rbd_mirror/image_replayer/BootstrapRequest.h
index 35ca883..e7ef050 100644
--- a/src/tools/rbd_mirror/image_replayer/BootstrapRequest.h
+++ b/src/tools/rbd_mirror/image_replayer/BootstrapRequest.h
@@ -10,6 +10,7 @@
 #include "cls/journal/cls_journal_types.h"
 #include "librbd/journal/TypeTraits.h"
 #include "tools/rbd_mirror/BaseRequest.h"
+#include "tools/rbd_mirror/types.h"
 #include <list>
 #include <string>
 
@@ -24,7 +25,6 @@ namespace librbd { namespace journal { struct MirrorPeerClientMeta; } }
 namespace rbd {
 namespace mirror {
 
-template <typename> class ImageSync;
 class ProgressContext;
 
 namespace image_replayer {
@@ -37,21 +37,24 @@ public:
   typedef librbd::journal::MirrorPeerClientMeta MirrorPeerClientMeta;
   typedef rbd::mirror::ProgressContext ProgressContext;
 
-  static BootstrapRequest* create(librados::IoCtx &local_io_ctx,
-                                  librados::IoCtx &remote_io_ctx,
-                                  ImageCtxT **local_image_ctx,
-                                  const std::string &local_image_name,
-                                  const std::string &remote_image_id,
-                                  const std::string &global_image_id,
-                                  ContextWQ *work_queue, SafeTimer *timer,
-                                  Mutex *timer_lock,
-                                  const std::string &local_mirror_uuid,
-                                  const std::string &remote_mirror_uuid,
-                                  Journaler *journaler,
-                                  MirrorPeerClientMeta *client_meta,
-                                  Context *on_finish,
-				  ProgressContext *progress_ctx = nullptr) {
-    return new BootstrapRequest(local_io_ctx, remote_io_ctx, local_image_ctx,
+  static BootstrapRequest* create(
+        librados::IoCtx &local_io_ctx,
+        librados::IoCtx &remote_io_ctx,
+        ImageSyncThrottlerRef<ImageCtxT> image_sync_throttler,
+        ImageCtxT **local_image_ctx,
+        const std::string &local_image_name,
+        const std::string &remote_image_id,
+        const std::string &global_image_id,
+        ContextWQ *work_queue, SafeTimer *timer,
+        Mutex *timer_lock,
+        const std::string &local_mirror_uuid,
+        const std::string &remote_mirror_uuid,
+        Journaler *journaler,
+        MirrorPeerClientMeta *client_meta,
+        Context *on_finish,
+        ProgressContext *progress_ctx = nullptr) {
+    return new BootstrapRequest(local_io_ctx, remote_io_ctx,
+                                image_sync_throttler, local_image_ctx,
                                 local_image_name, remote_image_id,
                                 global_image_id, work_queue, timer, timer_lock,
                                 local_mirror_uuid, remote_mirror_uuid,
@@ -61,6 +64,7 @@ public:
 
   BootstrapRequest(librados::IoCtx &local_io_ctx,
                    librados::IoCtx &remote_io_ctx,
+                   ImageSyncThrottlerRef<ImageCtxT> image_sync_throttler,
                    ImageCtxT **local_image_ctx,
                    const std::string &local_image_name,
                    const std::string &remote_image_id,
@@ -134,6 +138,7 @@ private:
 
   librados::IoCtx &m_local_io_ctx;
   librados::IoCtx &m_remote_io_ctx;
+  ImageSyncThrottlerRef<ImageCtxT> m_image_sync_throttler;
   ImageCtxT **m_local_image_ctx;
   std::string m_local_image_name;
   std::string m_local_image_id;
@@ -148,7 +153,6 @@ private:
   MirrorPeerClientMeta *m_client_meta;
   ProgressContext *m_progress_ctx;
   Mutex m_lock;
-  ImageSync<ImageCtxT> *m_image_sync_request = nullptr;
   bool m_canceled = false;
 
   Tags m_remote_tags;
diff --git a/src/tools/rbd_mirror/image_replayer/EventPreprocessor.cc b/src/tools/rbd_mirror/image_replayer/EventPreprocessor.cc
new file mode 100644
index 0000000..acb3600
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/EventPreprocessor.cc
@@ -0,0 +1,202 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "EventPreprocessor.h"
+#include "common/debug.h"
+#include "common/dout.h"
+#include "common/errno.h"
+#include "common/WorkQueue.h"
+#include "journal/Journaler.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/ImageState.h"
+#include "librbd/Utils.h"
+#include "librbd/journal/Types.h"
+#include <boost/variant.hpp>
+
+#define dout_subsys ceph_subsys_rbd_mirror
+
+#undef dout_prefix
+#define dout_prefix *_dout << "rbd::mirror::image_replayer::EventPreprocessor: " \
+                           << this << " " << __func__
+
+namespace rbd {
+namespace mirror {
+namespace image_replayer {
+
+using librbd::util::create_context_callback;
+
+template <typename I>
+EventPreprocessor<I>::EventPreprocessor(I &local_image_ctx,
+                                        Journaler &remote_journaler,
+                                        const std::string &local_mirror_uuid,
+                                        MirrorPeerClientMeta *client_meta,
+                                        ContextWQ *work_queue)
+  : m_local_image_ctx(local_image_ctx), m_remote_journaler(remote_journaler),
+    m_local_mirror_uuid(local_mirror_uuid), m_client_meta(client_meta),
+    m_work_queue(work_queue) {
+}
+
+template <typename I>
+EventPreprocessor<I>::~EventPreprocessor() {
+  assert(!m_in_progress);
+}
+
+template <typename I>
+bool EventPreprocessor<I>::is_required(const EventEntry &event_entry) {
+  SnapSeqs snap_seqs(m_client_meta->snap_seqs);
+  return (prune_snap_map(&snap_seqs) ||
+          event_entry.get_event_type() ==
+            librbd::journal::EVENT_TYPE_SNAP_RENAME);
+}
+
+template <typename I>
+void EventPreprocessor<I>::preprocess(EventEntry *event_entry,
+                                      Context *on_finish) {
+  assert(!m_in_progress);
+  m_in_progress = true;
+  m_event_entry = event_entry;
+  m_on_finish = on_finish;
+
+  refresh_image();
+}
+
+template <typename I>
+void EventPreprocessor<I>::refresh_image() {
+  dout(20) << dendl;
+
+  Context *ctx = create_context_callback<
+    EventPreprocessor<I>, &EventPreprocessor<I>::handle_refresh_image>(this);
+  m_local_image_ctx.state->refresh(ctx);
+}
+
+template <typename I>
+void EventPreprocessor<I>::handle_refresh_image(int r) {
+  dout(20) << ": r=" << r << dendl;
+
+  if (r < 0) {
+    derr << "error encountered during image refresh: " << cpp_strerror(r)
+         << dendl;
+    finish(r);
+    return;
+  }
+
+  preprocess_event();
+}
+
+template <typename I>
+void EventPreprocessor<I>::preprocess_event() {
+  dout(20) << dendl;
+
+  m_snap_seqs = m_client_meta->snap_seqs;
+  m_snap_seqs_updated = prune_snap_map(&m_snap_seqs);
+
+  int r = boost::apply_visitor(PreprocessEventVisitor(this),
+                               m_event_entry->event);
+  if (r < 0) {
+    finish(r);
+    return;
+  }
+
+  update_client();
+}
+
+template <typename I>
+int EventPreprocessor<I>::preprocess_snap_rename(
+    librbd::journal::SnapRenameEvent &event) {
+  dout(20) << ": "
+           << "remote_snap_id=" << event.snap_id << ", "
+           << "src_snap_name=" << event.src_snap_name << ", "
+           << "dest_snap_name=" << event.snap_name << dendl;
+
+  auto snap_seq_it = m_snap_seqs.find(event.snap_id);
+  if (snap_seq_it != m_snap_seqs.end()) {
+    dout(20) << ": remapping remote snap id " << snap_seq_it->first << " "
+             << "to local snap id " << snap_seq_it->second << dendl;
+    event.snap_id = snap_seq_it->second;
+    return 0;
+  }
+
+  auto snap_id_it = m_local_image_ctx.snap_ids.find(event.src_snap_name);
+  if (snap_id_it == m_local_image_ctx.snap_ids.end()) {
+    dout(20) << ": cannot map remote snapshot '" << event.src_snap_name << "' "
+             << "to local snapshot" << dendl;
+    event.snap_id = CEPH_NOSNAP;
+    return -ENOENT;
+  }
+
+  dout(20) << ": mapping remote snap id " << event.snap_id << " "
+           << "to local snap id " << snap_id_it->second << dendl;
+  m_snap_seqs_updated = true;
+  m_snap_seqs[event.snap_id] = snap_id_it->second;
+  event.snap_id = snap_id_it->second;
+  return 0;
+}
+
+template <typename I>
+void EventPreprocessor<I>::update_client() {
+  if (!m_snap_seqs_updated) {
+    finish(0);
+    return;
+  }
+
+  dout(20) << dendl;
+  librbd::journal::MirrorPeerClientMeta client_meta(*m_client_meta);
+  client_meta.snap_seqs = m_snap_seqs;
+
+  librbd::journal::ClientData client_data(client_meta);
+  bufferlist data_bl;
+  ::encode(client_data, data_bl);
+
+  Context *ctx = create_context_callback<
+    EventPreprocessor<I>, &EventPreprocessor<I>::handle_update_client>(
+      this);
+  m_remote_journaler.update_client(data_bl, ctx);
+}
+
+template <typename I>
+void EventPreprocessor<I>::handle_update_client(int r) {
+  dout(20) << ": r=" << r << dendl;
+
+  if (r < 0) {
+    derr << "failed to update mirror peer journal client: "
+         << cpp_strerror(r) << dendl;
+    finish(r);
+    return;
+  }
+
+  m_client_meta->snap_seqs = m_snap_seqs;
+  finish(0);
+}
+
+template <typename I>
+bool EventPreprocessor<I>::prune_snap_map(SnapSeqs *snap_seqs) {
+  bool pruned = false;
+
+  RWLock::RLocker snap_locker(m_local_image_ctx.snap_lock);
+  for (auto it = snap_seqs->begin(); it != snap_seqs->end(); ) {
+    auto current_it(it++);
+    if (m_local_image_ctx.snap_info.count(current_it->second) == 0) {
+      snap_seqs->erase(current_it);
+      pruned = true;
+    }
+  }
+  return pruned;
+}
+
+template <typename I>
+void EventPreprocessor<I>::finish(int r) {
+  dout(20) << ": r=" << r << dendl;
+
+  Context *on_finish = m_on_finish;
+  m_on_finish = nullptr;
+  m_event_entry = nullptr;
+  m_in_progress = false;
+  m_snap_seqs_updated = false;
+  m_work_queue->queue(on_finish, r);
+}
+
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+template class rbd::mirror::image_replayer::EventPreprocessor<librbd::ImageCtx>;
diff --git a/src/tools/rbd_mirror/image_replayer/EventPreprocessor.h b/src/tools/rbd_mirror/image_replayer/EventPreprocessor.h
new file mode 100644
index 0000000..6cdf0f6
--- /dev/null
+++ b/src/tools/rbd_mirror/image_replayer/EventPreprocessor.h
@@ -0,0 +1,118 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef RBD_MIRROR_IMAGE_REPLAYER_EVENT_PREPROCESSOR_H
+#define RBD_MIRROR_IMAGE_REPLAYER_EVENT_PREPROCESSOR_H
+
+#include "include/int_types.h"
+#include "librbd/journal/Types.h"
+#include "librbd/journal/TypeTraits.h"
+#include <map>
+#include <string>
+#include <boost/variant/static_visitor.hpp>
+
+struct Context;
+struct ContextWQ;
+namespace journal { class Journaler; }
+namespace librbd { class ImageCtx; }
+
+namespace rbd {
+namespace mirror {
+namespace image_replayer {
+
+template <typename ImageCtxT = librbd::ImageCtx>
+class EventPreprocessor {
+public:
+  using Journaler = typename librbd::journal::TypeTraits<ImageCtxT>::Journaler;
+  using EventEntry = librbd::journal::EventEntry;
+  using MirrorPeerClientMeta = librbd::journal::MirrorPeerClientMeta;
+
+  static EventPreprocessor *create(ImageCtxT &local_image_ctx,
+                                   Journaler &remote_journaler,
+                                   const std::string &local_mirror_uuid,
+                                   MirrorPeerClientMeta *client_meta,
+                                   ContextWQ *work_queue) {
+    return new EventPreprocessor(local_image_ctx, remote_journaler,
+                                 local_mirror_uuid, client_meta, work_queue);
+  }
+
+  EventPreprocessor(ImageCtxT &local_image_ctx, Journaler &remote_journaler,
+                    const std::string &local_mirror_uuid,
+                    MirrorPeerClientMeta *client_meta, ContextWQ *work_queue);
+  ~EventPreprocessor();
+
+  bool is_required(const EventEntry &event_entry);
+  void preprocess(EventEntry *event_entry, Context *on_finish);
+
+private:
+  /**
+   * @verbatim
+   *
+   * <start>
+   *    |
+   *    v (skip if not required)
+   * REFRESH_IMAGE
+   *    |
+   *    v (skip if not required)
+   * PREPROCESS_EVENT
+   *    |
+   *    v (skip if not required)
+   * UPDATE_CLIENT
+   *
+   * @endverbatim
+   */
+
+  typedef std::map<uint64_t, uint64_t> SnapSeqs;
+
+  class PreprocessEventVisitor : public boost::static_visitor<int> {
+  public:
+    EventPreprocessor *event_preprocessor;
+
+    PreprocessEventVisitor(EventPreprocessor *event_preprocessor)
+      : event_preprocessor(event_preprocessor) {
+    }
+
+    template <typename T>
+    inline int operator()(T&) const {
+      return 0;
+    }
+    inline int operator()(librbd::journal::SnapRenameEvent &event) const {
+      return event_preprocessor->preprocess_snap_rename(event);
+    }
+  };
+
+  ImageCtxT &m_local_image_ctx;
+  Journaler &m_remote_journaler;
+  std::string m_local_mirror_uuid;
+  MirrorPeerClientMeta *m_client_meta;
+  ContextWQ *m_work_queue;
+
+  bool m_in_progress = false;
+  EventEntry *m_event_entry = nullptr;
+  Context *m_on_finish = nullptr;
+
+  SnapSeqs m_snap_seqs;
+  bool m_snap_seqs_updated = false;
+
+  bool prune_snap_map(SnapSeqs *snap_seqs);
+
+  void refresh_image();
+  void handle_refresh_image(int r);
+
+  void preprocess_event();
+  int preprocess_snap_rename(librbd::journal::SnapRenameEvent &event);
+
+  void update_client();
+  void handle_update_client(int r);
+
+  void finish(int r);
+
+};
+
+} // namespace image_replayer
+} // namespace mirror
+} // namespace rbd
+
+extern template class rbd::mirror::image_replayer::EventPreprocessor<librbd::ImageCtx>;
+
+#endif // RBD_MIRROR_IMAGE_REPLAYER_EVENT_PREPROCESSOR_H
diff --git a/src/tools/rbd_mirror/image_replayer/OpenLocalImageRequest.cc b/src/tools/rbd_mirror/image_replayer/OpenLocalImageRequest.cc
index 9367ed6..f023f39 100644
--- a/src/tools/rbd_mirror/image_replayer/OpenLocalImageRequest.cc
+++ b/src/tools/rbd_mirror/image_replayer/OpenLocalImageRequest.cc
@@ -43,17 +43,15 @@ struct MirrorJournalPolicy : public librbd::journal::Policy {
   MirrorJournalPolicy(ContextWQ *work_queue) : work_queue(work_queue) {
   }
 
+  virtual bool append_disabled() const {
+    // avoid recording any events to the local journal
+    return true;
+  }
+
   virtual void allocate_tag_on_lock(Context *on_finish) {
     // rbd-mirror will manually create tags by copying them from the peer
     work_queue->queue(on_finish, 0);
   }
-
-  virtual void cancel_external_replay(Context *on_finish) {
-    // TODO: journal is being closed due to a comms error.  This means
-    // the journal is being closed and the exclusive lock is being released.
-    // ImageReplayer needs to restart.
-  }
-
 };
 
 } // anonymous namespace
@@ -141,6 +139,9 @@ void OpenLocalImageRequest<I>::send_lock_image() {
     return;
   }
 
+  // disallow any proxied maintenance operations before grabbing lock
+  (*m_local_image_ctx)->exclusive_lock->block_requests(-EROFS);
+
   Context *ctx = create_context_callback<
     OpenLocalImageRequest<I>, &OpenLocalImageRequest<I>::handle_lock_image>(
       this);
diff --git a/src/tools/rbd_mirror/image_sync/ImageCopyRequest.cc b/src/tools/rbd_mirror/image_sync/ImageCopyRequest.cc
index e037f88..336f114 100644
--- a/src/tools/rbd_mirror/image_sync/ImageCopyRequest.cc
+++ b/src/tools/rbd_mirror/image_sync/ImageCopyRequest.cc
@@ -5,6 +5,7 @@
 #include "ObjectCopyRequest.h"
 #include "include/stringify.h"
 #include "common/errno.h"
+#include "common/Timer.h"
 #include "journal/Journaler.h"
 #include "librbd/Utils.h"
 #include "tools/rbd_mirror/ProgressContext.h"
@@ -36,9 +37,10 @@ ImageCopyRequest<I>::ImageCopyRequest(I *local_image_ctx, I *remote_image_ctx,
     m_client_meta(client_meta), m_sync_point(sync_point),
     m_progress_ctx(progress_ctx),
     m_lock(unique_lock_name("ImageCopyRequest::m_lock", this)),
+    m_updating_sync_point(false), m_update_sync_ctx(nullptr),
+    m_update_sync_point_interval(g_ceph_context->_conf->rbd_mirror_sync_point_update_age),
     m_client_meta_copy(*client_meta) {
   assert(!m_client_meta_copy.sync_points.empty());
-  assert(!m_client_meta_copy.snap_seqs.empty());
 }
 
 template <typename I>
@@ -146,7 +148,22 @@ void ImageCopyRequest<I>::send_object_copies() {
       }
     }
     complete = (m_current_ops == 0);
+
+    if (!complete) {
+      m_update_sync_ctx = new FunctionContext([this](int r) {
+          this->send_update_sync_point();
+      });
+    }
   }
+
+  {
+    Mutex::Locker timer_locker(*m_timer_lock);
+    if (m_update_sync_ctx) {
+      m_timer->add_event_after(m_update_sync_point_interval,
+                               m_update_sync_ctx);
+    }
+  }
+
   if (complete) {
     send_flush_sync_point();
   }
@@ -205,6 +222,92 @@ void ImageCopyRequest<I>::handle_object_copy(int r) {
   update_progress("COPY_OBJECT " + stringify(percent) + "%", false);
 
   if (complete) {
+    bool do_flush = true;
+    {
+      Mutex::Locker timer_locker(*m_timer_lock);
+      Mutex::Locker locker(m_lock);
+      if (!m_updating_sync_point) {
+        if (m_update_sync_ctx != nullptr) {
+          m_timer->cancel_event(m_update_sync_ctx);
+          m_update_sync_ctx = nullptr;
+        }
+      } else {
+        do_flush = false;
+      }
+    }
+
+    if (do_flush) {
+      send_flush_sync_point();
+    }
+  }
+}
+
+template <typename I>
+void ImageCopyRequest<I>::send_update_sync_point() {
+  Mutex::Locker l(m_lock);
+
+  m_update_sync_ctx = nullptr;
+
+  if (m_canceled || m_ret_val < 0 || m_current_ops == 0) {
+    return;
+  }
+
+  if (m_sync_point->object_number &&
+      (m_object_no-1) == m_sync_point->object_number.get()) {
+    // update sync point did not progress since last sync
+    return;
+  }
+
+  m_updating_sync_point = true;
+
+  m_client_meta_copy = *m_client_meta;
+  m_sync_point->object_number = m_object_no - 1;
+
+  CephContext *cct = m_local_image_ctx->cct;
+  ldout(cct, 20) << ": sync_point=" << *m_sync_point << dendl;
+
+  bufferlist client_data_bl;
+  librbd::journal::ClientData client_data(*m_client_meta);
+  ::encode(client_data, client_data_bl);
+
+  Context *ctx = create_context_callback<
+    ImageCopyRequest<I>, &ImageCopyRequest<I>::handle_update_sync_point>(
+      this);
+  m_journaler->update_client(client_data_bl, ctx);
+}
+
+template <typename I>
+void ImageCopyRequest<I>::handle_update_sync_point(int r) {
+  CephContext *cct = m_local_image_ctx->cct;
+  ldout(cct, 20) << ": r=" << r << dendl;
+
+  if (r < 0) {
+    *m_client_meta = m_client_meta_copy;
+    lderr(cct) << ": failed to update client data: " << cpp_strerror(r)
+               << dendl;
+  }
+
+  bool complete;
+  {
+    Mutex::Locker l(m_lock);
+    m_updating_sync_point = false;
+
+    complete = m_current_ops == 0 || m_canceled || m_ret_val < 0;
+
+    if (!complete) {
+      m_update_sync_ctx = new FunctionContext([this](int r) {
+          this->send_update_sync_point();
+      });
+    }
+  }
+
+  if (!complete) {
+    Mutex::Locker timer_lock(*m_timer_lock);
+    if (m_update_sync_ctx) {
+      m_timer->add_event_after(m_update_sync_point_interval,
+                               m_update_sync_ctx);
+    }
+  } else {
     send_flush_sync_point();
   }
 }
diff --git a/src/tools/rbd_mirror/image_sync/ImageCopyRequest.h b/src/tools/rbd_mirror/image_sync/ImageCopyRequest.h
index 118b48d..85dfe9b 100644
--- a/src/tools/rbd_mirror/image_sync/ImageCopyRequest.h
+++ b/src/tools/rbd_mirror/image_sync/ImageCopyRequest.h
@@ -100,6 +100,10 @@ private:
   uint64_t m_current_ops = 0;
   int m_ret_val = 0;
 
+  bool m_updating_sync_point;
+  Context *m_update_sync_ctx;
+  double m_update_sync_point_interval;
+
   MirrorPeerClientMeta m_client_meta_copy;
 
   void send_update_max_object_count();
@@ -109,6 +113,9 @@ private:
   void send_next_object_copy();
   void handle_object_copy(int r);
 
+  void send_update_sync_point();
+  void handle_update_sync_point(int r);
+
   void send_flush_sync_point();
   void handle_flush_sync_point(int r);
 
diff --git a/src/tools/rbd_mirror/image_sync/ObjectCopyRequest.cc b/src/tools/rbd_mirror/image_sync/ObjectCopyRequest.cc
index 89acd03..9975b5b 100644
--- a/src/tools/rbd_mirror/image_sync/ObjectCopyRequest.cc
+++ b/src/tools/rbd_mirror/image_sync/ObjectCopyRequest.cc
@@ -235,6 +235,13 @@ void ObjectCopyRequest<I>::send_update_object_map() {
     m_local_image_ctx->snap_lock.put_read();
     finish(0);
     return;
+  } else if (m_local_image_ctx->object_map == nullptr) {
+    // possible that exclusive lock was lost in background
+    derr << ": object map is not initialized" << dendl;
+
+    m_local_image_ctx->snap_lock.put_read();
+    finish(-EINVAL);
+    return;
   }
 
   assert(m_local_image_ctx->object_map != nullptr);
diff --git a/src/tools/rbd_mirror/image_sync/SyncPointPruneRequest.cc b/src/tools/rbd_mirror/image_sync/SyncPointPruneRequest.cc
index d9b5e8e..8341fbe 100644
--- a/src/tools/rbd_mirror/image_sync/SyncPointPruneRequest.cc
+++ b/src/tools/rbd_mirror/image_sync/SyncPointPruneRequest.cc
@@ -53,14 +53,26 @@ void SyncPointPruneRequest<I>::send() {
       m_snap_names.push_back(sync_point.from_snap_name);
     }
   } else {
-    // if we have more than one sync point, trim the extras off
+    // if we have more than one sync point or invalid sync points,
+    // trim them off
+    RWLock::RLocker snap_locker(m_remote_image_ctx->snap_lock);
     std::set<std::string> snap_names;
     for (auto it = m_client_meta_copy.sync_points.rbegin();
          it != m_client_meta_copy.sync_points.rend(); ++it) {
-      MirrorPeerSyncPoint &sync_point =
-        m_client_meta_copy.sync_points.back();
+      MirrorPeerSyncPoint &sync_point = *it;
       if (&sync_point == &m_client_meta_copy.sync_points.front()) {
-        break;
+        if (m_remote_image_ctx->get_snap_id(sync_point.snap_name) ==
+              CEPH_NOSNAP) {
+          derr << ": failed to locate sync point snapshot: "
+               << sync_point.snap_name << dendl;
+        } else if (!sync_point.from_snap_name.empty()) {
+          derr << ": unexpected from_snap_name in primary sync point: "
+               << sync_point.from_snap_name << dendl;
+        } else {
+          // first sync point is OK -- keep it
+          break;
+        }
+        m_invalid_master_sync_point = true;
       }
 
       if (snap_names.count(sync_point.snap_name) == 0) {
@@ -156,6 +168,10 @@ void SyncPointPruneRequest<I>::send_update_client() {
     while (m_client_meta_copy.sync_points.size() > 1) {
       m_client_meta_copy.sync_points.pop_back();
     }
+    if (m_invalid_master_sync_point) {
+      // all subsequent sync points would have been pruned
+      m_client_meta_copy.sync_points.clear();
+    }
   }
 
   bufferlist client_data_bl;
diff --git a/src/tools/rbd_mirror/image_sync/SyncPointPruneRequest.h b/src/tools/rbd_mirror/image_sync/SyncPointPruneRequest.h
index 3ef4ab6..65e13ef 100644
--- a/src/tools/rbd_mirror/image_sync/SyncPointPruneRequest.h
+++ b/src/tools/rbd_mirror/image_sync/SyncPointPruneRequest.h
@@ -73,6 +73,8 @@ private:
   MirrorPeerClientMeta m_client_meta_copy;
   std::list<std::string> m_snap_names;
 
+  bool m_invalid_master_sync_point = false;
+
   void send_remove_snap();
   void handle_remove_snap(int r);
 
diff --git a/src/tools/rbd_mirror/types.h b/src/tools/rbd_mirror/types.h
index c45b963..9c0bea2 100644
--- a/src/tools/rbd_mirror/types.h
+++ b/src/tools/rbd_mirror/types.h
@@ -10,6 +10,7 @@
 #include <vector>
 
 #include "include/rbd/librbd.hpp"
+#include "ImageSyncThrottler.h"
 
 namespace rbd {
 namespace mirror {
@@ -18,6 +19,9 @@ typedef shared_ptr<librados::Rados> RadosRef;
 typedef shared_ptr<librados::IoCtx> IoCtxRef;
 typedef shared_ptr<librbd::Image> ImageRef;
 
+template <typename I = librbd::ImageCtx>
+using ImageSyncThrottlerRef = std::shared_ptr<ImageSyncThrottler<I>>;
+
 struct peer_t {
   peer_t() = default;
   peer_t(const std::string &uuid, const std::string &cluster_name,
diff --git a/src/tools/rbd_nbd/rbd-nbd.cc b/src/tools/rbd_nbd/rbd-nbd.cc
index f7e7531..4b014dd 100644
--- a/src/tools/rbd_nbd/rbd-nbd.cc
+++ b/src/tools/rbd_nbd/rbd-nbd.cc
@@ -391,33 +391,27 @@ std::ostream &operator<<(std::ostream &os, const NBDServer::IOContext &ctx) {
   return os;
 }
 
-class NBDWatchCtx : public librados::WatchCtx2
+class NBDWatchCtx : public librbd::UpdateWatchCtx
 {
 private:
   int fd;
   librados::IoCtx &io_ctx;
   librbd::Image ℑ
-  std::string header_oid;
   unsigned long size;
 public:
   NBDWatchCtx(int _fd,
               librados::IoCtx &_io_ctx,
               librbd::Image &_image,
-              std::string &_header_oid,
               unsigned long _size)
     : fd(_fd)
     , io_ctx(_io_ctx)
     , image(_image)
-    , header_oid(_header_oid)
     , size(_size)
   { }
 
   virtual ~NBDWatchCtx() {}
 
-  virtual void handle_notify(uint64_t notify_id,
-                             uint64_t cookie,
-                             uint64_t notifier_id,
-                             bufferlist& bl)
+  virtual void handle_notify()
   {
     librbd::image_info_t info;
     if (image.stat(info, sizeof(info)) == 0) {
@@ -433,14 +427,6 @@ public:
         size = new_size;
       }
     }
-
-    bufferlist reply;
-    io_ctx.notify_ack(header_oid, notify_id, cookie, reply);
-  }
-
-  virtual void handle_error(uint64_t cookie, int err)
-  {
-    //ignore
   }
 };
 
@@ -607,22 +593,10 @@ static int do_map()
     goto close_nbd;
 
   {
-    string header_oid;
-    uint64_t watcher;
-
-    if (old_format != 0) {
-      header_oid = imgname + RBD_SUFFIX;
-    } else {
-      char prefix[RBD_MAX_BLOCK_NAME_SIZE + 1];
-      strncpy(prefix, info.block_name_prefix, RBD_MAX_BLOCK_NAME_SIZE);
-      prefix[RBD_MAX_BLOCK_NAME_SIZE] = '\0';
-
-      std::string image_id(prefix + strlen(RBD_DATA_PREFIX));
-      header_oid = RBD_HEADER_PREFIX + image_id;
-    }
+    uint64_t handle;
 
-    NBDWatchCtx watch_ctx(nbd, io_ctx, image, header_oid, info.size);
-    r = io_ctx.watch2(header_oid, &watcher, &watch_ctx);
+    NBDWatchCtx watch_ctx(nbd, io_ctx, image, info.size);
+    r = image.update_watch(&watch_ctx, &handle);
     if (r < 0)
       goto close_nbd;
 
@@ -642,7 +616,8 @@ static int do_map()
       server.stop();
     }
 
-    io_ctx.unwatch2(watcher);
+    r = image.update_unwatch(handle);
+    assert(r == 0);
   }
 
 close_nbd:
diff --git a/src/tracing/librbd.tp b/src/tracing/librbd.tp
index f91e4e3..96b478b 100644
--- a/src/tracing/librbd.tp
+++ b/src/tracing/librbd.tp
@@ -1795,3 +1795,41 @@ TRACEPOINT_EVENT(librbd, stat_exit,
         ctf_array_text(char, parent_name, info->parent_name, RBD_MAX_IMAGE_NAME_SIZE)
     )
 )
+
+TRACEPOINT_EVENT(librbd, update_watch_enter,
+    TP_ARGS(
+        void*, imagectx,
+        void*, watchctx),
+    TP_FIELDS(
+        ctf_integer_hex(void*, imagctx, imagectx)
+        ctf_integer_hex(void*, watchctx, watchctx)
+    )
+)
+
+TRACEPOINT_EVENT(librbd, update_watch_exit,
+    TP_ARGS(
+        int, retval,
+        uint64_t, handle),
+    TP_FIELDS(
+        ctf_integer(int, retval, retval)
+        ctf_integer(uint64_t, handle, handle)
+    )
+)
+
+TRACEPOINT_EVENT(librbd, update_unwatch_enter,
+    TP_ARGS(
+        void*, imagectx,
+        uint64_t, handle),
+    TP_FIELDS(
+        ctf_integer_hex(void*, imagctx, imagectx)
+        ctf_integer(uint64_t, handle, handle)
+    )
+)
+
+TRACEPOINT_EVENT(librbd, update_unwatch_exit,
+    TP_ARGS(
+        int, retval),
+    TP_FIELDS(
+        ctf_integer(int, retval, retval)
+    )
+)
diff --git a/src/vstart.sh b/src/vstart.sh
index fd70ccc..47a6523 100755
--- a/src/vstart.sh
+++ b/src/vstart.sh
@@ -99,6 +99,7 @@ cephx=1 #turn cephx on by default
 cache=""
 memstore=0
 bluestore=0
+lockdep=${LOCKDEP:-1}
 
 MON_ADDR=""
 
@@ -133,6 +134,7 @@ usage=$usage"\t--bluestore use bluestore as the osd objectstore backend\n"
 usage=$usage"\t--memstore use memstore as the osd objectstore backend\n"
 usage=$usage"\t--cache <pool>: enable cache tiering on pool\n"
 usage=$usage"\t--short: short object names only; necessary for ext4 dev\n"
+usage=$usage"\t--nolockdep disable lockdep\n"
 
 usage_exit() {
 	printf "$usage"
@@ -260,6 +262,9 @@ case $1 in
 	    fi
 	    shift
 	    ;;
+    --nolockdep )
+            lockdep=0
+            ;;
     * )
 	    usage_exit
 esac
@@ -355,9 +360,6 @@ if [ "$bluestore" -eq 1 ]; then
 	osd objectstore = bluestore'
 fi
 
-# lockdep everywhere?
-# export CEPH_ARGS="--lockdep 1"
-
 if [ -z "$CEPH_PORT" ]; then
     CEPH_PORT=6789
     [ -e ".ceph_port" ] && CEPH_PORT=`cat .ceph_port`
@@ -457,8 +459,12 @@ if [ "$start_mon" -eq 1 ]; then
         filestore fd cache size = 32
         run dir = $CEPH_OUT_DIR
         enable experimental unrecoverable data corrupting features = *
+EOF
+if [ "$lockdep" -eq 1 ] ; then
+cat <<EOF >> $conf_fn
         lockdep = true
 EOF
+fi
 if [ "$cephx" -eq 1 ] ; then
 cat <<EOF >> $conf_fn
         auth supported = cephx
diff --git a/udev/60-ceph-by-parttypeuuid.rules b/udev/60-ceph-by-parttypeuuid.rules
new file mode 100644
index 0000000..1fe8fc5
--- /dev/null
+++ b/udev/60-ceph-by-parttypeuuid.rules
@@ -0,0 +1,31 @@
+#
+# Make sure /dev/disk/by-parttypeuuid is populated because
+# ceph-disk activate-all relies on it.
+#
+
+# forward scsi device event to corresponding block device
+ACTION=="change", SUBSYSTEM=="scsi", ENV{DEVTYPE}=="scsi_device", TEST=="block", ATTR{block/*/uevent}="change"
+
+ACTION=="remove", GOTO="persistent_storage_end_two"
+
+SUBSYSTEM!="block", GOTO="persistent_storage_end_two"
+
+# skip rules for inappropriate block devices
+KERNEL=="fd*|mtd*|nbd*|gnbd*|btibm*|md*", GOTO="persistent_storage_end_two"
+
+# ignore partitions that span the entire disk
+TEST=="whole_disk", GOTO="persistent_storage_end_two"
+
+# for partitions import parent information
+ENV{DEVTYPE}=="partition", IMPORT{parent}="ID_*"
+
+# skip unpartitioned removable media devices from drivers which do not send "change" events
+ENV{DEVTYPE}=="disk", KERNEL!="sd*|sr*", ATTR{removable}=="1", GOTO="persistent_storage_end_two"
+
+# probe filesystem metadata of disks
+KERNEL!="sr*", IMPORT{program}="/sbin/blkid -o udev -p $tempnode"
+
+# NEW: by-parttypeuuid links (type.id)
+ENV{ID_PART_ENTRY_SCHEME}=="gpt", ENV{ID_PART_ENTRY_TYPE}=="?*", ENV{ID_PART_ENTRY_UUID}=="?*", SYMLINK+="disk/by-parttypeuuid/$env{ID_PART_ENTRY_TYPE}.$env{ID_PART_ENTRY_UUID}"
+
+LABEL="persistent_storage_end_two"

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-ceph/ceph.git



More information about the Pkg-ceph-commits mailing list